Skip to content
Commits on Source (2)
......@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
## [1.1.1]
### Added
- management task to import OrganizationEconomics, with historical records, from ATOKA
### Changed
- is_public flag (SPA) and capital_stock added to OrganizationEconomics
## [1.1.0]
### Fixed
......
......@@ -3,4 +3,4 @@
Openpolis Data Manager service package (backend)
"""
__version__ = '1.1.0'
__version__ = '1.1.1'
......@@ -28,7 +28,7 @@ from popolo.models import (
from django.db import models, transaction
from haystack import signals
from project.connections.atoka import AtokaConn, AtokaObjectDoesNotExist, AtokaException
from project.atoka.connections import AtokaConn, AtokaObjectDoesNotExist, AtokaException
codicefiscale._DATA["municipalities"].update(
{
......@@ -1510,8 +1510,9 @@ class OrganizationUtils(object):
def org_anagraphical_lookup(cls, item, logger, current=False):
"""anagraphical lookup strategy implementation
:param item: the item to lookup in the DB
:param logger: the logger
:param item: the item to lookup in the DB
:param logger: the logger
:param current: only look for current data (dissolution_date None)
:return: the id if found, 0 if not found, negative number of multiples if multiples found
"""
......
......@@ -8,11 +8,11 @@ from unittest.mock import MagicMock, patch
from popolo.tests.factories import AreaFactory, PersonFactory
from project.api_v1.core import PersonUtils, millis
from project.connections.atoka import AtokaConn, AtokaObjectDoesNotExist, AtokaMultipleObjectsReturned, \
from project.atoka.connections import AtokaConn, AtokaObjectDoesNotExist, AtokaMultipleObjectsReturned, \
AtokaResponseError
from faker import Factory
from project.connections.tests import get_person_ok
from project.atoka.tests import get_person_ok
faker = Factory.create("it_IT") # a factory to create fake data for tests
......
......@@ -3,7 +3,7 @@ from typing import Type, Union
from opdmetl.extractors import Extractor
from project.connections.atoka import AtokaConn, AtokaObjectDoesNotExist
from project.atoka.connections import AtokaConn, AtokaObjectDoesNotExist
class AtokaOwnershipsExtractor(Extractor):
......@@ -29,7 +29,7 @@ class AtokaOwnershipsExtractor(Extractor):
Given a list of tax_ids in self.batch, queries Atoka API, in order to retrieve owned shares and roles.
Ownerships are returned as a list.
Ownerships are returned as a list in the results.
Each element in the list represents an **owner**, and has major identifiers, classifications and
**owned organizations**.
Owned organizations are embedded in the `shares_owned` list.
......@@ -301,3 +301,116 @@ class AtokaOwnershipsExtractor(Extractor):
},
'results': results
}
class AtokaEconomicsExtractor(Extractor):
    """:class:`Extractor` for extractions of economics information out of the Atoka API

    Uses methods in the `AtokaConn` class, to extract information from atoka api.
    """

    def __init__(self, batch: list):
        """Create a new instance of the extractor, to extract info from a batch of companies.

        Args:
            batch: a list of Atoka ids, passed as-is to
                ``AtokaConn.get_companies_from_atoka_ids``

        Returns:
            instance of a :class:`AtokaEconomicsExtractor`
        """
        self.batch = batch
        super().__init__()

    def extract(self, **kwargs) -> dict:
        """Extract economics information from Atoka for the ids in ``self.batch``.

        The ``base`` and ``economics`` packages are requested for every id.
        The result is a dict with this layout::

            {
                'meta': {
                    'atoka_requests': {
                        'companies': atoka_companies_requests
                    },
                },
                'results': results
            }

        Each element of ``results`` is the record returned by Atoka with the
        ``base`` section removed and its ``taxId`` exposed as ``tax_id``::

            [
                {
                    'id': atoka_id,
                    'tax_id': tax_id,
                    'other_atoka_ids': [],
                    'name': name,
                    'economics': {
                        'public': false,
                        'capitalStock': { 'value': 10000 },
                        'balanceSheets': [
                            {
                                'year': 2018,
                                'latest': True,
                                'revenue': 1000000,
                                'revenueTrend': 0.24,
                                ...
                            },
                            ...
                        ],
                        'employees': [
                            {
                                'year': 2018,
                                'latest': True,
                                'value': 81
                            },
                            ...
                        ]
                    }
                },
                ...
            ]

        :return: dict with request counters under ``meta`` and the list of
            records under ``results``
        """
        atoka_ids = self.batch
        atoka_conn = AtokaConn()
        atoka_companies_requests = 0

        # NOTE(review): batch_size=1 was justified in the original comment by the
        # size of the shares payload (limit 50), which looks inherited from the
        # ownerships extractor -- confirm whether economics really needs it.
        try:
            res_tot = atoka_conn.get_companies_from_atoka_ids(
                atoka_ids, packages='base,economics', active='true', batch_size=1
            )
        except AtokaObjectDoesNotExist:
            # nothing found for this batch: not an error, just an empty result
            res_tot = []
        else:
            # the counter is the number of records received for this batch
            atoka_companies_requests += len(res_tot)
            self.logger.debug(
                "- da {0} tax_ids, ricevuti da Atoka dettagli per {1} istituzioni "
                "(ci possono essere doppioni)".format(
                    len(atoka_ids), len(res_tot)
                )
            )

        results = []
        for r in res_tot:
            # a record lacking base/taxId must not abort the whole batch
            base = r.get('base') or {}
            tax_id = base.get('taxId')
            if tax_id is None:
                self.logger.warning(
                    "Atoka record {0} has no base.taxId".format(r.get('id'))
                )

            # build a new dict instead of popping 'base' from the record
            # returned by the connection, so the caller's data is left untouched
            r_dict = {k: v for k, v in r.items() if k != 'base'}
            r_dict['tax_id'] = tax_id
            results.append(r_dict)

        return {
            'meta': {
                'atoka_requests': {
                    'companies': atoka_companies_requests
                },
            },
            'results': results
        }
from popolo.models import Organization
from project.api_v1.etl.loaders import PopoloLoader
from project.atoka.models import OrganizationEconomics
class OrganizationEconomicsLoader(PopoloLoader):
    """:class:`Loader` that stores data in ``atoka.OrganizationEconomics`` instances

    The generic `load` method can still be overridden in subclasses,
    should peculiar necessities arise (as bulk loading).
    """
    areas_dict = None

    def __init__(self, **kwargs):
        """Create a new instance of the loader

        All keyword arguments are forwarded to the parent loader.

        Args:
            identifier_scheme: scheme of the identifier used to look up the
                Organization (defaults to ``ATOKA_ID``)
            update_strategy: how to update already existing records
                - `overwrite`: overwrite all fields (default)
                - any other value (e.g. `keep_old`): only write fields that
                  are currently empty, keeping old values
        """
        super(OrganizationEconomicsLoader, self).__init__(**kwargs)
        self.identifier_scheme = kwargs.get('identifier_scheme', 'ATOKA_ID')
        self.update_strategy = kwargs.get('update_strategy', 'overwrite')

    def _apply_values(self, instance, values):
        """Write ``values`` on ``instance`` following ``self.update_strategy``, then save it."""
        for k, v in values.items():
            # with a non-overwrite strategy only falsy (None, 0, False, '')
            # attributes are considered empty and get written
            if self.update_strategy == "overwrite" or not getattr(instance, k):
                setattr(instance, k, v)
        instance.save()

    def load_item(self, item, **kwargs):
        """Load economics details (latest and historical) for one organization.

        The organization is looked up through the identifier stored in the item
        under the lower-cased ``self.identifier_scheme`` key. The
        ``OrganizationEconomics`` record is then created or updated, and the
        same happens to one historical record per year found in
        ``historical_values``.

        :param item: the item to be loaded (it is modified: the identifier and
            ``historical_values`` keys are popped)
        :return: None
        """
        _id = item.pop(self.identifier_scheme.lower())
        historical_values = item.pop('historical_values', [])

        try:
            org = Organization.objects.get(
                identifiers__scheme=self.identifier_scheme,
                identifiers__identifier=_id,
            )
        except Organization.DoesNotExist:
            # item.get: a missing name must not break the error report itself
            self.logger.error("Could not find organization with {0}:{1} ({2})".format(
                self.identifier_scheme, _id, item.get('name'))
            )
            return
        except Organization.MultipleObjectsReturned:
            self.logger.error("Multiple organizations found with {0}:{1} ({2})".format(
                self.identifier_scheme, _id, item.get('name'))
            )
            return

        # every remaining key but tax_id and name is a model field;
        # a 'latest_' prefix, when present, is stripped from the key
        defaults = {
            k.replace('latest_', ''): v for k, v in item.items() if k not in ['tax_id', 'name']
        }

        try:
            o, created = OrganizationEconomics.objects.get_or_create(
                organization=org,
                defaults=defaults
            )
            if created:
                self.logger.debug("Economics details created for org with {0}:{1}".format(self.identifier_scheme, _id))
            else:
                self.logger.debug("Economics details updated for org with {0}:{1}".format(self.identifier_scheme, _id))
                self._apply_values(o, defaults)
        except Exception as e:
            self.logger.error("{0} when loading organization {1}".format(e, item))
            return

        for hv in historical_values:
            year = hv.get('year', None)

            # skip insertion for info not labeled with a year
            if year is None:
                continue

            hv_defaults = {
                k: v for k, v in hv.items() if k not in ['currency', 'date', 'year']
            }
            oh, created = o.historical_values.get_or_create(
                year=year,
                defaults=hv_defaults
            )
            if created:
                self.logger.debug(
                    "Historical economics details created for org with {0}:{1}".format(self.identifier_scheme, _id)
                )
            else:
                self.logger.debug(
                    "Historical economics details updated for org with {0}:{1}".format(self.identifier_scheme, _id)
                )
                # update the historical record (oh), not the parent economics
                # record (o), which holds the latest values
                self._apply_values(oh, hv_defaults)
import typing
from operator import itemgetter
from typing import Union
from popolo.models import Classification
......@@ -81,12 +83,12 @@ class AtokaOwnershipTransformation(Transformation):
od = self.filter_rows(od)
def ownerships_from_item(i: dict) -> list:
ownerships = []
_ownerships = []
for owned in i['shares_owned']:
owning_org_tax_id = i.get("tax_id", None)
owned_org_tax_id = owned.get("tax_id", None)
if owning_org_tax_id and owned_org_tax_id:
ownership = {
_ownership = {
"owning_org": {
"identifier": owning_org_tax_id,
},
......@@ -101,9 +103,9 @@ class AtokaOwnershipTransformation(Transformation):
}
ownerships.append(ownership)
_ownerships.append(_ownership)
return ownerships
return _ownerships
def get_index(o: dict) -> tuple:
"""Return unique hashable index for ownership dict
......@@ -850,7 +852,7 @@ class AtokaMembershipTransformation(Transformation):
}.get(atoka_role.lower(), None)
def memberships_from_item(i: dict, orgs_dict: dict) -> list:
memberships = []
_memberships = []
for owned in i['shares_owned']:
owned_org_tax_id = owned.get("tax_id", None)
roles = owned.get("roles", [])
......@@ -924,8 +926,8 @@ class AtokaMembershipTransformation(Transformation):
}
],
}
memberships.append(person_roles)
return memberships
_memberships.append(person_roles)
return _memberships
orgs_tax_ids = []
for item in od:
......@@ -939,19 +941,19 @@ class AtokaMembershipTransformation(Transformation):
# store processed data into the Transformation instance
# use unique_set as index, to avoid duplications
def get_index(m: dict) -> tuple:
def get_index(_m: dict) -> tuple:
"""Return unique hashable index for person_roles dict
:param m: dict containing all personal roles
:param _m: dict containing all personal roles
:return: tuple
"""
idx = [
m["given_name"],
m["family_name"],
m["birth_date"],
m["birth_location"],
_m["given_name"],
_m["family_name"],
_m["birth_date"],
_m["birth_location"],
]
for mm in m["memberships"]:
for mm in _m["memberships"]:
idx.extend((mm["label"], mm["organization_id"], mm["start_date"]))
return tuple(idx)
......@@ -964,3 +966,210 @@ class AtokaMembershipTransformation(Transformation):
if m_index not in unique_set:
self.etl.processed_data.append(m)
unique_set.add(m_index)
class AtokaOrganizationEconomicsTransformation(Transformation):
    """Transform economics information extracted from Atoka, into a data structure useable in a PopoloLoader.

    Used to create or upgrade OrganizationEconomic details found in atoka.

    Original information from ATOKA
    ------------------------------------------
    {
        'id': atoka_id,
        'tax_id': tax_id,
        'name': name,
        'economics': {
            'public': false,
            'capitalStock': { 'value': 10000 },
            'balanceSheets': [
                {
                    'year': 2018,
                    'latest': True,
                    'revenue': 1000000,
                    'revenueTrend': 0.24,
                    ...
                },
                ...
            ],
            'employees': [
                {
                    'year': 2018,
                    'latest': True,
                    'value': 81
                },
                ...
            ]
        }
    },

    Information as needed by loader
    ------------------------------------
    {
        "atoka_id": "7058f762c20c",
        "tax_id": "00040450074",
        "name": "AUTOPORTO VALLE D'AOSTA - S.P.A.",
        "is_public": false,
        "revenue": 2545000,
        "revenue_trend": 0.0063,
        "capital_stock": 31270000,
        "assets": null,
        "costs": null,
        "ebitda": null,
        "mol": null,
        "net_financial_position": null,
        "production": null,
        "profit": null,
        "purchases": null,
        "raw_materials_variation": null,
        "services_and_tp_goods_charges": null,
        "staff_costs": null,
        "employees": 13,
        "historical_values": [
            {
                "year": 2018,
                "employees": 13,
            },
            {
                "year": 2017,
                "capital_stock": 31270000,
                "revenue": 2545000,
                "revenue_trend": 0.0063,
                "employees": 15,
            },
            {
                "year": 2016,
                "capital_stock": 31270000,
                "revenue": 2529000,
                "revenue_trend": -0.016,
                "employees": 15,
            },
            {
                "year": 2015,
                "capital_stock": 31270000,
                "revenue": 2570000,
                "revenue_trend": -0.0019,
            }
        ],
    }
    """

    @staticmethod
    def filter_rows(od):
        """Return an iterator over the records that contain an ``economics`` section."""
        return filter(lambda x: 'economics' in x, od)

    def transform(self):
        """Transform a list of dicts extracted from the ATOKA API.

        Reads ``self.etl.original_data`` and stores the transformed records,
        one per distinct ``atoka_id``, in ``self.etl.processed_data``.
        """
        self.logger.debug("start of transform")

        self.logger.debug(" get a copy of the original dataframe")
        od = self.etl.original_data

        # keep only records having economics details
        od = list(self.filter_rows(od))

        self.logger.debug(
            "- {0} organizzzioni hanno dettagli economici".format(
                len(od)
            )
        )

        def normalize_field_names(_item: dict) -> dict:
            """Normalize field names from camelCase to snake_case; unknown keys are kept as they are"""
            norm = {
                'revenueTrend': 'revenue_trend',
                'capitalStock': 'capital_stock',
                'netFinancialPosition': 'net_financial_position',
                'rawMaterialsVariation': 'raw_materials_variation',
                'servicesAndTPGoodsCharges': 'services_and_tp_goods_charges',
                'staffCosts': 'staff_costs'
            }
            return {norm.get(k, k): v for k, v in _item.items()}

        def fetch_one_per_year(items: list, key_field: str = 'date', max_items: int = 3) -> list:
            """Fetch only one (normalized) value per year, after sorting by ``key_field`` descending

            When some item lacks ``key_field`` sorting is not possible and only
            the first item is returned.
            """
            try:
                sorted_items = sorted(items, key=itemgetter(key_field), reverse=True)
            except KeyError:
                # normalize here too, so that camelCase keys never reach the loader
                return [normalize_field_names(items[0])]

            fetched_items = []
            seen_years = set()
            for _item in sorted_items:
                # items without a year share the None slot (the loader skips them)
                year = _item.get('year')
                if year not in seen_years:
                    seen_years.add(year)
                    fetched_items.append(normalize_field_names(_item))

            return fetched_items[:max_items]

        def fetch_latest(items: list) -> dict:
            """Fetch the first item flagged as 'latest' (there should be one)

            :param items: list of dicts, the 'latest' key is optional
            :return: the normalized latest item; the only item when the list has
                exactly one element and none is flagged; an empty dict otherwise
            """
            # .get: items other than the latest one may not carry the flag at all
            latest = next((x for x in items if x.get('latest') is True), None)
            if latest is not None:
                return normalize_field_names(latest)
            if len(items) == 1:
                return normalize_field_names(items[0])
            return {}

        def org_from_item(i: dict) -> dict:
            """Build the record needed by the loader out of a single Atoka record"""
            r_dict = {'atoka_id': i['id']}
            for k in ['tax_id', 'name']:
                r_dict[k] = i[k]

            economics = i['economics']
            r_dict['is_public'] = economics.get('public', False)

            historical_values = []
            if 'balanceSheets' in economics:
                # copy historical values
                balance_sheets = fetch_one_per_year(economics['balanceSheets'], max_items=4)
                for sheet in balance_sheets:
                    historical_values.append({
                        k: v for k, v in sheet.items() if k not in ['latest', ]
                    })

                # fetch latest values in balanceSheets and move under r_dict (as latest);
                # the latest sheet is looked up once, not once per field
                latest_sheet = fetch_latest(balance_sheets)
                for norm_field in [
                    'revenue', 'revenue_trend', 'capital_stock',
                    'assets', 'costs', 'ebitda', 'mol', 'net_financial_position',
                    'production', 'profit', 'purchases', 'raw_materials_variation',
                    'services_and_tp_goods_charges', 'staff_costs'
                ]:
                    r_dict[norm_field] = latest_sheet.get(norm_field, None)

            # correct capital_stock, if a value is found directly under economics
            capital_stock = economics.get('capitalStock', None) or {}
            if capital_stock.get('value') is not None:
                r_dict['capital_stock'] = capital_stock['value']

            if 'employees' in economics:
                # distribute employees through historical values, matching on the year
                employees = fetch_one_per_year(economics['employees'])
                for employee in employees:
                    year = employee.get('year', None)
                    hist = next((hv for hv in historical_values if hv.get("year") == year), None)
                    if hist is None:
                        historical_values.append({'year': year, 'employees': employee.get('value')})
                    else:
                        hist['employees'] = employee.get('value')

                # fetch latest value
                r_dict['employees'] = fetch_latest(economics['employees']).get('value', None)

            r_dict['historical_values'] = historical_values

            return r_dict

        # store processed data into the Transformation instance
        # uses a set to avoid duplications
        self.etl.processed_data = []
        unique_set = set()
        for item in od:
            org = org_from_item(item)
            index = org['atoka_id']
            if index and index not in unique_set:
                self.etl.processed_data.append(org)
                unique_set.add(index)
......@@ -5,7 +5,7 @@ from popolo.models import Organization
from taskmanager.utils import LoggingBaseCommand
from project.api_v1.core import batch_generator
from project.api_v1.etl.extractors import AtokaEconomicsExtractor
from project.atoka.etl.extractors import AtokaEconomicsExtractor
class Command(LoggingBaseCommand):
......@@ -27,7 +27,7 @@ class Command(LoggingBaseCommand):
parser.add_argument(
"--json-file",
dest="jsonfile",
default="./resources/out/atoka.json",
default="./resources/data/atoka/atoka_economics.json",
help="Complete path to json file"
)
......@@ -41,59 +41,50 @@ class Command(LoggingBaseCommand):
self.logger.info("Start procedure")
# start filtering current organizations with a tax_id,
# excluding those classified as private
# excluding public intitutions
organizations_qs = Organization.objects.filter(
identifiers__scheme='ATOKA_ID'
).current()
).current().exclude(
classifications__classification__scheme='FORMA_GIURIDICA_OP',
classifications__classification__descr__in=['Comune', 'Comunità montana o isolana', 'Provincia', 'Regione']
)
atoka_records = []
atoka_companies_requests = 0
atoka_people_requests = 0
people_ids = []
owned_ids = []
counter = 0
# generate batches of batchsize, to query atoka's endpoint
total_count = organizations_qs.values_list('identifiers__identifier', flat=True).distinct().count()
batches = batch_generator(
batchsize, organizations_qs.values_list('identifier', flat=True).distinct().iterator()
batchsize, organizations_qs.values_list('identifiers__identifier', flat=True).distinct().iterator()
)
group_counter = 0
for tax_ids_batch in batches:
for _ids_batch in batches:
# extract economics information for organizations from ATOKA
# implement offset
if counter >= offset:
atoka_extractor = AtokaEconomicsExtractor(tax_ids_batch)
atoka_extractor = AtokaEconomicsExtractor(_ids_batch)
atoka_extractor.logger = self.logger
atoka_res = atoka_extractor.extract()
atoka_records.extend(atoka_res['results'])
atoka_companies_requests += atoka_res['meta']['atoka_requests']['companies']
atoka_people_requests += atoka_res['meta']['atoka_requests']['people']
people_ids.extend(atoka_res['meta']['ids']['people'])
people_ids = list(set(people_ids))
owned_ids.extend(atoka_res['meta']['ids']['companies'])
owned_ids = list(set(owned_ids))
group_counter += len(tax_ids_batch)
counter += len(_ids_batch)
self.logger.info("{0} tax_ids, {1} partecipate, {2} persone --------".format(
group_counter, len(owned_ids), len(people_ids)
))
self.logger.info("{0}/{1}".format(counter, total_count))
else:
self.logger.info("skipping {0} tax_ids".format(
len(tax_ids_batch)
counter += len(_ids_batch)
self.logger.info("skipping {0} ids".format(
len(_ids_batch)
))
self.logger.debug("")
counter += len(tax_ids_batch)
self.logger.info(
"crediti spesi con atoka: {0} companies, {1} people".format(
atoka_companies_requests, atoka_people_requests
......
......@@ -34,7 +34,7 @@ class Command(LoggingBaseCommand):
parser.add_argument(
"--json-file",
dest="jsonfile",
default="./resources/out/atoka.json",
default="./resources/data/atoka/atoka.json",
help="Complete path to json file"
)
......
......@@ -9,7 +9,7 @@ from project.api_v1.etl.extractors import JsonArrayExtractor, ListExtractor
from project.atoka.etl.transformations import \
AtokaOwnershipTransformation, \
AtokaMembershipTransformation, \
AtokaOrganizationTransformation
AtokaOrganizationTransformation, AtokaOrganizationEconomicsTransformation
class Command(LoggingBaseCommand):
......@@ -33,7 +33,7 @@ class Command(LoggingBaseCommand):
dest="contexts",
metavar='CONTEXT',
nargs='*',
default=['organizations', 'ownerships', 'persons_memberships'],
default=['organizations', 'organization_economics', 'ownerships', 'persons_memberships'],
help="Whether to check membership labels when updating",
)
......@@ -58,6 +58,16 @@ class Command(LoggingBaseCommand):
# source="http://api.atoka.it"
)()
# The --contexts default spells this context 'organization_economics' (singular),
# so both spellings are accepted: otherwise this step never runs with defaults.
if 'organization_economics' in contexts or 'organizations_economics' in contexts:
    self.logger.info("organizations_economics")
    # transform the Atoka records and dump them as JSON for the economics import task
    ETL(
        extractor=ListExtractor(atoka_records),
        transformation=AtokaOrganizationEconomicsTransformation(),
        loader=JsonLoader(os.path.join(json_out_path, "atoka_organizations_economics.json")),
        log_level=self.logger.level,
        # source="http://api.atoka.it"
    )()
if 'ownerships' in contexts:
self.logger.info("ownerships")
ETL(
......
# -*- coding: utf-8 -*-
import logging
from django.core.management.base import BaseCommand
from opdmetl import ETL
from project.api_v1.etl.extractors import JsonArrayExtractor
from project.atoka.etl.loaders import OrganizationEconomicsLoader
class Command(BaseCommand):
    """Management task that imports organizations' economics details from a JSON source.

    The JSON array is read with ``JsonArrayExtractor`` and each record is
    stored through ``OrganizationEconomicsLoader``.
    """
    help = "Import Organizations economics details from a remote or local json source"
    logger = logging.getLogger(__name__)

    def add_arguments(self, parser):
        """Declare the positional source url and the optional import settings."""
        parser.add_argument(
            dest="source_url", help="Source of the JSON file (http[s]:// or file:///)"
        )
        parser.add_argument(
            "--update-strategy",
            dest="update_strategy",
            default="overwrite",
            # the help text must state the real default
            help="Whether to keep old values or to overwrite them (keep_old | overwrite), defaults to overwrite",
        )
        parser.add_argument(
            "--identifier-scheme",
            dest="identifier_scheme",
            default='ATOKA_ID',
            help="Which scheme to use with identifier/mixed lookup strategy",
        )
        parser.add_argument(
            "--log-step",
            dest="log_step",
            type=int,
            default=500,
            help="Number of steps to log process completion to stdout. Defaults to 500.",
        )

    def handle(self, *args, **options):
        """Set the log level out of the verbosity option, then run the ETL process."""
        # map django verbosity (0..3) to a logging level;
        # any other value leaves the logger level untouched
        log_level = {
            0: logging.ERROR,
            1: logging.WARNING,
            2: logging.INFO,
            3: logging.DEBUG,
        }.get(options["verbosity"])
        if log_level is not None:
            self.logger.setLevel(log_level)

        update_strategy = options["update_strategy"]
        identifier_scheme = options["identifier_scheme"]
        source_url = options["source_url"]
        log_step = options["log_step"]

        self.logger.info("Start records import")
        self.logger.info("Reading JSON from url: {0}".format(source_url))

        # define the instance and invoke the etl() method through __call__()
        self.logger.info("Starting ETL process")
        ETL(
            extractor=JsonArrayExtractor(source_url),
            loader=OrganizationEconomicsLoader(
                update_strategy=update_strategy,
                identifier_scheme=identifier_scheme,
                log_step=log_step
            ),
            log_level=self.logger.level,
        )()
        self.logger.info("End")
# -*- coding: utf-8 -*-
# Generated by Django 1.11.15 on 2019-02-04 15:22
# Generated by Django 1.11.15 on 2019-02-06 16:26
from __future__ import unicode_literals
import django.core.validators
......@@ -18,13 +18,47 @@ class Migration(migrations.Migration):
operations = [
migrations.CreateModel(
name='OrganizationEconomics',
fields=[
('organization', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='economics', serialize=False, to='popolo.Organization')),
('employees', models.PositiveIntegerField(blank=True, help_text='Latest number of employees', null=True)),
('revenue', models.BigIntegerField(blank=True, help_text='Latest yearly revenue', null=True)),
('revenue_trend', models.FloatField(blank=True, help_text='Latest trend in revenues', null=True, validators=[django.core.validators.MinValueValidator(0.0), django.core.validators.MaxValueValidator(2019)])),
('capital_stock', models.BigIntegerField(blank=True, help_text='Latest capital stock', null=True)),
('assets', models.BigIntegerField(blank=True, help_text='Latest yearly assets', null=True)),
('costs', models.BigIntegerField(blank=True, help_text='Latest yearly costs', null=True)),
('ebitda', models.BigIntegerField(blank=True, help_text='Latest yearly ebitda', null=True)),
('mol', models.BigIntegerField(blank=True, help_text='Latest yearly mol', null=True)),
('net_financial_position', models.BigIntegerField(blank=True, help_text='Latest yearly net financial position', null=True)),
('production', models.BigIntegerField(blank=True, help_text='Latest yearly production', null=True)),
('profit', models.BigIntegerField(blank=True, help_text='Latest yearly profit', null=True)),
('purchases', models.BigIntegerField(blank=True, help_text='Latest yearly purchases', null=True)),
('raw_materials_variation', models.BigIntegerField(blank=True, help_text='Latest yearly raw materials variation', null=True)),
('services_and_tp_goods_charges', models.BigIntegerField(blank=True, help_text='Latest yearly service and tp goods charges', null=True)),
('staff_costs', models.BigIntegerField(blank=True, help_text='Latest yearly staff costs', null=True)),
('is_public', models.BooleanField(default=False, help_text='If the organization is listed in the publick stock exchange')),
],
),
migrations.CreateModel(
name='OrganizationEconomicsHistorical',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('year', models.PositiveIntegerField(help_text='Year of validity of economics indicator', validators=[django.core.validators.MinValueValidator(2000), django.core.validators.MaxValueValidator(2019)])),
('n_employees', models.PositiveIntegerField(help_text='Nymber of employees for current year')),
('revenue', models.PositiveIntegerField(help_text='Revenue for current year')),
('revenue_trend', models.FloatField(help_text='Revenue trend with respect to previous year', validators=[django.core.validators.MinValueValidator(0.0), django.core.validators.MaxValueValidator(2019)])),
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='popolo.Organization')),
('employees', models.PositiveIntegerField(blank=True, help_text="Current year's number of employees", null=True)),
('revenue', models.BigIntegerField(blank=True, help_text='Revenue for current year', null=True)),
('revenue_trend', models.FloatField(blank=True, help_text="Current year's revenue trend with respect to previous year", null=True, validators=[django.core.validators.MinValueValidator(0.0), django.core.validators.MaxValueValidator(2019)])),
('capital_stock', models.BigIntegerField(blank=True, help_text="Current year's capital stock", null=True)),
('assets', models.BigIntegerField(blank=True, help_text="Current year's assets", null=True)),
('costs', models.BigIntegerField(blank=True, help_text="Current year's costs", null=True)),
('ebitda', models.BigIntegerField(blank=True, help_text="Current year's ebitda", null=True)),
('mol', models.BigIntegerField(blank=True, help_text="Current year's mol", null=True)),
('net_financial_position', models.BigIntegerField(blank=True, help_text="Current year's net financial position", null=True)),
('production', models.BigIntegerField(blank=True, help_text="Current year's production", null=True)),
('profit', models.BigIntegerField(blank=True, help_text="Current year's profit", null=True)),
('purchases', models.BigIntegerField(blank=True, help_text="Current year's purchases", null=True)),
('raw_materials_variation', models.BigIntegerField(blank=True, help_text="Current year's raw materials variation", null=True)),
('services_and_tp_goods_charges', models.BigIntegerField(blank=True, help_text="Current year's service and tp goods charges", null=True)),
('staff_costs', models.BigIntegerField(blank=True, help_text="Current year's staff costs", null=True)),
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='historical_values', to='atoka.OrganizationEconomics')),
],
),
]
......@@ -11,21 +11,155 @@ current_year = datetime.now().year
class OrganizationEconomics(models.Model):
organization = models.ForeignKey(
"""Keeps economics latest indicator for an Organization
It is related 1-1 with a popolo Organization instance"""
organization = models.OneToOneField(
Organization,
on_delete=models.CASCADE,
primary_key=True,
related_name='economics'
)
employees = models.PositiveIntegerField(
blank=True, null=True,
help_text=_("Latest number of employees")
)
revenue = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly revenue")
)
revenue_trend = models.FloatField(
blank=True, null=True,
validators=[MinValueValidator(0.), MaxValueValidator(current_year)],
help_text=_("Latest trend in revenues")
)
capital_stock = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest capital stock")
)
assets = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly assets")
)
costs = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly costs")
)
ebitda = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly ebitda")
)
mol = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly mol")
)
net_financial_position = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly net financial position")
)
production = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly production")
)
profit = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly profit")
)
purchases = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly purchases")
)
raw_materials_variation = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly raw materials variation")
)
services_and_tp_goods_charges = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly service and tp goods charges")
)
staff_costs = models.BigIntegerField(
blank=True, null=True,
help_text=_("Latest yearly staff costs")
)
is_public = models.BooleanField(
default=False,
help_text=_("If the organization is listed in the publick stock exchange")
)
class OrganizationEconomicsHistorical(models.Model):
"""Keeps economics historical indicators for an Organization
It also contains data for the latest year.
"""
organization = models.ForeignKey(
OrganizationEconomics,
on_delete=models.CASCADE,
related_name='historical_values'
)
year = models.PositiveIntegerField(
validators=[MinValueValidator(2000), MaxValueValidator(current_year)],
help_text=_("Year of validity of economics indicator")
)
n_employees = models.PositiveIntegerField(
help_text=_("Nymber of employees for current year")
employees = models.PositiveIntegerField(
blank=True, null=True,
help_text=_("Current year's number of employees")
)
revenue = models.PositiveIntegerField(
revenue = models.BigIntegerField(
blank=True, null=True,
help_text=_("Revenue for current year")
)
revenue_trend = models.FloatField(
blank=True, null=True,
validators=[MinValueValidator(0.), MaxValueValidator(current_year)],
help_text=_("Revenue trend with respect to previous year")
help_text=_("Current year's revenue trend with respect to previous year")
)
capital_stock = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's capital stock")
)
assets = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's assets")
)
costs = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's costs")
)
ebitda = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's ebitda")
)
mol = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's mol")
)
net_financial_position = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's net financial position")
)
production = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's production")
)
profit = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's profit")
)
purchases = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's purchases")
)
raw_materials_variation = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's raw materials variation")
)
services_and_tp_goods_charges = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's service and tp goods charges")
)
staff_costs = models.BigIntegerField(
blank=True, null=True,
help_text=_("Current year's staff costs")
)
......@@ -470,6 +470,242 @@ def get_companies(tax_ids):
}[tax_ids]
def get_companies_economics():
    """Build a canned ATOKA ``companies`` response that includes the ``economics`` package.

    Two companies are returned: SPAZIODATI S.R.L., whose balance sheets only carry
    capital stock and revenue figures, and DEPP SRL, whose balance sheets carry the
    complete set of figures.

    :return: dict with the ``meta`` and ``items`` keys of the mocked response
    """

    def balance_sheet(year, **figures):
        # every sheet is in EUR and dated December 31st of its year;
        # keys are emitted in alphabetical order
        record = dict(figures, currency="EUR", date="{0}-12-31".format(year), year=year)
        return dict(sorted(record.items()))

    def headcounts(series):
        # series goes from the most recent to the oldest survey;
        # only the first entry is flagged as the latest one
        return [
            {"date": day, "latest": position == 0, "value": value, "year": int(day[:4])}
            for position, (day, value) in enumerate(series)
        ]

    def limited_company_forms():
        # a fresh list for each company, so the two items never share mutable state
        return [
            {"level": 1, "name": "Societ\u00e0 Di Capitale"},
            {"level": 2, "name": "Societ\u00e0 A Responsabilit\u00e0 Limitata"}
        ]

    # company with restricted economics information
    spaziodati = {
        "active": True,
        "base": {
            "active": True,
            "ateco": [
                {
                    "code": "62.01.00",
                    "description": "Produzione di software non connesso all'edizione",
                    "rootCode": "J"
                }
            ],
            "cciaa": "TN",
            "founded": "2012-02-13",
            "inGroup": True,
            "legalClass": "Societ\u00e0 Di Capitale",
            "legalForms": limited_company_forms(),
            "legalName": "SPAZIODATI S.R.L.",
            "nace": [
                {"code": "62.01", "description": "Computer programming activities", "rootCode": "J"}
            ],
            "rea": "210089",
            "registeredAddress": {
                "fullAddress": "Via Adriano Olivetti, 13, 38122, Trento (TN)",
                "lat": 46.06248902,
                "latlonPrecision": 60,
                "lon": 11.10780205,
                "macroregion": "Nord-est",
                "municipality": "Trento",
                "postcode": "38122",
                "province": "Trento",
                "provinceCode": "TN",
                "region": "Trentino-Alto Adige/S\u00fcdtirol",
                "state": "Italia",
                "streetName": "Adriano Olivetti",
                "streetNumber": "13",
                "toponym": "Via"
            },
            "startup": False,
            "taxId": "02241890223",
            "vat": "02241890223"
        },
        "country": "it",
        "economics": {
            "balanceSheets": [
                balance_sheet(
                    2017, capitalStock=22000, latest=True,
                    revenue=2778000, revenueTrend=0.8120999999999999
                ),
                balance_sheet(2016, capitalStock=22000, revenue=1533000, revenueTrend=2.3254),
                balance_sheet(2015, capitalStock=18000, revenue=461000, revenueTrend=0.8970999999999999),
                balance_sheet(2014, capitalStock=15000, revenue=243000, revenueTrend=0.7868),
                balance_sheet(2013, capitalStock=12000, revenue=136000, revenueTrend=0.7436),
                balance_sheet(2012, capitalStock=11000, revenue=65000)
            ],
            "capitalStock": {"value": 21638},
            "employees": headcounts([
                ("2018-09-01", 27),
                ("2018-06-01", 27),
                ("2018-03-01", 27),
                ("2017-12-01", 26),
                ("2017-09-01", 25),
                ("2017-06-01", 23),
                ("2017-03-01", 22),
                ("2016-12-01", 18),
                ("2016-09-01", 17),
                ("2016-06-01", 17),
                ("2016-03-01", 17),
                ("2015-12-01", 13),
                ("2015-09-01", 10),
                ("2015-06-01", 7),
                ("2015-03-01", 6),
                ("2014-12-01", 5),
                ("2014-09-01", 4),
                ("2014-06-01", 4),
                ("2014-03-01", 3)
            ]),
            "public": False
        },
        "fullAddress": "Via Adriano Olivetti, 13, 38122, Trento (TN)",
        "id": "6da785b3adf2",
        "name": "SPAZIODATI S.R.L."
    }

    # company with complete economics information
    depp = {
        "active": True,
        "base": {
            "active": True,
            "ateco": [
                {"code": "63.12.00", "description": "Portali web", "rootCode": "J"}
            ],
            "cciaa": "RM",
            "founded": "2008-04-24",
            "inGroup": False,
            "legalClass": "Societ\u00e0 Di Capitale",
            "legalForms": limited_company_forms(),
            "legalName": "DEPP SRL",
            "nace": [
                {"code": "63.12", "description": "Web portals", "rootCode": "J"}
            ],
            "rea": "1201904",
            "registeredAddress": {
                "fullAddress": "Via Merulana, 19, 00185, Roma (RM)",
                "lat": 41.89625,
                "latlonPrecision": 90,
                "lon": 12.49967,
                "macroregion": "Centro",
                "municipality": "Roma",
                "postcode": "00185",
                "province": "Roma",
                "provinceCode": "RM",
                "region": "Lazio",
                "state": "Italia",
                "streetName": "Merulana",
                "streetNumber": "19",
                "toponym": "Via"
            },
            "startup": False,
            "taxId": "09988761004",
            "vat": "09988761004"
        },
        "country": "it",
        "economics": {
            "balanceSheets": [
                balance_sheet(
                    2017, assets=172000, capitalStock=10000, costs=442000,
                    ebitda=25000, latest=True, mol=29000, netFinancialPosition=7000,
                    production=471000, profit=12000, purchases=0, rawMaterialsVariation=0,
                    revenue=471000, revenueTrend=0.09789999999999999,
                    servicesAndTPGoodsCharges=285000, staffCosts=157000
                ),
                balance_sheet(
                    2016, assets=158000, capitalStock=10000, costs=389000,
                    ebitda=31000, latest=False, mol=40000, netFinancialPosition=-28000,
                    production=429000, profit=12000, purchases=0, rawMaterialsVariation=0,
                    revenue=429000, revenueTrend=0.0239,
                    servicesAndTPGoodsCharges=253000, staffCosts=136000
                ),
                balance_sheet(
                    2015, assets=114000, capitalStock=10000, costs=422000,
                    ebitda=-10000, latest=False, mol=-3000, netFinancialPosition=-33000,
                    production=419000, profit=18000, purchases=0, rawMaterialsVariation=0,
                    revenue=419000, revenueTrend=0.3176,
                    servicesAndTPGoodsCharges=311000, staffCosts=111000
                ),
                balance_sheet(
                    2014, assets=101000, capitalStock=10000, costs=289000,
                    ebitda=23000, latest=False, mol=29000, netFinancialPosition=-13000,
                    production=318000, profit=19000, purchases=0, rawMaterialsVariation=0,
                    revenue=318000, revenueTrend=0.2927,
                    servicesAndTPGoodsCharges=234000, staffCosts=55000
                ),
                balance_sheet(
                    2013, assets=90000, capitalStock=10000, costs=295000,
                    ebitda=-55000, latest=False, mol=-49000, netFinancialPosition=-38000,
                    production=246000, profit=6000, purchases=0, rawMaterialsVariation=0,
                    revenue=246000, revenueTrend=-0.1119,
                    servicesAndTPGoodsCharges=282000, staffCosts=13000
                ),
                balance_sheet(
                    2012, assets=118000, capitalStock=10000, costs=253000,
                    ebitda=20000, latest=False, mol=24000, netFinancialPosition=-12000,
                    production=277000, profit=11000, purchases=0, rawMaterialsVariation=0,
                    revenue=277000, revenueTrend=1.0368000000000002,
                    servicesAndTPGoodsCharges=253000, staffCosts=0
                ),
                balance_sheet(
                    2011, assets=53000, capitalStock=10000, costs=132000,
                    ebitda=2000, latest=False, mol=4000, netFinancialPosition=-24000,
                    production=136000, profit=-3000, purchases=0, rawMaterialsVariation=0,
                    revenue=136000, revenueTrend=-0.049,
                    servicesAndTPGoodsCharges=132000, staffCosts=0
                ),
                balance_sheet(
                    2010, assets=72000, capitalStock=10000, costs=139000,
                    ebitda=2000, latest=False, mol=4000, netFinancialPosition=-23000,
                    production=143000, profit=-1000, purchases=1000, rawMaterialsVariation=0,
                    revenue=143000, revenueTrend=0.1,
                    servicesAndTPGoodsCharges=138000, staffCosts=0
                ),
                balance_sheet(
                    2009, assets=105000, capitalStock=10000, costs=129000,
                    ebitda=3000, latest=False, mol=5000, netFinancialPosition=-6000,
                    production=134000, profit=1000, purchases=0, rawMaterialsVariation=0,
                    revenue=130000, revenueTrend=1.2807,
                    servicesAndTPGoodsCharges=129000, staffCosts=0
                ),
                balance_sheet(
                    2008, assets=41000, capitalStock=10000, costs=36000,
                    ebitda=1000, latest=False, mol=2000, netFinancialPosition=-17000,
                    production=38000, profit=0, purchases=0, rawMaterialsVariation=0,
                    revenue=38000,
                    servicesAndTPGoodsCharges=36000, staffCosts=0
                )
            ],
            "capitalStock": {"value": 10000},
            "employees": headcounts([
                ("2018-03-01", 9),
                ("2017-12-01", 7),
                ("2017-09-01", 7),
                ("2017-06-01", 7),
                ("2017-03-01", 7),
                ("2016-12-01", 7),
                ("2016-09-01", 6),
                ("2016-06-01", 6),
                ("2016-03-01", 7),
                ("2015-12-01", 5),
                ("2015-09-01", 4),
                ("2015-06-01", 4)
            ]),
            "public": False
        },
        "fullAddress": "Via Merulana, 19, 00185, Roma (RM)",
        "id": "38e098baa0f9",
        "name": "DEPP SRL"
    }

    return {
        "meta": {"count": 2, "limit": 10, "offset": 0, "ordering": "atoka"},
        "items": [spaziodati, depp]
    }
class ATOKAConnTest(ConnectionsTestCase):
def test_get_person_from_tax_id_ok(self):
......@@ -727,3 +963,29 @@ class ATOKAConnTest(ConnectionsTestCase):
atoka_conn = AtokaConn()
with self.assertRaises(AtokaResponseError):
atoka_conn.get_companies_from_tax_ids(tax_id, packages='base,shares', active="true")
def test_get_companies_economics_ok(self):
    """Check that get_companies_from_tax_ids returns the economics details when requested."""
    tax_ids = ['02241890223', '09988761004']

    # mock atoka request using tax_id
    self.mock_post.return_value = MockResponse(
        get_companies_economics(),
        status_code=200,
        ok=True
    )

    # do the test
    companies = AtokaConn().get_companies_from_tax_ids(
        tax_ids, packages='base,economics', active="true"
    )
    self.assertEqual(len(companies), 2)

    first = companies[0]
    self.assertEqual(first['base']['taxId'], tax_ids[0])
    self.assertIn('economics', first)

    # both historical series must be present and hold more than one record
    economics = first['economics']
    for section in ('balanceSheets', 'employees'):
        self.assertIn(section, economics)
        self.assertGreater(len(economics[section]), 1)
from unittest.mock import MagicMock
from popolo.models import Organization, Ownership, Membership
from opdmetl import ETL
from tests.factories import OrganizationFactory, ClassificationFactory
from project.api_v1.etl.extractors import AtokaOwnershipsExtractor, ListExtractor
from project.api_v1.etl.loaders.organizations import PopoloOrgLoader, PopoloOrgOwnershipLoader
from project.api_v1.etl.transformations.atoka import AtokaOwnershipOrgTransformation, AtokaOwnershipTransformation, \
AtokaMembershipTransformation
from project.api_v1.etl.extractors import ListExtractor
from project.api_v1.etl.loaders import DummyLoader
from project.atoka.etl.extractors import AtokaOwnershipsExtractor, AtokaEconomicsExtractor
from project.api_v1.tests.etl import SolrETLTest
from project.api_v1.tests.etl.atoka_mocks import get_companies_tax_ids_batch, get_companies_atoka_ids_batch, \
get_roles_atoka_ids_batch
from project.atoka.etl.transformations import AtokaOrganizationEconomicsTransformation
from project.atoka.tests.etl.atoka_mocks import get_companies_tax_ids_batch, get_companies_atoka_ids_batch, \
get_roles_atoka_ids_batch, get_companies_economics_extractor, get_companies_economics_transformation
from project.api_v1.tests import faker
from project.connections.atoka import AtokaConn
from project.atoka.connections import AtokaConn
class AtokaETLTest(SolrETLTest):
"""Tests for the ETL use cases
"""
"""
@classmethod
def setUpClass(cls):
super(AtokaETLTest, cls).setUpClass()
# # need to st
# getattr(self, 'mock_get_patcher').stop()
# getattr(self, 'mock_post_patcher').stop()
def test_ownerships_extractor(self):
"""Test extraction of ownerships works correctly
"""
# mock atoka_conn method for cciaa or govTypes results
AtokaConn.get_companies_from_tax_ids = MagicMock(
......@@ -48,19 +40,12 @@ class AtokaETLTest(SolrETLTest):
ok=True
)
@classmethod
def tearDownClass(cls):
super(AtokaETLTest, cls).tearDownClass()
def test_extractor(self):
"""Test exctraction of ownerships works correctly
"""
res = AtokaOwnershipsExtractor(
['02438750586', '00008010803', '00031500945', '00031730948', '00033120437', '00034670943']
).extract()
).extract()['results']
self.assertEqual(type(res), list)
self.assertEqual(len(res), 6)
self.assertEqual(len(res), 5)
c = res[next(i for i, v in enumerate(res) if v['tax_id'] == '00008010803')]
self.assertEqual(c['atoka_id'], 'b248111d6667')
......@@ -85,83 +70,82 @@ class AtokaETLTest(SolrETLTest):
self.assertEqual('roles' in sho, True)
# test with Rome
c = res[next(i for i, v in enumerate(res) if v['tax_id'] == '02438750586')]
self.assertEqual(len(c['other_atoka_ids']), 1)
self.assertEqual(len(c['shares_owned']), 6)
def test_create_or_update_organizations_and_ownerships(self):
"""Tests that ownerships among orgnizations extracted from ATOKA (owning and owned)
are created anew or updated when existing.
Organizations are also created or updated.
:return:
def test_economics_extractor(self):
"""Test that the structure of the extracted information matches requirements
"""
owning = OrganizationFactory.create(identifier="02438750586", name="Comune di Roma")
for cl_id in [321, 11, 24, 295]:
ClassificationFactory.create(id=cl_id, descr=faker.word())
atoka_ownerships = AtokaOwnershipsExtractor(
['02438750586', '00008010803', '00031500945',
'00031730948', '00033120437', '00034670943']
).extract()
ETL(
extractor=ListExtractor(atoka_ownerships),
transformation=AtokaOwnershipOrgTransformation(),
loader=PopoloOrgLoader(lookup_strategy='identifier', identifier_scheme=None),
log_level=0,
)()
self.assertEqual(Organization.objects.count() > 1, True)
self.assertEqual(owning.name, "Comune di Roma")
self.assertNotEqual(
owning.classifications.filter(classification__scheme='CCIAA').count(), 0
# mock atoka_conn method for cciaa or govTypes results
AtokaConn.get_companies_from_tax_ids = MagicMock(
side_effect=get_companies_economics_extractor,
status_code=200,
ok=True
)
ETL(
extractor=ListExtractor(atoka_ownerships),
transformation=AtokaOwnershipTransformation(),
loader=PopoloOrgOwnershipLoader(lookup_strategy='identifier', identifier_scheme=None),
log_level=0,
)()
self.assertEqual(Ownership.objects.count() > 1, True)
def test_create_or_update_organizations_and_memberships(self):
"""Tests that memberships in orgnizations extracted from ATOKA (owning and owned)
are created anew or updated when existing.
Organizations are also created or updated.
:return:
# extract info
res = AtokaEconomicsExtractor(
['02241890223', '09988761004']
).extract()['results']
self.assertEqual(len(res), 2)
# test complete economics info
c = res[next(i for i, v in enumerate(res) if v['tax_id'] == '09988761004')]
self.assertEqual(c['id'], '38e098baa0f9')
self.assertEqual(c['name'], 'DEPP SRL')
ce = c['economics']
self.assertEqual('balanceSheets' in ce, True)
self.assertEqual(len(ce['balanceSheets']) > 1, True)
self.assertEqual('employees' in ce, True)
self.assertEqual(len(ce['employees']) > 1, True)
def test_economics_transformation(self):
"""Test that the structure of the extracted information matches requirements
"""
owning = OrganizationFactory.create(identifier="02438750586", name="Comune di Roma")
for cl_id in [321, 11, 24, 295]:
ClassificationFactory.create(id=cl_id, descr=faker.word())
atoka_ownerships = AtokaOwnershipsExtractor(
['02438750586', '00008010803', '00031500945',
'00031730948', '00033120437', '00034670943']
).extract()
ETL(
extractor=ListExtractor(atoka_ownerships),
transformation=AtokaOwnershipOrgTransformation(),
loader=PopoloOrgLoader(lookup_strategy='identifier', identifier_scheme=None),
atoka_records = get_companies_economics_transformation()
etl = ETL(
extractor=ListExtractor(atoka_records),
transformation=AtokaOrganizationEconomicsTransformation(),
loader=DummyLoader(),
log_level=0,
)()
self.assertEqual(Organization.objects.count() > 1, True)
self.assertEqual(owning.name, "Comune di Roma")
self.assertNotEqual(
owning.classifications.filter(classification__scheme='CCIAA').count(), 0
)
ETL(
extractor=ListExtractor(atoka_ownerships),
transformation=AtokaMembershipTransformation(),
loader=PopoloOrgOwnershipLoader(lookup_strategy='identifier', identifier_scheme=None),
log_level=0,
)()
self.assertEqual(Membership.objects.count() > 1, True)
# source="http://api.atoka.it"
)
etl.extract().transform()
# test complete economics info
res = etl.processed_data
c = res[next(i for i, v in enumerate(res) if v['tax_id'] == '09988761004')]
self.assertEqual(c['atoka_id'], '38e098baa0f9')
self.assertEqual(c['name'], 'DEPP SRL')
self.assertEqual(c['is_public'], False)
self.assertEqual(c['capital_stock'], 10000)
self.assertEqual(c['revenue'], 471000)
self.assertEqual(c['mol'], 29000)
self.assertEqual(c['employees'], 9)
self.assertGreaterEqual(len(c['historical_values']), 4)
self.assertEqual(c['historical_values'][0]['year'], 2017)
sh = c['historical_values'][0]
self.assertEqual(sh['assets'], 172000)
self.assertEqual(sh['costs'], 442000)
self.assertEqual(sh['ebitda'], 25000)
self.assertEqual(sh['mol'], 29000)
self.assertEqual(sh['production'], 471000)
self.assertEqual(sh['profit'], 12000)
self.assertEqual(sh['staff_costs'], 157000)
self.assertEqual(sh['purchases'], 0)
self.assertEqual(sh['employees'], 9)
self.assertEqual(sh['services_and_tp_goods_charges'], 285000)
self.assertEqual(sh['net_financial_position'], 7000)
# test restricted economics info
c = res[next(i for i, v in enumerate(res) if v['tax_id'] == '02241890223')]
self.assertEqual(c['atoka_id'], '6da785b3adf2')
self.assertEqual(c['name'], 'SPAZIODATI S.R.L.')
self.assertEqual(c['is_public'], False)
self.assertEqual(c['capital_stock'], 21638)
self.assertGreaterEqual(len(c['historical_values']), 4)
self.assertEqual(c['historical_values'][0]['year'], 2017)
sh = c['historical_values'][0]
self.assertEqual(sh.get('assets', None), None)
self.assertEqual(sh.get('costs', None), None)
self.assertEqual(sh['employees'], 9)
import json
from io import StringIO
import requests
from django.conf import settings
from popolo.models import Person
from requests_toolbelt import MultipartEncoder
class AtokaConn(object):
    """Helper class to perform queries on the ATOKA API service.

    Endpoint, API version and key are read from the Django settings.
    Configuration values are secret and must be kept safe in environment variables.
    """
    service_url = settings.ATOKA_API_ENDPOINT
    version = settings.ATOKA_API_VERSION
    key = settings.ATOKA_API_KEY

    # comma-separated list of role names
    allowed_roles = \
        "titolare firmatario,amministratore unico,consigliere,socio amministratore,socio accomandante," \
        "socio,socio accomandatario,presidente consiglio amministrazione,socio unico,amministratore,titolare," \
        "sindaco effettivo,vice presidente consiglio amministrazione,amministratore delegato,liquidatore," \
        "sindaco supplente,socio di societa' in nome collettivo,consigliere delegato,presidente," \
        "curatore fallimentare,presidente del collegio sindacale,vice presidente,legale rappresentante," \
        "revisore dei conti,legale rappresentante di societa',institore,direttore generale"

    def get_person_from_tax_id(self, tax_id: str) -> dict:
        """Get a single person from the ATOKA API, given its tax_id.

        :param tax_id: string - the tax_id as a string
        :return: dict - ATOKA result (the only item found)
        :raises AtokaResponseError: when the HTTP response is not ok
        :raises AtokaObjectDoesNotExist: when no person is found
        :raises AtokaMultipleObjectsReturned: when more than one person is found
        """
        response = requests.get(
            '{0}/{1}/people'.format(
                self.service_url, self.version
            ),
            params={
                'token': self.key,
                'taxIds': tax_id,
                'packages': 'base,companies,shares'
            }
        )
        if not response.ok:
            raise AtokaResponseError(response.reason)

        result = response.json()
        if result['meta']['count'] == 0:
            raise AtokaObjectDoesNotExist(
                "Could not find person with tax_id {0} in Atoka.".format(tax_id)
            )
        if result['meta']['count'] > 1:
            raise AtokaMultipleObjectsReturned(
                "Found more than one person with tax_id {0} in Atoka.".format(tax_id)
            )

        return result['items'][0]

    def search_person(self, person: Person) -> dict:
        """Get a single person from the ATOKA API, searching by anagraphical data.

        The search uses given name, family name, birth date and the name of the birth location area.

        :param person: Person - instance of OPDM person to look for into ATOKA
        :return: dict - ATOKA result (the only item found)
        :raises AtokaResponseError: when the HTTP response is not ok
        :raises AtokaObjectDoesNotExist: when no person is found
        :raises AtokaMultipleObjectsReturned: when more than one person is found
        """
        params = {
            'token': self.key,
            'givenName': person.given_name,
            'familyName': person.family_name,
            'birthDateFrom': person.birth_date,
            'birthDateTo': person.birth_date,
            # NOTE(review): 'birtPlaceMunicipalities' looks like a typo of 'birthPlaceMunicipalities';
            # left untouched, to be checked against the ATOKA API reference before changing it
            'birtPlaceMunicipalities': person.birth_location_area.name,
            'packages': 'base,companies,shares'
        }
        response = requests.get(
            '{0}/{1}/people'.format(
                self.service_url, self.version
            ),
            params=params
        )
        if not response.ok:
            raise AtokaResponseError(response.reason)

        # the API key and the packages are removed, so that they do not end up in the error messages
        params.pop('token')
        params.pop('packages')

        result = response.json()
        if result['meta']['count'] == 0:
            raise AtokaObjectDoesNotExist(
                "Could not find person with parameters {0} in Atoka.".format(params)
            )
        if result['meta']['count'] > 1:
            raise AtokaMultipleObjectsReturned(
                "Found more than one person with parameters {0} in Atoka.".format(params)
            )

        return result['items'][0]

    def get_items_from_ids(
        self, ids: list, item_type: str, ids_field_name: str = 'ids', batch_size: int = 50, **kwargs
    ) -> list:
        """Transform a request for a list of ids larger than batch_size,
        to a batch request of enough rows with a limit of batch_size, so that all results
        can be returned.

        Results are composed and returned as a list of dicts.

        :param ids: list of ids (strings) to look for
        :param item_type: type of items requested, one of: companies, people
        :param ids_field_name: name of the ATOKA parameter the ids are sent with, one of: ids, taxIds, companies
        :param batch_size: number of ids searched by each row of the batch (from 1 to 50)
        :param kwargs: - more atoka parameters for filtering results
           (ex: packages=base,shares, active='true', ccia='*')
        :return: results as a list of dicts (an empty list when ids is empty)
        :raises AtokaException: when ids_field_name, batch_size or item_type have not allowed values
        :raises AtokaResponseError: when the HTTP response is not ok
        :raises AtokaObjectDoesNotExist: when the response contains no items
        """
        if ids_field_name not in ['ids', 'taxIds', 'companies']:
            raise AtokaException("ids_field_name parameter must take one of these values: <ids>, <taxIds>, <companies>")

        if batch_size < 1 or batch_size > 50:
            raise AtokaException("batch_size must be between 1 and 50")

        if item_type not in ['companies', 'people']:
            raise AtokaException("item_type must take one of these values: <companies>, <people>")

        if len(ids) == 0:
            return []

        api_endpoint = "{0}/{1}/{2}/".format(
            self.service_url, self.version, item_type
        )

        # internal function to split ids list into chunks
        def chunks(lst, size):
            """Yield successive size-sized chunks from lst."""
            for i in range(0, len(lst), size):
                yield lst[i:i + size]

        # the in-memory batch file is closed on exit from the with block, even if the request raises
        with StringIO() as file_io:
            # build the file to upload for batch execution: one JSON request per line
            for n, r in enumerate(chunks(ids, batch_size)):
                print(json.dumps({
                    "reqId": "r{0:05d}".format(n),
                    ids_field_name: ','.join(r),
                }), file=file_io)

            # batch API request
            fields = {
                'batch': ('batch.json', file_io),
                'limit': '50'
            }
            fields.update(kwargs)

            m = MultipartEncoder(
                fields=fields
            )
            response = requests.post(
                api_endpoint,
                params={'token': self.key},
                data=m,
                headers={'Content-Type': m.content_type}
            )

        # defensive check, presumably for patched/mocked transports returning no response
        if response is None:
            return []

        # the status is checked before decoding the body: an error response is not guaranteed
        # to carry valid JSON, and must surface as an AtokaResponseError, not as a decoding error
        if not response.ok:
            raise AtokaResponseError(response.reason)

        json_response = response.json()

        # batch responses are grouped under 'responses', plain ones carry 'items' directly
        total_response = []
        if 'responses' in json_response:
            for r in json_response['responses'].values():
                total_response.extend(r['items'])
        else:
            total_response.extend(json_response['items'])

        if len(total_response) == 0:
            raise AtokaObjectDoesNotExist(getattr(response, "content", None))

        return total_response

    def get_companies_from_tax_ids(self, tax_ids: list, **kwargs) -> list:
        """Get all companies from the ATOKA API, from given tax_ids list.

        Raise Atoka exceptions if errors or no objects found.

        :param tax_ids: - the list of tax_ids to extract info from
        :param kwargs: - more atoka parameters for filtering results (ex: active='true', ccia='*')
        :return: list - ATOKA results, as a list of dicts
        """
        return self.get_items_from_ids(tax_ids, 'companies', ids_field_name='taxIds', **kwargs)

    def get_companies_from_atoka_ids(self, atoka_ids: list, **kwargs) -> list:
        """Get all companies from the ATOKA API, from given atoka_ids list.

        Raise Atoka exceptions if errors or no objects found.

        :param atoka_ids: - the list of ids to extract info from
        :param kwargs: - more atoka parameters for filtering results (ex: active='true', ccia='*')
        :return: list - ATOKA results, as a list of dicts
        """
        return self.get_items_from_ids(atoka_ids, 'companies', ids_field_name='ids', **kwargs)

    def get_people_from_tax_ids(self, tax_ids: list, **kwargs) -> list:
        """Get all people from the ATOKA API, from given tax_ids list.

        Raise Atoka exceptions if errors or no objects found.

        :param tax_ids: - the list of tax_ids to extract info from
        :param kwargs: - more atoka parameters for filtering results (ex: active='true', ccia='*')
        :return: list - ATOKA results, as a list of dicts
        """
        return self.get_items_from_ids(tax_ids, 'people', ids_field_name='taxIds', **kwargs)

    def get_people_from_atoka_ids(self, atoka_ids: list, **kwargs) -> list:
        """Get all people from the ATOKA API, from given atoka_ids list.

        Raise Atoka exceptions if errors or no objects found.

        :param atoka_ids: - the list of ids to extract info from
        :param kwargs: - more atoka parameters for filtering results (ex: active='true', ccia='*')
        :return: list - ATOKA results, as a list of dicts
        """
        return self.get_items_from_ids(atoka_ids, 'people', ids_field_name='ids', **kwargs)

    def get_roles_from_atoka_ids(self, atoka_ids: list, **kwargs) -> list:
        """Get all people in given companies, used to extract roles.

        :param atoka_ids: - the list of companies ids, sent as the ``companies`` parameter
        :param kwargs: - more atoka parameters for filtering results
        :return: list - ATOKA people results, as a list of dicts
        """
        # need a batch_size of 1 because the number of people in a single company can be great,
        # and 50 is the maximum limit for a single batch row request
        return self.get_items_from_ids(atoka_ids, 'people', ids_field_name='companies', batch_size=1, **kwargs)
class AtokaException(Exception):
    """Base class of all the errors raised by the ATOKA connection helpers."""
class AtokaObjectDoesNotExist(AtokaException):
    """Raised when an ATOKA request returns no items."""
class AtokaMultipleObjectsReturned(AtokaException):
    """Raised when an ATOKA request expected to match a single item returns more than one."""
class AtokaResponseError(AtokaException):
    """Raised when the ATOKA service answers with a non-ok HTTP response."""
This diff is collapsed.
[bumpversion]
current_version = 1.1.0
current_version = 1.1.1
commit = True
tag = True
tag_name = v{new_version}
......