...
 
Commits (7)
......@@ -290,9 +290,12 @@ CONSTANCE_CONFIG = {
'SMTP_USERNAME': (env("EMAIL_HOST_USER", default=""), "Username for the SMTP host"),
'SMTP_PASSWORD': (env("EMAIL_HOST_PASS", default=""), "Password for the SMTP host"),
'EMAIL_SUBJECT_PREFIX': (env("EMAIL_SUBJECT_PREFIX", default="[servizio verificafonti] "), "Email subject prefix"),
'EMAIL_FROM': (env("DEFAULT_FROM_EMAIL", default="noreply@openpolis.it"), "From email address for notifications")
'EMAIL_FROM': (env("DEFAULT_FROM_EMAIL", default="noreply@openpolis.it"), "From email address for notifications"),
'REQUESTS_MAX_TIMEOUT': (env("RQEUSTS_MAX_TIMEOUT", default=10), "Max timeout for requests in seconds", int),
'REQUESTS_UA': (env("REQUESTS_UA", default=""), "User agent in requests"),
}
CONSTANCE_IGNORE_ADMIN_VERSION_CHECK = True
CONSTANCE_BACKEND = 'constance.backends.database.DatabaseBackend'
# END CONSTANCE (LIVE SETTINGS) CONFIGURATION
......
......@@ -19,11 +19,11 @@ class ContentAdmin(DjangoObjectActions, AdminRowActionsMixin, admin.ModelAdmin):
list_display = ('_linked_title',
'verified_at',
'_status_and_message')
search_fields = ('title', 'notes')
search_fields = ('title', 'notes', 'verification_error', 'verification_status', 'url')
list_filter = ('verification_status', 'organisation_type')
fieldsets = (
(None, {
'fields': ('title', 'organisation_type', 'notes', 'op_url', 'url', 'xpath', 'use_cleaner',
'fields': ('title', 'organisation_type', 'timeout', 'notes', 'op_url', 'url', 'xpath', 'use_cleaner',
'content'),
}),
('Verification', {
......
import difflib
import datetime
from django.core import management
from django.utils.timezone import now
from taskmanager.logging_utils import LoggingBaseCommand
from project.webapp.models import Content
......@@ -9,14 +10,14 @@ class Command(LoggingBaseCommand):
help = "Verify content of specified URI's ids or all"
def add_arguments(self, parser):
parser.add_argument('id', nargs='*', type=int)
parser.add_argument('ids', nargs='*', type=int)
parser.add_argument(
'--dryrun',
'--dry-run',
action='store_true',
dest='dryrun',
default=False,
help='Execute a dry run: no db is written.',
help='Execute a dry run: no db is written, no notification sent.',
)
parser.add_argument(
'--content',
......@@ -34,7 +35,6 @@ class Command(LoggingBaseCommand):
)
parser.add_argument(
'--offset',
action='store',
type=int,
dest='offset',
default=0,
......@@ -42,27 +42,40 @@ class Command(LoggingBaseCommand):
)
# --limit caps how many contents are verified, counting from --offset;
# handle() only applies the cap when the value is greater than zero.
parser.add_argument(
    '--limit',
    action='store',
    type=int,
    dest='limit',
    default=0,
    help='Maximum number of contents to verify, starting at --offset (0 means no limit)',
)
parser.add_argument(
'--notify',
action='store_true',
dest='notify',
default=False,
help='Notify changes to registered recipients or channels',
)
parser.add_argument(
'--notification-method',
dest='notification_method',
default='slack',
help='What method to use for notification: slack|email|both',
)
def handle(self, *args, **options):
self.setup_logger(__name__, formatter_key="simple", **options)
offset = options['offset']
limit = options['limit']
ids = options.get('ids', [])
if len(args) == 0:
if len(ids) == 0:
if limit > 0:
contents = Content.objects.all()[
offset:(offset + limit)]
else:
contents = Content.objects.all()[offset:]
else:
contents = Content.objects.filter(id__in=args)
contents = Content.objects.filter(id__in=ids)
if len(contents) == 0:
self.logger.info("no content to check this time")
......@@ -80,17 +93,21 @@ class Command(LoggingBaseCommand):
if options['dryrun'] is False:
content.verification_status = Content.STATUS_ERROR
content.verification_error = err_msg
content.verified_at = datetime.datetime.now()
content.verified_at = now()
content.save()
self.logger.warning("{0}/{1} - {2} while processing {3} (id: {4})".format(
cnt + 1, len(contents), err_msg, content.title, content.id
))
else:
print(
if content.verification_error:
status = content.verification_error
else:
status = content.get_verification_status_display().upper()
self.logger.info(
"{0}/{1} - {2} (id: {4}) - {3}".format(
cnt + 1, len(contents), content.title,
content.get_verification_status_display().upper(),
content.id
status,
content.id,
)
)
if options['showmeat'] is True:
......@@ -100,3 +117,12 @@ class Command(LoggingBaseCommand):
stored = content.meat.splitlines(1)
diff = difflib.ndiff(live, stored)
self.logger.info("".join(diff))
if options['notify'] and not options['dryrun']:
verbosity = int(options.get("verbosity", 1))
management.call_command(
'notify',
verbosity=verbosity,
notification_method=options['notification_method'],
stdout=self.stdout,
)
# Generated by Django 2.2.1 on 2019-06-11 15:10
from django.db import migrations, models
class Migration(migrations.Migration):
    """Set ``max_length=512`` on the ``title`` and ``xpath`` fields of ``content``."""

    dependencies = [('webapp', '0014_auto_20190507_1349')]

    operations = [
        # Source title (institution name).
        migrations.AlterField(
            model_name='content',
            name='title',
            field=models.CharField(
                max_length=512,
                verbose_name='Denominazione della fonte',
                help_text="Indicare l'istituzione (es. Cons. Reg. Lazio)",
            ),
        ),
        # XPath expression; may still be left blank.
        migrations.AlterField(
            model_name='content',
            name='xpath',
            field=models.CharField(max_length=512, blank=True),
        ),
    ]
# Generated by Django 2.2.1 on 2019-06-11 15:16
from django.db import migrations, models
class Migration(migrations.Migration):
    """Set ``max_length=1024`` on the ``verification_error`` field of ``content``."""

    dependencies = [('webapp', '0015_auto_20190611_1710')]

    operations = [
        # The field stays optional (blank and null allowed).
        migrations.AlterField(
            model_name='content',
            name='verification_error',
            field=models.CharField(
                max_length=1024,
                verbose_name='Errore',
                blank=True,
                null=True,
            ),
        ),
    ]
# Generated by Django 2.2.1 on 2019-06-17 09:50
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``timeout`` field to ``content``."""

    dependencies = [('webapp', '0016_auto_20190611_1716')]

    operations = [
        # The default is recorded in this migration as the literal value 10.
        migrations.AddField(
            model_name='content',
            name='timeout',
            field=models.PositiveSmallIntegerField(
                default=10,
            ),
        ),
    ]
# coding: utf-8
from constance import config
from django.utils.translation import ugettext_lazy as _
from django.db import models
from django.utils import timezone
......@@ -10,7 +11,9 @@ import lxml.html
from lxml.etree import ParserError
from lxml.html.clean import Cleaner
from lxml.etree import XPathEvalError
from requests.exceptions import SSLError
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@python_2_unicode_compatible
class OrganisationType(models.Model):
......@@ -52,7 +55,7 @@ class Content(models.Model):
)
title = models.CharField(
max_length=250,
max_length=512,
verbose_name=_("Denominazione della fonte"),
help_text="""Indicare l'istituzione (es. Cons. Reg. Lazio)"""
)
......@@ -66,11 +69,14 @@ class Content(models.Model):
blank=True, null=True,
help_text="URL della pagina OP contenente le istituzioni"
)
xpath = models.CharField(blank=True, max_length=250)
xpath = models.CharField(blank=True, max_length=512)
content = models.TextField(
blank=True, null=True,
verbose_name=_("Contenuto significativo")
)
timeout = models.PositiveSmallIntegerField(
default=config.REQUESTS_MAX_TIMEOUT
)
notes = models.TextField(
blank=True, null=True,
verbose_name=_("Note")
......@@ -85,7 +91,7 @@ class Content(models.Model):
verbose_name=_("Stato")
)
verification_error = models.CharField(
blank=True, null=True, max_length=250,
blank=True, null=True, max_length=1024,
verbose_name=_("Errore")
)
use_cleaner = models.BooleanField(
......@@ -110,15 +116,26 @@ class Content(models.Model):
when xpath fails status code is 900
"""
headers = {}
if config.REQUESTS_UA:
headers['User-Agent'] = config.REQUESTS_UA
try:
res = requests.get(self.url)
res = requests.get(self.url, timeout=self.timeout, headers=headers)
if not res.ok:
raise Exception("Internal Urllib error")
if res.status_code != 200:
return res.status_code, "URL"
except ConnectionError:
return 910, "URL inesistente"
if res.status_code and res.reason:
return res.status_code, res.reason
else:
return 990, "Generic error"
except SSLError:
try:
res = requests.get(self.url, timeout=self.timeout, headers=headers, verify=False)
except Exception as e:
return 990, e
except ConnectionError as e:
return 910, e
except Exception as e:
return 990, e
# extract textual content from html,
# using cleaner if set in model
......