Commit 492c60e

Fix configuration schema checking

Fix some minor IDE warnings

dlazesz committed Aug 3, 2020
1 parent 376b564 commit 492c60e

Showing 5 changed files with 20 additions and 17 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,2 @@
+include webarticlecurator/crawl_schema.yaml
+include webarticlecurator/site_schema.yaml
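
Note: without these entries the two schema files are left out of the source distribution, so a pip-installed copy of the package cannot find them and schema checking fails. That is presumably the core of the "Fix configuration schema checking" change (assuming setup.py also sets include_package_data or an equivalent package_data entry). A quick, hypothetical check that an installed copy actually carries the schemas, not part of the repo:

    import os
    import webarticlecurator

    # utils.py loads the schemas from the package directory, so they must be there
    pkg_dir = os.path.dirname(os.path.abspath(webarticlecurator.__file__))
    for fname in ('crawl_schema.yaml', 'site_schema.yaml'):
        assert os.path.isfile(os.path.join(pkg_dir, fname)), fname + ' missing from installed package'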
5 changes: 3 additions & 2 deletions webarticlecurator/__main__.py
@@ -214,8 +214,9 @@ def main():
                        format(set(commands.keys())))

     command = parser.parse_args(sys.argv[1:2]).command  # Route ArgumentParser before reparsing the whole CLI
-    sub_args = commands[command][0](ArgumentParser())
-    commands[command][1](sub_args)
+    argparse_fun, main_fun = commands[command]
+    sub_args = argparse_fun(ArgumentParser())
+    main_fun(sub_args)


 if __name__ == '__main__':
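
Note: the change unpacks the (argparse_fun, main_fun) tuple into named variables instead of indexing commands[command] twice, which is what silences the IDE warnings mentioned in the commit message; behaviour is unchanged. A self-contained sketch of the same two-stage dispatch pattern, with an illustrative 'crawl' command that is not taken from the repo:

    import sys
    from argparse import ArgumentParser

    def crawl_argparse(parser):
        # The sub-parser sees everything after the command token
        parser.add_argument('--config', required=True)
        return parser.parse_args(sys.argv[2:])

    def crawl_main(args):
        print('crawling with', args.config)

    commands = {'crawl': (crawl_argparse, crawl_main)}

    parser = ArgumentParser()
    parser.add_argument('command', choices=commands.keys())
    command = parser.parse_args(sys.argv[1:2]).command  # route before reparsing the whole CLI
    argparse_fun, main_fun = commands[command]
    main_fun(argparse_fun(ArgumentParser()))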
24 changes: 12 additions & 12 deletions webarticlecurator/news_crawler.py
@@ -40,14 +40,14 @@ def __init__(self, settings, existing_archive_filename, new_archive_filename, ar

         # Open files for writing gathered URLs on exit if needed
         self.good_urls = set()
-        self._new_good_archive_urls_fh = settings.get('new_good_archive_urls')
-        if self._new_good_archive_urls_fh is not None:
-            self._new_good_archive_urls_fh = open(self._new_good_archive_urls_fh, 'w', encoding='UTF-8')
+        new_good_archive_urls_fname = settings.get('new_good_archive_urls')
+        if new_good_archive_urls_fname is not None:
+            self._new_good_archive_urls_fh = open(new_good_archive_urls_fname, 'w', encoding='UTF-8')

         self.problematic_urls = set()
-        self._new_problematic_archive_urls_fh = settings.get('new_problematic_archive_urls')
-        if self._new_problematic_archive_urls_fh is not None:
-            self._new_problematic_archive_urls_fh = open(self._new_problematic_archive_urls_fh, 'w', encoding='UTF-8')
+        new_problematic_archive_urls_fname = settings.get('new_problematic_archive_urls')
+        if new_problematic_archive_urls_fname is not None:
+            self._new_problematic_archive_urls_fh = open(new_problematic_archive_urls_fname, 'w', encoding='UTF-8')

         # Setup the list of cached article URLs to stop archive crawling in time
         self.known_article_urls = set()
@@ -241,14 +241,14 @@ def __init__(self, settings, articles_existing_warc_filename, articles_new_warc_

         # Open files for writing gathered URLs on exit if needed
         self._new_urls = set()
-        self._new_good_urls_fh = settings.get('new_good_urls')
-        if self._new_good_urls_fh is not None:
-            self._new_good_urls_fh = open(self._new_good_urls_fh, 'a+', encoding='UTF-8')
+        new_good_urls_fname = settings.get('new_good_urls')
+        if new_good_urls_fname is not None:
+            self._new_good_urls_fh = open(new_good_urls_fname, 'a+', encoding='UTF-8')

         self.problematic_article_urls = set()
-        self._new_problematic_urls_fh = settings.get('new_problematic_urls')
-        if self._new_problematic_urls_fh is not None:
-            self._new_problematic_urls_fh = open(self._new_problematic_urls_fh, 'a+', encoding='UTF-8')
+        new_problematic_urls_fname = settings.get('new_problematic_urls')
+        if new_problematic_urls_fname is not None:
+            self._new_problematic_urls_fh = open(new_problematic_urls_fname, 'a+', encoding='UTF-8')

         # Store values at init-time
         self._filter_by_date = settings['FILTER_ARTICLES_BY_DATE']
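
Note: in both constructors the old code first stored the result of settings.get(...) in the *_fh attribute and then overwrote it with an open file handle, so one name held a str and later a file object; that mixed typing is the likely source of the IDE warnings. Introducing a separate *_fname local gives each name a single type. The same idea reduced to a generic sketch (names illustrative, not repo code):

    def open_optional_output(settings, key, mode='w'):
        fname = settings.get(key)  # str or None
        if fname is not None:
            return open(fname, mode, encoding='UTF-8')
        return None  # callers must check for None before writing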
4 changes: 2 additions & 2 deletions webarticlecurator/utils.py
@@ -10,8 +10,8 @@

 import yamale

-site_schema = yamale.make_schema(os.path.join(os.path.dirname(os.path.abspath(__file__)), './site_schema.yaml'))
-crawl_schema = yamale.make_schema(os.path.join(os.path.dirname(os.path.abspath(__file__)), './crawl_schema.yaml'))
+site_schema = yamale.make_schema(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'site_schema.yaml'))
+crawl_schema = yamale.make_schema(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'crawl_schema.yaml'))


 def load_and_validate(schema, fname):
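
Note: dropping the './' prefix is cosmetic; os.path.join() does not normalize its result, so the old code built paths like .../webarticlecurator/./site_schema.yaml, which open fine but trip IDE inspections. These module-level schema objects are what the validation step feeds to yamale; a minimal usage sketch assuming yamale's documented make_data/validate API and a hypothetical config file name:

    import yamale
    from webarticlecurator.utils import crawl_schema

    data = yamale.make_data('my_crawl_config.yaml')  # hypothetical file
    yamale.validate(crawl_schema, data)  # raises if the config violates the schema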
2 changes: 1 addition & 1 deletion webarticlecurator/version.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env pyhton3
 # -*- coding: utf-8, vim: expandtab:ts=4 -*-

-__version__ = '1.0.0'
+__version__ = '1.0.1'
