diff --git a/configs/site_schemas/24hu_rest3_schema.yaml b/configs/site_schemas/24hu_rest3_schema.yaml index ded6a13..6766e1c 100644 --- a/configs/site_schemas/24hu_rest3_schema.yaml +++ b/configs/site_schemas/24hu_rest3_schema.yaml @@ -35,7 +35,7 @@ "date_last_article": 2005-05-31 "elet-stilus2": - "archive_url_format": "https://24.hu/elet-stilus/#year/#month/" + "archive_url_format": "https://24.hu/elet-stilus/#year/#month/#day/" "date_first_article": 2005-06-01 "date_last_article": 2010-12-31 diff --git a/webarticlecurator/other_modes.py b/webarticlecurator/other_modes.py index 5c373b2..0e76483 100644 --- a/webarticlecurator/other_modes.py +++ b/webarticlecurator/other_modes.py @@ -85,7 +85,7 @@ def archive_page_contains_article_url(extract_article_urls_from_page_plus_fun, s url_to_fname = {} archive_page_for_checked_urls = defaultdict(set) - for url in w.url_index: + for url in sorted(w.url_index): raw_html = w.download_url(url) if raw_html is not None: article_urls_w_meta = extract_article_urls_from_page_plus_fun(raw_html)