From 3622461d3c6c5b0d4405e49d855b038c4ace7b7d Mon Sep 17 00:00:00 2001 From: Ailothaen Date: Mon, 1 May 2023 19:09:15 +0200 Subject: [PATCH] "Fixed" the RecursionError that could happen on exceptionally deep threads --- CONFIG.md | 2 ++ src/config.py | 2 +- src/config.yml.example | 1 + src/downloader.py | 21 ++++++++++++++++----- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index a4bf384..8a3fb43 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -7,6 +7,7 @@ app: only-allow-from: - '1.2.3.5/32' - '3401:722::0119::/64' + disable-recursion-limit: false reddit: client-id: c66425afb0f54a27905b74c2f8449d8f client-secret: 4747072335d74e2b8ac8-e4fbec152dca @@ -22,6 +23,7 @@ defaults: |app.name|Name of the app, as it appears on the frontend.| |app.url|URL of the main endpoint of the app, where it will be accessible to the users. Do not include a trailing slash.| |app.only-allow-from|A list of IP ranges you want to restrict the app access to. If you want to allow everyone to access the app, remove this property.| +|app.disable-recursion-limit|If the level of nested replies in a thread is *very* high (about 1000+, which is very rare), the generation process will fail because of Python's recursion limit. Setting this property to `true` makes the app double the limit and retry each time it is hit, instead of failing the job. However, please note that this may lead to higher resource usage, and might potentially crash the app.| |reddit.client-id|Client ID of your Reddit app, as shown in https://www.reddit.com/prefs/apps| |reddit.client-secret|Client secret of your Reddit app, as shown in https://www.reddit.com/prefs/apps| |reddit.root|Main endpoint of Reddit. 
You should not have to edit this value.| diff --git a/src/config.py b/src/config.py index b9d1290..f2674f7 100644 --- a/src/config.py +++ b/src/config.py @@ -9,7 +9,7 @@ with open('config.yml', 'r') as f: config = yaml.safe_load(f) -config['app']['version'] = "1.0.0" +config['app']['version'] = "1.1.0" config['app']['project'] = "https://github.com/Ailothaen/RedditArchiver" config['reddit']['agent'] = f"{config['app']['name']} v{config['app']['version']} (by u/ailothaen)" config['runtime'] = {} diff --git a/src/config.yml.example b/src/config.yml.example index 8108929..26b9c3a 100644 --- a/src/config.yml.example +++ b/src/config.yml.example @@ -4,6 +4,7 @@ app: only-allow-from: - '198.51.100.0/24' - '2001:db8::/32' + disable-recursion-limit: false reddit: client-id: redacted client-secret: redacted diff --git a/src/downloader.py b/src/downloader.py index c52fe52..84f90ab 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -9,7 +9,7 @@ import praw, prawcore, markdown2 # stdlib -import datetime, os, logging, re +import datetime, os, logging, sys log = logging.getLogger('redditarchiver_main') @@ -220,12 +220,23 @@ def main(submission_id, token, job_id, sort="confidence"): log.error(f'{job_id}: prawcore.exceptions.ResponseException ({e})', exc_info=True) models.mark_job_failure(db, job_id, reason='BAD_AUTHENTICATION') return - - log.info(f'{job_id}: submission downloaded') + else: + log.info(f'{job_id}: submission downloaded') # Generating HTML structure - html = generate_html(submission, submission_id, now_str, sort, comments_index, comments_forest) - log.info(f'{job_id}: submission structured') + while True: # allows to retry + try: + html = generate_html(submission, submission_id, now_str, None, comments_index, comments_forest) + except RecursionError: + if config["app"]["disable-recursion-limit"]: + sys.setrecursionlimit(sys.getrecursionlimit()*2) + else: + log.error(f"The HTML structure could not be generated because the structure of the replies is going too 
deep for the program to handle. If you really want to handle such submissions, set the parameter \"disable-recursion-limit\" to true in the configuration. However, please note that this may lead to higher resource usage, and might potentially crash the app.") + models.mark_job_failure(db, job_id, reason='UNKNOWN') + return + else: + log.info(f'{job_id}: submission structured') + break # Saving to disk try: