import argparse
import concurrent.futures
import gzip
import json
import logging
import logging.handlers
import math
import os
import re
import shutil
import subprocess
import sys
import traceback
from datetime import datetime, timedelta
from operator import itemgetter
from pathlib import Path
import psutil
import requests
import pidfile
from jsonschema import validate
from reports.discord_report import create_discord_report
from reports.email_report import create_email_report
from utils import format_delta, get_relative_path, human_readable_size, run_script
#
# Read config
with open(get_relative_path(__file__, './config.json'), 'r') as f:
config = json.load(f)
with open(get_relative_path(__file__, './config.schema.json'), 'r') as f:
schema = json.load(f)
validate(instance=config, schema=schema)
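# For reference, a minimal config.json sketch covering the keys this script reads.
# All values below are illustrative assumptions, not shipped defaults; consult
# config.schema.json for the authoritative structure:
#
# {
#   "logs": {"dir": "/var/log/snapper", "max_count": 7},
#   "notifications": {
#     "discord": {"enabled": false, "webhook_id": "", "webhook_token": ""},
#     "email": {"enabled": false, "binary": "/usr/bin/mail",
#               "from_email": "snapper@example.com", "to_email": "admin@example.com"}
#   },
#   "snapraid": {
#     "binary": "/usr/bin/snapraid", "config": "/etc/snapraid.conf", "nice": 10,
#     "sync": {"pre_hash": true, "auto_sync": {"enabled": true, "max_attempts": 3}},
#     "scrub": {"enabled": true, "scrub_new": true, "check_percent": 12,
#               "min_age": 10, "scrub_delayed_run": 0},
#     "diff": {"thresholds": {"updated": 500, "removed": 100}}
#   },
#   "spindown": {"enabled": false, "binary": "/usr/sbin/hdparm", "drives": "parity"},
#   "scripts": {"pre_run": null, "post_run": null}
# }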
# Counter file used by the delayed scrub feature; Path gives us read_text()/write_text().
sync_count_file = Path(get_relative_path(__file__, './snapper.syncCount'))
#
# Configure logging
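# On rollover, gzip-compress the old log file and delete the uncompressed original.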
def rotator(source, dest):
with open(source, 'rb') as f_in:
with gzip.open(dest, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(source)
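# Build a per-run file logger: if a log file already exists it is rolled over (and
# compressed via the rotator above), keeping at most `max_count` archived copies.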
def setup_logger(name, log_file, level='INFO'):
log_dir, max_count = itemgetter('dir', 'max_count')(config['logs'])
if not os.path.exists(log_dir):
os.makedirs(log_dir)
log_file_path = os.path.join(log_dir, log_file)
needs_rollover = os.path.isfile(log_file_path)
handler = logging.handlers.RotatingFileHandler(log_file_path, backupCount=max(max_count, 1))
handler.setFormatter(logging.Formatter('[%(asctime)s] - [%(levelname)s] - %(message)s'))
handler.rotator = rotator
handler.namer = lambda file_name: file_name + '.gz'
if needs_rollover:
handler.doRollover()
logger = logging.getLogger(name)
logger.setLevel(level)
if logger.hasHandlers():
logger.handlers.clear()
logger.addHandler(handler)
logger.propagate = False
return logger
#
# Parse command line args
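# Usage: python3 snapper.py [-f | --force]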
parser = argparse.ArgumentParser(description='SnapRAID execution wrapper')
parser.add_argument('-f', '--force',
help='Ignore any set thresholds or warnings and execute all jobs regardless',
action='store_true')
args = vars(parser.parse_args())
force_script_execution = args['force']
#
# Notification helpers
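# Logs the failure, mirrors it to email and Discord, then aborts the run.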
def notify_and_handle_error(message, error):
log.error(message)
log.error(''.join(traceback.format_exception(None, error, error.__traceback__)))
send_email('WARNING! SnapRAID jobs unsuccessful', message.replace('\n', '<br>'))
notify_warning(message)
sys.exit(1)
def notify_warning(message, embeds=None):
return send_discord(f':warning: [**WARNING!**] {message}', embeds=embeds)
def notify_info(message, embeds=None, message_id=None):
return send_discord(f':information_source: [**INFO**] {message}', embeds, message_id)
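# Posts a webhook message, or edits an existing one when message_id is given; returns
# the new message ID so callers can keep updating a single progress message.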
def send_discord(message, embeds=None, message_id=None):
is_enabled, webhook_id, webhook_token = itemgetter(
'enabled', 'webhook_id', 'webhook_token')(config['notifications']['discord'])
if not is_enabled:
return
if embeds is None:
embeds = []
data = {
'content': message,
'embeds': embeds,
'username': 'Snapper',
}
update_message = message_id is not None
base_url = f'https://discord.com/api/webhooks/{webhook_id}/{webhook_token}'
if update_message:
discord_url = f'{base_url}/messages/{message_id}'
response = requests.patch(discord_url, json=data)
else:
discord_url = f'{base_url}?wait=true'
response = requests.post(discord_url, json=data)
try:
response.raise_for_status()
log.debug('Successfully posted message to discord')
if not update_message:
data = response.json()
# Return the message ID in case we want to manipulate it
return data['id']
except requests.exceptions.HTTPError as err:
# Handle error when trying to update a message
if update_message:
log.debug('Failed to update message, posting new.')
return send_discord(message, embeds=embeds)
log.error('Unable to send message to discord')
log.error(str(err))
def send_email(subject, message):
log.debug('Attempting to send email...')
is_enabled, mail_bin, from_email, to_email = itemgetter(
'enabled', 'binary', 'from_email', 'to_email')(config['notifications']['email'])
if not is_enabled:
return
if not os.path.isfile(mail_bin):
raise FileNotFoundError('Unable to find mail executable', mail_bin)
result = subprocess.run([
mail_bin,
'-a', 'Content-Type: text/html',
'-s', subject,
'-r', from_email,
to_email
], input=message, capture_output=True, text=True)
if result.returncode != 0 or result.stderr:
raise ConnectionError('Unable to send email', result.stderr)
log.debug(f'Successfully sent email to {to_email}')
#
# Snapraid Helpers
def is_running():
# Use the name pre-fetched via attrs instead of calling process.name() again,
# which can raise if the process exits mid-iteration.
for process in psutil.process_iter(attrs=['name']):
name = process.info.get('name')
if name and name.lower() == 'snapraid':
return True
return False
def set_snapraid_priority():
# Setting nice is enough, as ionice follows that per the documentation here:
# https://www.kernel.org/doc/Documentation/block/ioprio.txt
#
# The default class `IOPRIO_CLASS_BE` sets ionice as: `io_nice = (cpu_nice + 20) / 5.`
# The default nice is 0, which sets ionice to 4.
# We set nice to 10, which results in ionice of 6 - this way it's not entirely deprioritized.
nice_level = config['snapraid']['nice']
os.nice(nice_level)
p = psutil.Process(os.getpid())
p.ionice(psutil.IOPRIO_CLASS_BE, math.floor((nice_level + 20) / 5))
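# Resolve the block devices backing the parity (and optionally content) files via df,
# then put them into standby with `hdparm -y`.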
def spin_down():
hdparm_bin, is_enabled, drives = itemgetter('binary', 'enabled', 'drives')(config['spindown'])
if not is_enabled:
return
if not os.path.isfile(hdparm_bin):
raise FileNotFoundError('Unable to find hdparm executable', hdparm_bin)
log.info(f'Attempting to spin down drives ({drives})...')
content_files, parity_files = get_snapraid_config()
drives_to_spin_down = parity_files + (content_files if drives == 'all' else [])
shell_command = (f'{hdparm_bin} -y $('
f'df {" ".join(drives_to_spin_down)} | ' # Get the drives
f'tail -n +2 | ' # Remove the header
f'cut -d " " -f1 | ' # Split by space, get the first item
f'tr "\\n" " "' # Replace newlines with spaces
f')')
try:
process = subprocess.run(shell_command, shell=True, capture_output=True, text=True)
rc = process.returncode
if rc == 0:
log.info('Successfully spun down drives.')
else:
log.error('Unable to successfully spin down hard drives, see error output below.')
log.error(process.stderr)
log.error(f'Shell command executed: {shell_command}')
except Exception as err:
log.error('Encountered exception while attempting to spin down drives:')
log.error(str(err))
#
# Snapraid Commands
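# Runs a SnapRAID command with lowered CPU/IO priority, streaming stdout and stderr on
# separate threads so progress lines can be forwarded to progress_handler while
# everything is mirrored to the raw log.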
def run_snapraid(commands, progress_handler=None, allowed_return_codes=()):
snapraid_bin, snapraid_config = itemgetter('binary', 'config')(config['snapraid'])
if not os.path.isfile(snapraid_bin):
raise FileNotFoundError('Unable to find SnapRAID executable', snapraid_bin)
if is_running():
raise ChildProcessError('SnapRAID already seems to be running, unable to proceed.')
std_out = []
std_err = []
with (subprocess.Popen(
[snapraid_bin, '--conf', snapraid_config] + commands,
shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
preexec_fn=set_snapraid_priority, encoding="utf-8",
errors='replace'
) as process,
concurrent.futures.ThreadPoolExecutor(2) as tpe,
):
def read_stdout(file):
for line in file:
rline = line.rstrip()
raw_log.info(rline)
if progress_handler is None or not progress_handler(rline):
std_out.append(rline)
def read_stderr(file):
for line in file:
rline = line.rstrip()
raw_log.error(rline)
std_err.append(rline)
f1 = tpe.submit(read_stdout, process.stdout)
f2 = tpe.submit(read_stderr, process.stderr)
f1.result()
f2.result()
# Wait explicitly so the return code is guaranteed to be populated.
rc = process.wait()
if not (rc == 0 or rc in allowed_return_codes):
last_lines = '\n'.join(std_err[-10:])
raise SystemError(f'A critical SnapRAID error was encountered during command '
f'`snapraid {" ".join(commands)}`. The process exited with code `{rc}`.\n'
f'Here are the last **10 lines** from the error log:\n```\n'
f'{last_lines}\n```\nThis requires your immediate attention.',
'\n'.join(std_err))
return '\n'.join(std_out), '\n'.join(std_err)
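# Parses `snapraid status` output into per-drive stats, a scrub age summary, the error
# count, the zero sub-second timestamp count, and whether an interrupted sync is pending.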
def get_status():
snapraid_status, _ = run_snapraid(['status'])
stats_regex = re.compile(
r'^ *(?P<file_count>\d+) +(?P<fragmented_files>\d+) +(?P<excess_fragments>\d+) +('
r'?P<wasted_gb>[-.\d]+) +(?P<used_gb>\d+) +(?P<free_gb>\d+) +(?P<use_percent>\d+)%(?: +('
r'?P<drive_name>\S+)|$)',
flags=re.MULTILINE)
drive_stats = [m.groupdict() for m in stats_regex.finditer(snapraid_status)]
scrub_info = re.search(
r'scrubbed (?P<scrub_age>\d+) days ago, the median (?P<median>\d+), the newest ('
r'?P<newest>\d+)',
snapraid_status)
unscrubbed_percent = re.search(
r'^The (?P<not_scrubbed_percent>\d+)% of the array is not scrubbed', snapraid_status,
flags=re.MULTILINE)
error_count = re.search(r'^DANGER! In the array there are (?P<error_count>\d+) errors!',
snapraid_status, flags=re.MULTILINE)
zero_subsecond_count = re.search(
r'^You have (?P<touch_files>\d+) files with (?:a )?zero sub-second timestamp', snapraid_status,
flags=re.MULTILINE)
sync_in_progress = bool(
re.search(r'^You have a sync in progress', snapraid_status, flags=re.MULTILINE))
if scrub_info is None:
raise ValueError('Unable to parse SnapRAID status')
unscrubbed_percent = 0 if unscrubbed_percent is None else int(unscrubbed_percent[1])
zero_subsecond_count = 0 if zero_subsecond_count is None else int(zero_subsecond_count[1])
error_count = 0 if error_count is None else int(error_count[1])
return (
drive_stats,
{
'unscrubbed': unscrubbed_percent,
'scrub_age': int(scrub_info[1]),
'median': int(scrub_info[2]),
'newest': int(scrub_info[3])
},
error_count,
zero_subsecond_count,
sync_in_progress
)
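# `snapraid diff` exits with code 2 when changes require a sync, so 2 is accepted here.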
def get_diff():
snapraid_diff, _ = run_snapraid(['diff'], allowed_return_codes=[2])
diff_regex = re.compile(r'''^ *(?P<equal>\d+) equal$
^ *(?P<added>\d+) added$
^ *(?P<removed>\d+) removed$
^ *(?P<updated>\d+) updated$
^ *(?P<moved>\d+) moved$
^ *(?P<copied>\d+) copied$
^ *(?P<restored>\d+) restored$''', flags=re.MULTILINE)
diff_data = [m.groupdict() for m in diff_regex.finditer(snapraid_diff)]
if len(diff_data) == 0:
raise ValueError('Unable to parse diff output from SnapRAID, not proceeding.')
diff_int = {key: int(value) for key, value in diff_data[0].items()}
return diff_int
def get_smart():
smart_data, _ = run_snapraid(['smart'])
drive_regex = re.compile(r'^ *(?P<temp>\d+|-) +(?P<power_on_days>\d+|-) +('
r'?P<error_count>\d+|-) +(?P<fp>\d+%|-|SSD) +(?P<size>\S+) +('
r'?P<serial>\S+) +(?P<device>\S+) +(?P<disk>\S+)$', flags=re.MULTILINE)
drive_data = [m.groupdict() for m in drive_regex.finditer(smart_data)]
global_fp_match = re.search(r'next year is (?P<total_fp>\d+)%', smart_data)
# Validate both parses before dereferencing the match object.
if not drive_data or global_fp_match is None:
raise ValueError('Unable to parse drive data or global failure percentage, not proceeding.')
return drive_data, global_fp_match.group('total_fp')
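# Returns a closure that is fed every stdout line; progress lines are rendered into a
# single Discord message that is edited in place at most once per minute.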
def handle_progress():
start = datetime.now()
message_id = None
def handler(data):
nonlocal start
nonlocal message_id
progress_data = re.search(r'^(?P<progress>\d+)%, (?P<progress_mb>\d+) MB'
r'(?:, (?P<speed_mb>\d+) MB/s, (?P<speed_stripe>\d+) stripe/s, '
r'CPU (?P<cpu>\d+)%, (?P<eta_h>\d+):(?P<eta_m>\d+) ETA)?$', data,
flags=re.MULTILINE)
is_progress = bool(progress_data)
if is_progress and datetime.now() - start >= timedelta(minutes=1):
msg = f'Current progress **{progress_data.group(1)}%** ' \
f'(`{human_readable_size(int(progress_data.group(2)))}`)'
if progress_data.group(3) is not None:
msg = (f'{msg} — processing at **{int(progress_data.group(3)):,} MB/s** '
f'(*{int(progress_data.group(4)):,} stripe/s, {progress_data.group(5)}% CPU*). '
f'**ETA:** {progress_data.group(6)}h {progress_data.group(7)}m')
if message_id is None:
message_id = notify_info(msg)
else:
new_message_id = notify_info(msg, message_id=message_id)
if new_message_id:
message_id = new_message_id
start = datetime.now()
return is_progress
return handler
def _run_sync(run_count):
pre_hash, auto_sync = itemgetter('pre_hash', 'auto_sync')(config['snapraid']['sync'])
auto_sync_enabled, max_attempts = itemgetter('enabled', 'max_attempts')(auto_sync)
try:
log.info(f'Running SnapRAID sync ({run_count}) '
f'{"with" if pre_hash else "without"} pre-hashing...')
notify_info(f'Syncing **({run_count})**...')
run_snapraid(['sync', '-h'] if pre_hash else ['sync'], handle_progress())
except SystemError as err:
sync_errors = err.args[1]
if sync_errors is None:
raise
# The three errors in the regex are considered "safe", i.e.,
# a file was just modified or removed during the sync.
#
# This is normally nothing to be worried about, and the operation can just be rerun.
# If there are other errors in the output, and not only these, we don't want to re-run
# the sync command, because it could be things we need to have a closer look at.
bad_errors = re.sub(r'^(?:'
r'WARNING! You cannot modify (?:files|data disk) during a sync\.|'
r'Unexpected (?:time|size) change at file .+|'
r'Missing file .+|'
r'Rerun the sync command when finished\.|'
r'WARNING! With \d+ disks it\'s recommended to use \w+ parity levels\.|'
r'WARNING! Unexpected file errors!'
r')\s*',
'', sync_errors, flags=re.MULTILINE | re.IGNORECASE)
should_rerun = bad_errors == '' and re.search(r'^Rerun the sync command when finished',
sync_errors,
flags=re.MULTILINE | re.IGNORECASE)
if should_rerun:
log.info(
'SnapRAID has indicated another sync is recommended, due to disks or files being '
'modified during the sync process.')
if should_rerun and auto_sync_enabled and run_count < max_attempts:
log.info('Re-running sync command with identical options...')
_run_sync(run_count + 1)
else:
raise
def run_sync():
start = datetime.now()
_run_sync(1)
end = datetime.now()
sync_job_time = format_delta(end - start)
log.info(f'Sync job finished, elapsed time {sync_job_time}')
notify_info(f'Sync job finished, elapsed time **{sync_job_time}**')
return sync_job_time
def run_scrub():
enabled, scrub_new, check_percent, min_age, scrub_delayed_run = itemgetter(
'enabled', 'scrub_new', 'check_percent', 'min_age', 'scrub_delayed_run'
)(config['snapraid']['scrub'])
if not enabled:
log.info('Scrubbing not enabled, skipping.')
return None
# This block only runs when scrub_delayed_run is nonzero, i.e. delayed scrubbing is enabled.
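# Example: with scrub_delayed_run = 3, three consecutive runs skip the scrub and bump
# the counter; the fourth run performs the scrub and removes the counter file.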
if scrub_delayed_run > 0:
log.info('Delayed scrub is enabled.')
# Read sync_count from the counter file, falling back to 0 if it is missing or malformed.
try:
sync_count = int(sync_count_file.read_text().strip())
except (FileNotFoundError, ValueError):
sync_count = 0
if sync_count >= scrub_delayed_run:
# Run scrub job. If count is 0, scrub was forced externally
log.info(
f'Number of delayed runs has reached/exceeded threshold ({scrub_delayed_run}). A SCRUB job will run.'
)
else:
# DON'T run, increment count and skip the job
sync_count += 1
# write the sync_count to file
sync_count_file.write_text(str(sync_count))
if sync_count == scrub_delayed_run:
log.info('Scrub job will run next time.')
notify_info('Scrub job will run next time.')
else:
msg = f'{scrub_delayed_run - sync_count} runs until the next scrub. **NOT** proceeding with SCRUB job.'
log.info(msg)
notify_info(msg)
return None # exit early
log.info('Running scrub job...')
start = datetime.now()
if scrub_new:
log.info('Scrubbing new blocks...')
notify_info('Scrubbing new blocks...')
scrub_new_output, _ = run_snapraid(['scrub', '-p', 'new'], handle_progress())
log.info('Scrubbing old blocks...')
notify_info('Scrubbing old blocks...')
scrub_output, _ = run_snapraid(
['scrub', '-p', str(check_percent), '-o', str(min_age)],
handle_progress())
end = datetime.now()
scrub_job_time = format_delta(end - start)
log.info(f'Scrub job finished, elapsed time {scrub_job_time}')
notify_info(f'Scrub job finished, elapsed time **{scrub_job_time}**')
# reset the scrub counter
if os.path.exists(sync_count_file):
os.remove(sync_count_file)
return scrub_job_time
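# `snapraid touch` assigns sub-second timestamps to files that lack them, which
# improves SnapRAID's move and copy detection on subsequent syncs.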
def run_touch():
run_snapraid(['touch'])
#
# Sanity Checker
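# Extracts the content and parity file paths from the SnapRAID configuration so they
# can be existence-checked and used for drive spin-down.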
def get_snapraid_config():
config_file = config['snapraid']['config']
if not os.path.isfile(config_file):
raise FileNotFoundError('Unable to find SnapRAID configuration', config_file)
with open(config_file, 'r') as file:
snapraid_config = file.read()
file_regex = re.compile(r'^(content|(?:\d+-)?parity) +(.+/\w+\.(?:content|(?:\d+-)?parity)) *$',
flags=re.MULTILINE)
parity_files = []
content_files = []
for m in file_regex.finditer(snapraid_config):
if m[1] == 'content':
content_files.append(m[2])
else:
parity_files.append(m[2])
return content_files, parity_files
def sanity_check():
content_files, parity_files = get_snapraid_config()
files = content_files + parity_files
for file in files:
if not os.path.isfile(file):
raise FileNotFoundError('Unable to locate required content/parity file', file)
log.info(f'All {len(files)} content and parity files found, proceeding.')
#
# Main
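# Overall flow: pre-run script -> sanity checks -> status/touch -> diff against
# thresholds -> sync -> scrub -> status + SMART report -> notifications -> spin-down.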
def main():
# Pre-initialize so the finally block below cannot hit a NameError if an
# exception fires before the scripts config is read.
pre_run = post_run = None
try:
total_start = datetime.now()
log.info('Snapper started')
pre_run, post_run = itemgetter('pre_run', 'post_run')(config['scripts'])
if pre_run is not None:
log.info('Running pre-run script...')
run_script(pre_run)
notify_info('Starting SnapRAID jobs...')
log.info('Running sanity checks...')
sanity_check()
log.info('Checking for errors and files with zero sub-second timestamps...')
(_, _, error_count, zero_subsecond_count, sync_in_progress) = get_status()
if error_count > 0:
if force_script_execution:
log.error(f'There are {error_count} error(s) in the array, '
f'ignoring due to forced run.')
notify_warning(f'There are **{error_count}** error(s) in the array, '
f'ignoring due to forced run.')
else:
raise SystemError(
f'There are {error_count} error(s) in the array, you should review '
f'this immediately. All jobs have been halted.')
if zero_subsecond_count > 0:
log.info(f'Found {zero_subsecond_count} file(s) with zero sub-second timestamp')
log.info('Running touch job...')
run_touch()
log.info('Get SnapRAID diff...')
diff_data = get_diff()
log.info(f'Diff output: {diff_data["equal"]} equal, ' +
f'{diff_data["added"]} added, ' +
f'{diff_data["removed"]} removed, ' +
f'{diff_data["updated"]} updated, ' +
f'{diff_data["moved"]} moved, ' +
f'{diff_data["copied"]} copied, ' +
f'{diff_data["restored"]} restored')
if sum(diff_data.values()) - diff_data["equal"] > 0 or sync_in_progress or \
force_script_execution:
updated_threshold, removed_threshold = itemgetter('updated', 'removed')(
config['snapraid']['diff']['thresholds'])
if force_script_execution:
log.info('Ignoring any thresholds and forcefully proceeding with sync.')
elif 0 < updated_threshold < diff_data["updated"]:
raise ValueError(f'More files ({diff_data["updated"]}) have been updated than the '
f'configured max ({updated_threshold})')
elif 0 < removed_threshold < diff_data["removed"]:
raise ValueError(
f'More files ({diff_data["removed"]}) have been removed than the configured '
f'max ({removed_threshold})')
elif sync_in_progress:
log.info('A previous sync in progress has been detected, resuming.')
else:
if updated_threshold > 0:
log.info(f'Fewer files updated ({diff_data["updated"]}) than the configured '
f'limit ({updated_threshold}), proceeding.')
if removed_threshold > 0:
log.info(f'Fewer files removed ({diff_data["removed"]}) than the configured '
f'limit ({removed_threshold}), proceeding.')
sync_job_time = run_sync()
sync_job_ran = True
else:
log.info('No changes to sync, skipping.')
notify_info('No changes to sync')
sync_job_ran = False
sync_job_time = None
scrub_job_time = run_scrub()
scrub_job_ran = scrub_job_time is not None
log.info('Fetching SnapRAID status...')
(drive_stats, scrub_stats, error_count, _, _) = get_status()
log.info(f'{scrub_stats["unscrubbed"]}% of the array has not been scrubbed, with the '
f'oldest block at {scrub_stats["scrub_age"]} day(s), the '
f'median at {scrub_stats["median"]} day(s), and the newest at '
f'{scrub_stats["newest"]} day(s).')
log.info('Fetching smart data...')
(smart_drive_data, global_fp) = get_smart()
log.info(f'Drive failure probability this year is {global_fp}%')
total_time = format_delta(datetime.now() - total_start)
report_data = {
'sync_job_ran': sync_job_ran,
'scrub_job_ran': scrub_job_ran,
'sync_job_time': sync_job_time,
'scrub_job_time': scrub_job_time,
'diff_data': diff_data,
'zero_subsecond_count': zero_subsecond_count,
'scrub_stats': scrub_stats,
'drive_stats': drive_stats,
'smart_drive_data': smart_drive_data,
'global_fp': global_fp,
'total_time': total_time
}
email_report = create_email_report(report_data)
send_email('SnapRAID Job Completed Successfully', email_report)
if config['notifications']['discord']['enabled']:
(discord_message, embeds) = create_discord_report(report_data)
send_discord(discord_message, embeds)
spin_down()
log.info('SnapRAID jobs completed successfully, exiting.')
except (ValueError, ChildProcessError, SystemError) as err:
notify_and_handle_error(err.args[0], err)
except ConnectionError as err:
log.error(str(err))
except FileNotFoundError as err:
notify_and_handle_error(f'{err.args[0]} - missing file path `{err.args[1]}`', err)
except BaseException as err:
notify_and_handle_error(
f'Unhandled Python Exception `{str(err) if str(err) else "unknown error"}`', err)
finally:
if post_run is not None:
log.info('Running post-run script...')
run_script(post_run)
try:
with pidfile.PIDFile('/tmp/snapper.pid'):
# Setup loggers after pidfile has been acquired
raw_log = setup_logger('snapper_raw', 'snapper_raw.log')
log = setup_logger('snapper', 'snapper.log')
log.handlers = raw_log.handlers + log.handlers
log.addHandler(logging.StreamHandler())
main()
except pidfile.AlreadyRunningError:
print('snapper already appears to be running!')