#!/usr/bin/env python3

"""A script to schedule Pelican posts in advance, appropriate for a Git
post-receive hook. Requires the `at` job-scheduling utility."""

import datetime
import os
import re
import subprocess
import sys

WORKING_REPO = "/home/mtsw/working"
INPUT_DIR = os.path.join(WORKING_REPO, "content")
OUTPUT_DIR = "/var/www/html"
PUBLISH_CONF = os.path.join(WORKING_REPO, "publishconf.py")
SITEGEN_COMMAND = "bash -c 'cd {} && source bin/activate && pelican {} -o {} -s {}'".format(
    WORKING_REPO, INPUT_DIR, OUTPUT_DIR, PUBLISH_CONF)

# Matches a post metadata line such as "Date: 2018-01-02 10:00".
DATELINE_REGEX = re.compile(r"^Date: *(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) *$",
                            re.MULTILINE)
# Matches the start of an `atq` line: job number, whitespace, a three-letter
# word (the weekday), then the "Mon D HH:MM:SS YYYY" part that is captured.
JOBLINE_REGEX = re.compile(r"\d+\s\w{3} (\w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \d{4})")


def get_future_publication_times(input_dir=INPUT_DIR, now=None):
    """Return the set of post publication times that lie after `now`.

    Walks `input_dir`, reads every `.md` file, and parses the first
    `Date: YYYY-MM-DD HH:MM` line found in it. Files located directly in a
    directory whose path ends with "drafts" are ignored.

    `input_dir` defaults to INPUT_DIR; `now` defaults to the current local
    time (a naive `datetime`). Returns a set of naive `datetime` objects.
    """
    if now is None:
        now = datetime.datetime.now()
    times = set()
    for path, _dirnames, filenames in os.walk(input_dir):
        # NOTE(review): this only skips files directly inside the matching
        # directory; os.walk still descends into its subdirectories.
        if path.endswith("drafts"):
            continue
        for filename in filenames:
            if not filename.endswith(".md"):
                continue
            # Read as UTF-8 explicitly: a hook may run under a C/POSIX
            # locale, where the default encoding cannot decode non-ASCII
            # post text.
            with open(os.path.join(path, filename), encoding="utf-8") as post_file:
                match = DATELINE_REGEX.search(post_file.read())
            if match:
                publication_time = datetime.datetime.strptime(
                    match.group(1), "%Y-%m-%d %H:%M")
                if publication_time > now:
                    times.add(publication_time)
    return times


def get_extant_at_job_times():
    """Return the set of times at which `at` jobs are already queued.

    Runs `atq` and parses each output line that matches JOBLINE_REGEX into
    a naive `datetime`; lines that do not match are skipped.
    """
    times = set()
    result = subprocess.run(["atq"], stdout=subprocess.PIPE)
    job_lines = result.stdout.decode('utf8').split('\n')
    for job_line in job_lines:
        match = JOBLINE_REGEX.match(job_line)
        if match:
            times.add(datetime.datetime.strptime(match.group(1),
                                                 "%b %d %H:%M:%S %Y"))
    return times


def schedule(command, when):
    """Queue `command` to be run by `at` at the datetime `when`.

    The command text is passed to `at` on standard input. If `at` exits
    with a non-zero status, its error output is reported on stderr; no
    exception is raised, so the remaining times still get scheduled.
    """
    timestamp = when.strftime("%H:%M %Y-%m-%d")
    at_command = ['at', timestamp]
    result = subprocess.run(
        at_command,
        input=command.encode(),
        stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if result.returncode != 0:
        sys.stderr.write("could not schedule job at {}: {}\n".format(
            timestamp, result.stderr.decode('utf8', 'replace').strip()))


def main():
    """Sync the working repo and schedule a site regeneration for every
    future post time that has no `at` job yet."""
    # sync our "working" repo with the bare one
    subprocess.run(["git", "pull"], cwd=WORKING_REPO)

    # look for scheduled future posts
    future_publication_times = get_future_publication_times()

    # look at atq
    extant_at_job_times = get_extant_at_job_times()

    # if there are future posts that don't have an atq entry, schedule a
    # site-regen at that time
    to_schedule = future_publication_times - extant_at_job_times
    for publication_time in to_schedule:
        schedule(SITEGEN_COMMAND, publication_time)


if __name__ == "__main__":
    main()