#!/usr/bin/env python3

# provisioning/pelican_scheduler.py
#
# Introduced in commit 1cf814f52c06e6526831c1c73e58d2592ab9f5e0
# ("introducing Pelican Scheduler", M. Taylor Saotome-Westlake,
# Wed, 22 Nov 2017 19:43:22 -0800).  From the commit message:
#
#   I haven't tested this end-to-end yet, but I've spot-checked most of the
#   individual pieces and it's plausible that this will work.
#
#   Some might say, "You know, you could schedule posts in advance without
#   writing any code at all if you just used WordPress" but then you'd be
#   using WordPress

"""A script to schedule Pelican posts in advance, appropriate for a Git
post-receive hook. Requires the `at` job-scheduling utility.

Overview of one run (see `main`):

1. `git pull` in the working checkout so it matches the bare repository.
2. Collect the `Date:` of every non-draft Markdown post that lies in the
   future.
3. Collect the run times of the jobs already queued with `at`.
4. For every future publication time without a queued job, queue a site
   regeneration at that time.
"""

import datetime
import os
import re
import subprocess
import sys

WORKING_REPO = "/home/mtsw/working"
INPUT_DIR = os.path.join(WORKING_REPO, "content")
OUTPUT_DIR = "/var/www/html"
PUBLISH_CONF = os.path.join(WORKING_REPO, "publishconf.py")
SITEGEN_COMMAND = "bash -c 'cd {} && source bin/activate && pelican {} -o {} -s {}'".format(
    WORKING_REPO, INPUT_DIR, OUTPUT_DIR, PUBLISH_CONF)

# A post metadata line such as "Date: 2017-11-22 19:43" (minute precision).
DATELINE_REGEX = re.compile(r"^Date: *(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) *$",
                            re.MULTILINE)
DATELINE_FORMAT = "%Y-%m-%d %H:%M"

# An `atq` line such as "12<TAB>Thu Nov 23 10:00:00 2017 a mtsw"; the group
# captures everything after the weekday up to and including the year.
JOBLINE_REGEX = re.compile(r"\d+\s\w{3} (\w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \d{4})")
JOBLINE_FORMAT = "%b %d %H:%M:%S %Y"

# Repository-locating variables that Git exports to hooks.  They must not
# leak into Git commands aimed at a *different* repository.
GIT_LOCAL_ENV_VARS = (
    "GIT_DIR",
    "GIT_WORK_TREE",
    "GIT_INDEX_FILE",
    "GIT_OBJECT_DIRECTORY",
    "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    "GIT_COMMON_DIR",
    "GIT_PREFIX",
)


def clean_git_env(environ=None):
    """Return a copy of *environ* without Git's repository-local variables.

    Args:
        environ: mapping of environment variables; defaults to `os.environ`.

    Returns:
        A new dict; the input mapping is not modified.
    """
    if environ is None:
        environ = os.environ
    return {name: value for name, value in environ.items()
            if name not in GIT_LOCAL_ENV_VARS}


def get_future_publication_times(input_dir=INPUT_DIR, now=None):
    """Return the set of post publication times that are later than *now*.

    Walks *input_dir* for `.md` files, reads the first `Date:` line matching
    `DATELINE_REGEX` from each, and keeps the ones strictly in the future.
    Directories whose name ends in "drafts" are not descended into.

    Args:
        input_dir: root of the content tree; defaults to `INPUT_DIR`.
        now: naive `datetime` to compare against; defaults to the current
            local time.

    Returns:
        A set of naive `datetime.datetime` objects.
    """
    if now is None:
        now = datetime.datetime.now()
    times = set()
    for path, dirnames, filenames in os.walk(input_dir):
        # Prune in place so os.walk skips drafts *and* everything below them.
        dirnames[:] = [name for name in dirnames
                       if not name.endswith("drafts")]
        for filename in filenames:
            if not filename.endswith(".md"):
                continue
            # Be explicit about the encoding: a hook may run under a C/POSIX
            # locale where the default codec cannot decode non-ASCII posts.
            with open(os.path.join(path, filename),
                      encoding="utf-8", errors="replace") as post_file:
                match = DATELINE_REGEX.search(post_file.read())
            if not match:
                continue
            try:
                when = datetime.datetime.strptime(match.group(1),
                                                  DATELINE_FORMAT)
            except ValueError:
                # Shaped like a date but not a real one (e.g. month 13);
                # one bad post should not abort the whole hook.
                continue
            if when > now:
                times.add(when)
    return times


def get_extant_at_job_times(atq_output=None):
    """Return the set of times at which `at` jobs are already queued.

    Args:
        atq_output: text in the format printed by `atq`.  When omitted, the
            `atq` command is run and its standard output is used.

    Returns:
        A set of naive `datetime.datetime` objects, one per queue line that
        matches `JOBLINE_REGEX` and parses as a valid date.
    """
    if atq_output is None:
        result = subprocess.run(["atq"], stdout=subprocess.PIPE)
        atq_output = result.stdout.decode("utf8", errors="replace")
    times = set()
    for job_line in atq_output.splitlines():
        match = JOBLINE_REGEX.match(job_line)
        if not match:
            continue
        try:
            times.add(datetime.datetime.strptime(match.group(1),
                                                 JOBLINE_FORMAT))
        except ValueError:
            # Ignore a queue line we cannot interpret rather than crash.
            continue
    return times


def schedule(command, when):
    """Queue *command* with `at` to run at the datetime *when*.

    The command text is fed to `at` on standard input.  A non-zero exit
    status from `at` is reported on standard error; no exception is raised.

    Args:
        command: shell command line to run.
        when: `datetime.datetime` at which to run it.
    """
    timestamp = when.strftime("%H:%M %Y-%m-%d")
    result = subprocess.run(
        ["at", timestamp],
        input=command.encode(),
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        detail = result.stderr.decode("utf8", errors="replace").strip()
        print("pelican_scheduler: `at {}` failed with status {}: {}".format(
            timestamp, result.returncode, detail), file=sys.stderr)


def main():
    """Sync the working checkout and queue any missing site regenerations."""
    # sync our "working" repo with the bare one; the hook's own GIT_DIR etc.
    # point at the bare repository and must be cleared for the pull to act
    # on WORKING_REPO
    pull = subprocess.run(["git", "pull"], cwd=WORKING_REPO,
                          env=clean_git_env())
    if pull.returncode != 0:
        print("pelican_scheduler: `git pull` in {} failed with status {}; "
              "scheduling from the existing checkout".format(
                  WORKING_REPO, pull.returncode), file=sys.stderr)

    # look for scheduled future posts
    future_publication_times = get_future_publication_times()

    # look at atq
    extant_at_job_times = get_extant_at_job_times()

    # if there are future posts that don't have an atq entry, schedule a
    # site-regen at that time
    to_schedule = future_publication_times - extant_at_job_times
    for when in sorted(to_schedule):
        schedule(SITEGEN_COMMAND, when)


if __name__ == "__main__":
    main()