introducing Pelican Scheduler
[Ultimately_Untrue_Thought.git] / provisioning / pelican_scheduler.py
diff --git a/provisioning/pelican_scheduler.py b/provisioning/pelican_scheduler.py
new file mode 100755 (executable)
index 0000000..8882e56
--- /dev/null
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+"""A script to schedule Pelican posts in advance, appropriate for a Git
+post-receive hook. Requires the `at` job-scheduling utility."""
+
+import datetime
+import os
+import re
+import subprocess
+
+# Checkout that main() refreshes with `git pull` before scanning for posts.
+WORKING_REPO = "/home/mtsw/working"
+# Directory that is walked for `.md` posts and handed to pelican as its input.
+INPUT_DIR = os.path.join(WORKING_REPO, "content")
+# Passed to pelican's `-o` option.
+OUTPUT_DIR = "/var/www/html"
+# Passed to pelican's `-s` option.
+PUBLISH_CONF = os.path.join(WORKING_REPO, "publishconf.py")
+# Shell command submitted to `at` for each scheduled rebuild: change into
+# WORKING_REPO, source `bin/activate` (presumably a virtualenv living in the
+# working repo -- confirm), then run pelican.
+SITEGEN_COMMAND = "bash -c 'cd {} && source bin/activate && pelican {} -o {} -s {}'".format(
+    WORKING_REPO, INPUT_DIR, OUTPUT_DIR, PUBLISH_CONF)
+
+# Matches a whole line of the form "Date: YYYY-MM-DD HH:MM" (optional spaces
+# after the colon and at the end) and captures the timestamp. MULTILINE makes
+# ^ and $ anchor at line boundaries, so the line can sit anywhere in a post.
+DATELINE_REGEX = re.compile(r"^Date: *(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) *$",
+                            re.MULTILINE)
+# Applied to each line of `atq` output: digits, one whitespace character and
+# three word characters (presumably the job number and weekday abbreviation),
+# then the captured "Mon DD HH:MM:SS YYYY" run time.
+JOBLINE_REGEX = re.compile(r"\d+\s\w{3} (\w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \d{4})")
+
+def get_future_publication_times():
+    """Collect the publication times of posts dated in the future.
+
+    Walks INPUT_DIR, skipping the files of any directory whose path ends
+    with "drafts", and searches each `.md` file for the first line matched
+    by DATELINE_REGEX.
+
+    A post whose Date line has the right shape but is not a real date or
+    time is reported on stderr and skipped, so one bad post cannot stop the
+    others from being scheduled.
+
+    Returns a set of naive `datetime.datetime` objects (minute resolution)
+    that are strictly later than the current local time.
+    """
+    import sys  # local import: only needed for the malformed-date warning
+
+    now = datetime.datetime.now()
+    times = set()
+    for path, _dirnames, filenames in os.walk(INPUT_DIR):
+        if path.endswith("drafts"):
+            continue
+        for filename in filenames:
+            if not filename.endswith(".md"):
+                continue
+            post_path = os.path.join(path, filename)
+            # Decode explicitly as UTF-8: a hook may run under a C/POSIX
+            # locale whose default encoding cannot read non-ASCII posts.
+            # The Date line itself is ASCII, so replacing undecodable bytes
+            # cannot change what the regex finds.
+            with open(post_path, encoding="utf-8",
+                      errors="replace") as post_file:
+                match = DATELINE_REGEX.search(post_file.read())
+            if not match:
+                continue
+            try:
+                when = datetime.datetime.strptime(match.group(1),
+                                                  "%Y-%m-%d %H:%M")
+            except ValueError:
+                # Shaped like a date but not a real one (e.g. month 13):
+                # skip this post rather than abort scheduling for the rest.
+                print("skipping {}: invalid Date {!r}".format(
+                    post_path, match.group(1)), file=sys.stderr)
+                continue
+            if when > now:
+                times.add(when)
+    return times
+
+
+def get_extant_at_job_times():
+    """Return the run times of the jobs currently listed by `atq`.
+
+    Runs `atq`, decodes its standard output as UTF-8 and parses every line
+    that JOBLINE_REGEX matches at its start; all other lines are ignored.
+
+    Returns a set of naive `datetime.datetime` objects.
+    """
+    atq = subprocess.run(["atq"], stdout=subprocess.PIPE)
+    listing = atq.stdout.decode('utf8')
+    hits = (JOBLINE_REGEX.match(line) for line in listing.split('\n'))
+    return {
+        datetime.datetime.strptime(hit.group(1), "%b %d %H:%M:%S %Y")
+        for hit in hits
+        if hit
+    }
+
+
+def schedule(command, when):
+    """Queue `command` with `at` to run at the datetime `when`.
+
+    `when` is formatted as "HH:MM YYYY-MM-DD" and given to `at` as its time
+    argument; `command` is encoded and fed to `at` on standard input.
+
+    The output of `at` is captured and discarded and its exit status is not
+    checked. Returns None.
+    """
+    run_at = when.strftime("%H:%M %Y-%m-%d")
+    # `input=` makes run() open a stdin pipe, write the bytes and wait.
+    subprocess.run(
+        ['at', run_at],
+        input=command.encode(),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+
+
+def main():
+    """Bring the working repo up to date and queue any missing site rebuilds.
+
+    Runs `git pull` in WORKING_REPO, then schedules SITEGEN_COMMAND via
+    `schedule` for every future publication time that has no `at` job at
+    exactly that time.
+    """
+    # refresh the "working" checkout from the bare repo
+    subprocess.run(["git", "pull"], cwd=WORKING_REPO)
+
+    # times a rebuild is needed at vs. times `at` already has a job for
+    wanted = get_future_publication_times()
+    already_queued = get_extant_at_job_times()
+
+    # only fill the gaps, so repeated runs don't pile up duplicate jobs
+    for when in wanted.difference(already_queued):
+        schedule(SITEGEN_COMMAND, when)
+
+
+if __name__ == "__main__":
+    main()