From: M. Taylor Saotome-Westlake <ultimatelyuntruethought@gmail.com>
Date: Thu, 23 Nov 2017 03:43:22 +0000 (-0800)
Subject: introducing Pelican Scheduler
X-Git-Url: http://unremediatedgender.space/source?p=Ultimately_Untrue_Thought.git;a=commitdiff_plain;h=1cf814f52c06e6526831c1c73e58d2592ab9f5e0;hp=5897515854dbe41cd32b84939107d3350d7c855e

introducing Pelican Scheduler

I haven't tested this end-to-end yet, but I've spot-checked most of the
individual pieces and it's plausible that this will work.

Some might say, "You know, you could schedule posts in advance without writing
any code at all if you just used WordPress"

but

then you'd be using WordPress
---

diff --git a/provisioning/pelican_scheduler.py b/provisioning/pelican_scheduler.py
new file mode 100755
index 0000000..8882e56
--- /dev/null
+++ b/provisioning/pelican_scheduler.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+"""A script to schedule Pelican posts in advance, appropriate for a Git
+post-receive hook. Requires the `at` job-scheduling utility."""
+
+import datetime
+import os
+import re
+import subprocess
+
+WORKING_REPO = "/home/mtsw/working"  # checkout in which main() runs `git pull`
+INPUT_DIR = os.path.join(WORKING_REPO, "content")  # walked for *.md posts
+OUTPUT_DIR = "/var/www/html"  # pelican's -o argument
+PUBLISH_CONF = os.path.join(WORKING_REPO, "publishconf.py")  # pelican's -s argument
+SITEGEN_COMMAND = "bash -c 'cd {} && source bin/activate && pelican {} -o {} -s {}'".format(
+    WORKING_REPO, INPUT_DIR, OUTPUT_DIR, PUBLISH_CONF)
+# Patterns: a post's "Date: YYYY-MM-DD HH:MM" line; the timestamp in a line of `atq` output.
+DATELINE_REGEX = re.compile(r"^Date: *(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) *$",
+                            re.MULTILINE)
+JOBLINE_REGEX = re.compile(r"\d+\s\w{3} (\w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \d{4})")
+
+def get_future_publication_times():
+    """Return the set of future `Date:` datetimes found in non-draft *.md posts."""
+    now = datetime.datetime.now()
+    times = set()
+    for path, dirnames, filenames in os.walk(INPUT_DIR):
+        if path.endswith("drafts"):
+            dirnames.clear()  # prune: don't descend into subdirectories of drafts
+            continue
+        for filename in (name for name in filenames if name.endswith(".md")):
+            # Decode explicitly rather than relying on the hook's locale default.
+            with open(os.path.join(path, filename), encoding="utf-8") as post_file:
+                match = DATELINE_REGEX.search(post_file.read())
+            if match:
+                time = datetime.datetime.strptime(match.group(1), "%Y-%m-%d %H:%M")
+                if time > now:
+                    times.add(time)
+    return times
+
+
+def get_extant_at_job_times():
+    """Return the set of datetimes at which `atq` lists an already-queued job."""
+    atq_output = subprocess.run(["atq"], stdout=subprocess.PIPE).stdout
+    # Lines that JOBLINE_REGEX does not match from their start are ignored.
+    found = map(JOBLINE_REGEX.match, atq_output.decode('utf8').split('\n'))
+    return {
+        datetime.datetime.strptime(hit.group(1), "%b %d %H:%M:%S %Y")
+        for hit in found
+        if hit
+    }
+
+
+def schedule(command, when):
+    """Queue shell `command` to run via `at` at datetime `when` (HH:MM resolution)."""
+    timestamp = when.strftime("%H:%M %Y-%m-%d")
+    at = subprocess.run(['at', timestamp], input=command.encode(),
+                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    # Captured output stays hidden on success; report it if `at` rejected the job.
+    if at.returncode != 0:
+        print("at {} failed: {}".format(timestamp, at.stderr.decode(errors="replace")))
+
+
+def main():
+    """Pull WORKING_REPO, then queue a site-regen for each unscheduled future post."""
+    # Git exports GIT_DIR (and friends) to hooks, pointing at the repo being
+    # pushed to; strip them so `git pull` operates on WORKING_REPO instead.
+    hook_vars = ("GIT_DIR", "GIT_WORK_TREE", "GIT_INDEX_FILE")
+    env = {k: v for k, v in os.environ.items() if k not in hook_vars}
+    subprocess.run(["git", "pull"], cwd=WORKING_REPO, env=env)
+
+    # look for scheduled future posts, and at what's already in atq
+    future_publication_times = get_future_publication_times()
+    extant_at_job_times = get_extant_at_job_times()
+
+    # schedule a site-regen for each future post that lacks an atq entry
+    for time in sorted(future_publication_times - extant_at_job_times):
+        schedule(SITEGEN_COMMAND, time)
+
+
+if __name__ == "__main__":  # only when run as a script, not on import
+    main()