check in
[Ultimately_Untrue_Thought.git] / provisioning / pelican_scheduler.py
1 #!/usr/bin/env python3
2
3 """A script to schedule Pelican posts in advance, appropriate for a Git
4 post-receive hook. Requires the `at` job-scheduling utility."""
5
6 import datetime
7 import os
8 import re
9 import subprocess
10
# Non-bare checkout that main() syncs to origin/master and builds the site from.
WORKING_REPO = "/home/mtsw/working"
# Directory of post sources handed to pelican (and scanned for datelines).
INPUT_DIR = os.path.join(WORKING_REPO, "content")
# Directory pelican writes the generated site into.
OUTPUT_DIR = "/var/www/html"
# Settings file passed to pelican via `-s`.
PUBLISH_CONF = os.path.join(WORKING_REPO, "publishconf.py")

# One shell command line that regenerates the site. It is run immediately by
# main() and is also the text fed to `at` for deferred regeneration.
# (`source bin/activate` presumably activates a virtualenv rooted in the
# working repo -- confirm against the server setup.)
SITEGEN_COMMAND = (
    "bash -c 'cd {} && source bin/activate && pelican {} -o {} -s {}'"
).format(WORKING_REPO, INPUT_DIR, OUTPUT_DIR, PUBLISH_CONF)

# Matches a whole "Date: YYYY-MM-DD HH:MM" line anywhere in a post and
# captures the timestamp portion.
DATELINE_REGEX = re.compile(
    r"^Date: *(\d{4}-\d{2}-\d{2} \d{2}:\d{2}) *$",
    flags=re.MULTILINE,
)
# Matches the start of an `atq` output line: a job number, one whitespace
# character, a three-character word, then the captured
# "Mon DD HH:MM:SS YYYY" timestamp.
JOBLINE_REGEX = re.compile(
    r"\d+\s\w{3} (\w{3} +\d{1,2} \d{2}:\d{2}:\d{2} \d{4})"
)
21
def get_future_publication_times():
    """Collect the publication times of posts dated later than now.

    Walks INPUT_DIR, reads every ``.md`` file outside of drafts directories,
    and parses the first line matching DATELINE_REGEX in each.

    Returns:
        A set of naive ``datetime.datetime`` values (minute resolution), one
        per distinct dateline that lies after the current local time.

    Raises:
        ValueError: if a dateline matches the regex but is not a real
            date/time (e.g. month 13).
        UnicodeDecodeError: if a post is not valid UTF-8.
    """
    now = datetime.datetime.now()
    times = set()
    for path, dirnames, filenames in os.walk(INPUT_DIR):
        if path.endswith("drafts"):
            # Emptying dirnames in place tells os.walk not to descend, so
            # posts nested below a drafts directory are skipped as well.
            dirnames[:] = []
            continue
        for filename in filenames:
            if not filename.endswith(".md"):
                continue
            # Decode explicitly as UTF-8 rather than the locale default: a
            # hook may run under a C/POSIX locale where non-ASCII posts would
            # otherwise fail to decode.
            post_path = os.path.join(path, filename)
            with open(post_path, encoding="utf-8") as post_file:
                match = DATELINE_REGEX.search(post_file.read())
            if not match:
                continue
            publish_at = datetime.datetime.strptime(match.group(1),
                                                    "%Y-%m-%d %H:%M")
            if publish_at > now:
                times.add(publish_at)
    return times
39
40
def get_extant_at_job_times():
    """Return the run times of the jobs currently listed by ``atq``.

    Runs ``atq``, decodes its standard output as UTF-8, and parses every
    line that JOBLINE_REGEX matches; other lines are ignored.

    Returns:
        A set of naive ``datetime.datetime`` values, one per distinct job
        time found in the listing.
    """
    atq = subprocess.run(["atq"], stdout=subprocess.PIPE)
    listing = atq.stdout.decode('utf8')
    # Lazily try each output line against the job-line pattern; lines that
    # do not match yield None and are filtered out below.
    hits = (JOBLINE_REGEX.match(line) for line in listing.split('\n'))
    return {
        datetime.datetime.strptime(hit.group(1), "%b %d %H:%M:%S %Y")
        for hit in hits
        if hit
    }
51
52
def schedule(command, when):
    """Queue `command` to be run by the ``at`` utility at time `when`.

    Args:
        command: shell command text; it is written to ``at``'s standard
            input.
        when: ``datetime.datetime`` to run at; it is passed to ``at``
            formatted as ``HH:MM YYYY-MM-DD``.

    Returns:
        None. If ``at`` exits with a non-zero status, a diagnostic including
        its stderr output is written to this process's stderr; no exception
        is raised for that case, so callers can go on queueing other jobs.
    """
    # Function-scope import keeps this block self-contained.
    import sys

    timestamp = when.strftime("%H:%M %Y-%m-%d")
    at_command = ['at', timestamp]
    at = subprocess.Popen(
        at_command,
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    _out, err = at.communicate(command.encode())
    # Only report on failure: the captured output is otherwise discarded, as
    # before, so a successful run stays silent.
    if at.returncode != 0:
        sys.stderr.write(
            "pelican_scheduler: `{}` exited with status {}: {}\n".format(
                " ".join(at_command),
                at.returncode,
                err.decode(errors="replace").strip(),
            )
        )
60     at.communicate(command.encode())
61
62
def main():
    """Sync the working repo, regenerate the site, and queue future regens.

    Steps, in order:
      1. Fetch and hard-reset WORKING_REPO to ``origin/master``.
      2. Run SITEGEN_COMMAND immediately.
      3. For every future post time that has no ``at`` job at that exact
         time, schedule SITEGEN_COMMAND to run then.
    """
    # sync our "working" repo with the bare one

    # but first, don't let the fact that this is running as a hook on the bare
    # repo confuse us
    our_env = os.environ.copy()
    # pop() rather than del: GIT_DIR is absent when the script is run by hand
    # instead of from a hook, and that should not be an error.
    our_env.pop('GIT_DIR', None)

    for git_cmd in [["git", "fetch", "origin"],
                    ["git", "reset", "--hard", "origin/master"]]:
        subprocess.run(git_cmd, cwd=WORKING_REPO, env=our_env)

    # sitegen now! (even if there are no posts to queue, we can at least update
    # /drafts/) and make any back-edits to published posts live
    subprocess.run(SITEGEN_COMMAND,
                   # XXX: `shell=True` is contrary to the moral law
                   shell=True)

    # look for scheduled future posts
    future_publication_times = get_future_publication_times()

    # look at atq
    extant_at_job_times = get_extant_at_job_times()

    # if there are future posts that don't have an atq entry, schedule a
    # site-regen at that time
    to_schedule = future_publication_times - extant_at_job_times
    for time in to_schedule:
        schedule(SITEGEN_COMMAND, time)


if __name__ == "__main__":
    main()