c4627612a5dedd5b0440ea44e3e3a6a16a74b348
[Ultimately_Untrue_Thought.git] / memoir_wordcounts.py
1 #!/usr/bin/env python3
2
3 import csv
4 import datetime
5 import re
6 import subprocess
7
# Three-letter English month abbreviations mapped to their 1-based calendar
# month numbers ("Jan" -> 1 ... "Dec" -> 12).
MONTHS = {
    abbreviation: number
    for number, abbreviation in enumerate(
        (
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
        ),
        start=1,
    )
}
22
def wordcount_at_this_sha():
    """Return the combined word count of the memoir posts in the working tree.

    Runs ``wc -w`` over a fixed list of post paths (relative to the current
    working directory) and returns the leading integer of the last line of
    its output.
    """
    command = "wc -w content/2023/blanchards-dangerous-idea-and-the-plight-of-the-lucid-crossdreamer.md content/2023/a-hill-of-validity-in-defense-of-meaning.md content/drafts/if-clarity-seems-like-death-to-them.md  content/drafts/agreeing-with-stalin-in-ways-that-exhibit-generally-rationalist-principles.md content/drafts/zevis-choice.md content/drafts/on-the-public-anti-epistemology-of-dath-ilan.md content/drafts/standing-under-the-same-sky.md".split()
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    report = completed.stdout.decode('utf8')
    # The report ends with a newline, so splitting on '\n' leaves an empty
    # final element; the line we want is the one just before it.
    summary = report.split('\n')[-2]
    count_field = summary.split()[0]
    return int(count_field)
28
def date_at_this_sha():
    """Return the author date of the checked-out commit as a ``datetime.date``.

    Asks git for only the date of ``HEAD``, already formatted as
    ``YYYY-MM-DD``, instead of picking a fixed line out of the full
    ``git show`` output. On a merge commit the extra ``Merge:`` header pushes
    the ``Date:`` line down by one, so a fixed line index lands on the
    ``Author:`` line and the parse fails. Suppressing the patch also avoids
    decoding an arbitrary diff just to read one header.

    Returns:
        datetime.date: the commit's author date, in the timezone recorded in
        the commit (the same value the default ``Date:`` header shows).

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status
            (for example when not run inside a repository).
        ValueError: if git's output is not a ``YYYY-MM-DD`` date.
    """
    result = subprocess.run(
        ["git", "show", "-s", "--format=%ad", "--date=short", "HEAD"],
        stdout=subprocess.PIPE,
        check=True,
    )
    iso_date = result.stdout.decode('utf8').strip()
    return datetime.date.fromisoformat(iso_date)
35
36
def look_back():
    """Walk back through history, sampling the memoir word count per commit.

    Repeatedly checks out ``HEAD~1`` and records ``(date, wordcount)`` for
    each commit reached. The commit that is checked out when the function is
    called is not itself sampled, because the first step is already a move to
    its parent. The walk stops after recording the first commit dated before
    2022-04-20. ``master`` is checked out again before returning, including
    when an error interrupts the walk.

    Returns:
        list[tuple[datetime.date, int]]: samples ordered by date; samples
        sharing a date stay in commit order (oldest first), so the last
        entry for a day is that day's final commit.

    Raises:
        subprocess.CalledProcessError: if ``git checkout HEAD~1`` fails
            (e.g. the root commit was reached or the working tree is dirty).
    """
    wordcounts = []
    try:
        while True:
            # check=True matters: if the checkout fails HEAD does not move,
            # the same commit would be sampled over and over, and the cutoff
            # below would never be reached.
            subprocess.run(["git", "checkout", "HEAD~1"], check=True)
            wordcount = wordcount_at_this_sha()
            date = date_at_this_sha()
            wordcounts.append((date, wordcount))
            if date < datetime.date(2022, 4, 20):
                break
    finally:
        # don't leave the head detached
        subprocess.run(["git", "checkout", "master"])
    # Samples were collected newest-first. Put them in chronological commit
    # order, then sort on the date alone: the sort is stable, so same-day
    # commits keep their order instead of being re-ranked by word count.
    wordcounts.reverse()
    return sorted(wordcounts, key=lambda sample: sample[0])
50
def normalize_dates(wordcounts):
    """Expand date-sorted samples into one entry per calendar day.

    Each sample is compared with the one that follows it. When the following
    sample falls on a later day, the current sample is emitted for its own
    date and repeated for every day up to (but not including) the following
    sample's date. A sample followed by another on the same day emits
    nothing, so only the last sample of each day survives. The final sample
    in the input has no successor and is never emitted.

    Args:
        wordcounts: sequence of ``(date, wordcount)`` pairs ordered by date.

    Returns:
        list of ``(date, wordcount)`` pairs, one per day covered.
    """
    per_day = []
    for (day, count), (following_day, _) in zip(wordcounts, wordcounts[1:]):
        days_until_next = (following_day - day).days
        # Offset 0 is the sample's own day; the remaining offsets carry the
        # same count across days on which nothing was recorded.
        for offset in range(days_until_next):
            per_day.append((day + datetime.timedelta(offset), count))
    return per_day
62
def write_csv(wordcounts):
    """Write the samples to ``memoir_wordcounts.csv`` in the working directory.

    Each row holds the date formatted as ``MM/DD/YYYY``, the word count, and
    the change from the previous row's word count. The first row has no
    previous value, so its change column is left empty. Any existing file of
    the same name is overwritten.

    Args:
        wordcounts: iterable of ``(date, wordcount)`` pairs, in the order the
            rows should be written.
    """
    # newline='' hands line-ending control to the csv writer; without it,
    # platforms that translate "\n" would turn each "\r\n" row terminator
    # into "\r\r\n".
    with open("memoir_wordcounts.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        previous = None
        for date, wordcount in wordcounts:
            diff = wordcount - previous if previous is not None else ''
            writer.writerow([date.strftime("%m/%d/%Y"), wordcount, diff])
            previous = wordcount
71
72
# Script entry point. The comparison must be `==`, not `in`: `in` is a
# substring test against "__main__", which is also true when this file is
# imported under a name such as "main".
if __name__ == "__main__":
    wordcounts = normalize_dates(look_back())
    write_csv(wordcounts)