notes/memoir_wordcounts.py

   1 #!/usr/bin/env python3
   2
   3 import csv
   4 import datetime
   5 import re
   6 import subprocess
   7
   8 MONTHS = {
   9     "Jan": 1,
  10     "Feb": 2,
  11     "Mar": 3,
  12     "Apr": 4,
  13     "May": 5,
  14     "Jun": 6,
  15     "Jul": 7,
  16     "Aug": 8,
  17     "Sep": 9,
  18     "Oct": 10,
  19     "Nov": 11,
  20     "Dec": 12,
  21 }
  22
  23 def wordcount_at_this_sha():
  24     result = subprocess.run("wc -w content/2023/blanchards-dangerous-idea-and-the-plight-of-the-lucid-crossdreamer.md content/2023/a-hill-of-validity-in-defense-of-meaning.md content/drafts/if-clarity-seems-like-death-to-them.md  content/drafts/agreeing-with-stalin-in-ways-that-exhibit-generally-rationalist-principles.md content/drafts/guess-ill-die.md content/drafts/on-the-public-anti-epistemology-of-dath-ilan.md content/drafts/standing-under-the-same-sky.md".split(), stdout=subprocess.PIPE)
  25     wc_lines = result.stdout.decode('utf8').split('\n')
  26     total_line = wc_lines[-2]  # last line is empty
  27     return int(total_line.split()[0])
  28
  29 def date_at_this_sha():
  30     result = subprocess.run("git show HEAD".split(), stdout=subprocess.PIPE)
  31     show_lines = result.stdout.decode('utf8').split('\n')
  32     dateline = show_lines[2]
  33     match_groups = re.search("(?P<month>\w{3}) (?P<day>\d{1,2}) \d{2}:\d{2}:\d{2} (?P<year>\d{4})", dateline).groupdict()
  34     return datetime.date(int(match_groups['year']), MONTHS[match_groups['month']], int(match_groups['day']))
  35
  36
  37 def look_back():
  38     wordcounts = []
  39     keep_going = True
  40     while keep_going:
  41         subprocess.run(["git", "checkout", "HEAD~1"])
  42         wordcount = wordcount_at_this_sha()
  43         date = date_at_this_sha()
  44         if date < datetime.date(2022, 4, 20):
  45             keep_going = False
  46         wordcounts.append((date, wordcount))
  47     # don't leave the head detached
  48     subprocess.run(["git", "checkout", "master"])
  49     return sorted(wordcounts)
  50
  51 def normalize_dates(wordcounts):
  52     eod_wordcounts = []
  53     for i in range(len(wordcounts)-1):
  54         date, wordcount = wordcounts[i]
  55         next_date, next_wordcount = wordcounts[i+1]
  56         gap = next_date - date
  57         if gap >= datetime.timedelta(1):
  58             eod_wordcounts.append((date, wordcount))
  59             for i in range(gap.days-1):
  60                 eod_wordcounts.append((date + datetime.timedelta(i+1), wordcount))
  61     return eod_wordcounts
  62
  63 def write_csv(wordcounts):
  64     with open("memoir_wordcounts.csv", 'w') as f:
  65         writer = csv.writer(f)
  66         previous = None
  67         for date, wordcount in wordcounts:
  68             diff = wordcount - previous if previous is not None else ''
  69             writer.writerow([date.strftime("%m/%d/%Y"), wordcount, diff])
  70             previous = wordcount
  71
  72
  73 if __name__ in "__main__":
  74     wordcounts = normalize_dates(look_back())
  75     write_csv(wordcounts)