check in

[Ultimately_Untrue_Thought.git] / notes / memoir_wordcounts.py
diff --git a/notes/memoir_wordcounts.py b/notes/memoir_wordcounts.py

index 6d2e0f9..be5cd65 100755 (executable)
--- a/notes/memoir_wordcounts.py
+++ b/notes/memoir_wordcounts.py
@@ -1,25 +1,75 @@
  #!/usr/bin/env python3
  
+import csv
+import datetime
+import re
  import subprocess
  
-shas = [
-    '347fe081c6',
-    '4681a3764a',
-    '867b6583c7',
-    'a8b07bd83a',
-    'cfdc07319c',
-    'bbb4315cfe'
-]
-
-for sha in shas:
-    subprocess.run(["git", "checkout", sha])
-    subprocess.run("wc content/drafts/blanchards-dangerous-idea-and-the-plight-of-the-lucid-crossdreamer.md content/drafts/a-hill-of-validity-in-defense-of-meaning.md content/drafts/agreeing-with-stalin-in-ways-that-exhibit-generally-rationalist-principles.md".split())
-
-# 30 Apr 347fe081c6   6746
-# 30 May 4681a3764a  10308   +3562
-# 30 Jun 867b6583c7  13313   +3005
-# 31 Jul a8b07bd83a  20391   +7078
-# 28 Aug cfdc07319c  36784  +16393
-# 30 Sep bbb4315cfe  49533  +12749
-# 30 Oct c9b6c312d1  66302  +16769
-# 30 Nov (projected) 83302??
+# Three-letter English month abbreviations mapped to their 1-based month
+# numbers.
+MONTHS = {
+    abbreviation: number
+    for number, abbreviation in enumerate(
+        (
+            "Jan", "Feb", "Mar",
+            "Apr", "May", "Jun",
+            "Jul", "Aug", "Sep",
+            "Oct", "Nov", "Dec",
+        ),
+        start=1,
+    )
+}
+
+def wordcount_at_this_sha():
+    """Return the first number on the final line that `wc -w` prints for the memoir files in the working tree."""
+    wc_command = "wc -w content/2023/blanchards-dangerous-idea-and-the-plight-of-the-lucid-crossdreamer.md content/2023/a-hill-of-validity-in-defense-of-meaning.md content/drafts/if-clarity-seems-like-death-to-them.md  content/drafts/agreeing-with-stalin-in-ways-that-exhibit-generally-rationalist-principles.md content/drafts/guess-ill-die.md content/drafts/on-the-public-anti-epistemology-of-dath-ilan.md content/drafts/standing-under-the-same-sky.md".split()
+    wc_output = subprocess.run(wc_command, stdout=subprocess.PIPE).stdout.decode('utf8')
+    return int(wc_output.split('\n')[-2].split()[0])  # [-1] is the empty string after the trailing newline
+
+def date_at_this_sha():
+    """Return the date in the `Date:` header that `git show HEAD` prints, as a `datetime.date`."""
+    show_lines = subprocess.run("git show HEAD".split(), stdout=subprocess.PIPE).stdout.decode('utf8').split('\n')
+    dateline = next(line for line in show_lines if line.startswith("Date:"))  # merge commits add a `Merge:` header, so the index is not fixed
+    match_groups = re.search(r"(?P<month>\w{3}) (?P<day>\d{1,2}) \d{2}:\d{2}:\d{2} (?P<year>\d{4})", dateline).groupdict()
+    return datetime.date(int(match_groups['year']), MONTHS[match_groups['month']], int(match_groups['day']))
+
+
+def look_back():
+    """Check out ancestors of HEAD one by one until one predates 2022-04-20; return sorted (date, wordcount) pairs."""
+    wordcounts = []
+    try:
+        # A failed checkout (e.g. HEAD has no parent) ends the walk instead of re-measuring the same commit forever.
+        while subprocess.run(["git", "checkout", "HEAD~1"]).returncode == 0:
+            wordcounts.append((date_at_this_sha(), wordcount_at_this_sha()))
+            if wordcounts[-1][0] < datetime.date(2022, 4, 20):
+                break  # the first commit before the cutoff is still recorded
+    finally:
+        # don't leave the head detached, even when a measurement raises
+        subprocess.run(["git", "checkout", "master"])
+    return sorted(wordcounts)
+
+def normalize_dates(wordcounts):
+    """Expand sorted (date, wordcount) pairs to one row per day, carrying counts forward over gaps.
+
+    Only the last entry of each date is kept; the newest entry is always included.
+    """
+    eod_wordcounts = []
+    for (date, wordcount), (next_date, _) in zip(wordcounts, wordcounts[1:]):
+        for offset in range((next_date - date).days):  # empty when both entries share a date
+            eod_wordcounts.append((date + datetime.timedelta(offset), wordcount))
+    eod_wordcounts.extend(wordcounts[-1:])  # the final entry has no successor; keep it
+    return eod_wordcounts
+
+def write_csv(wordcounts):
+    """Write MM/DD/YYYY date, wordcount, and change-from-previous-row columns to memoir_wordcounts.csv."""
+    with open("memoir_wordcounts.csv", 'w', newline='') as f:  # newline='' as the csv docs require, else \r\r\n on Windows
+        writer = csv.writer(f)
+        previous = None
+        for date, wordcount in wordcounts:
+            writer.writerow([date.strftime("%m/%d/%Y"), wordcount, wordcount - previous if previous is not None else ''])  # first row has no diff
+            previous = wordcount
+
+
+if __name__ == "__main__":  # equality, not substring `in`: an import under a name like `main` must not run the script
+    wordcounts = normalize_dates(look_back())
+    write_csv(wordcounts)