replace Slate Star links with last Internet Archive version
[Ultimately_Untrue_Thought.git] / notes / slate_starchive.py
diff --git a/notes/slate_starchive.py b/notes/slate_starchive.py
new file mode 100644 (file)
index 0000000..d4f3071
--- /dev/null
@@ -0,0 +1,49 @@
+import os
+import re
+import requests
+import sys
+
+
+# Matches dated Slate Star Codex post permalinks over http or https, e.g.
+# https://slatestarcodex.com/2014/07/30/some-post-slug/ (the trailing slash is
+# required). The dot is escaped so only the literal hostname matches, not
+# "slatestarcodex" + any character + "com".
+slate_sturl_regex = re.compile(r"https?://slatestarcodex\.com/\d{4}/\d{2}/\d{2}/[-a-z0-9]+/")
+
+
+def _fetch_archive_url(link_url):
+    """Return the closest Wayback Machine snapshot URL for link_url, or None.
+
+    Queries the Internet Archive availability endpoint and reads
+    ['archived_snapshots']['closest']['url'] from its JSON reply. Any failure
+    (network error, timeout, non-JSON body, or a reply without that key path)
+    is reported on stdout and yields None instead of raising, so one bad
+    lookup does not abort a whole batch run.
+    """
+    try:
+        archive_response = requests.get(
+            "http://archive.org/wayback/available?url={}".format(link_url),
+            # requests waits forever by default; bound the wait explicitly.
+            timeout=30,
+        )
+    except requests.RequestException as error:
+        print(
+            "didn't successfully get an archive link for {}: {}".format(
+                link_url, error
+            )
+        )
+        return None
+    try:
+        return archive_response.json()['archived_snapshots']['closest']['url']
+    except (KeyError, TypeError, ValueError):
+        # KeyError/TypeError: JSON without the expected key path (e.g. no
+        # snapshot recorded); ValueError: the body was not JSON at all.
+        print(
+            "didn't successfully get an archive link for {}: {}".format(
+                link_url, archive_response.text
+            )
+        )
+        return None
+
+
+def slate_starchive_post_content(content):
+    """Return content with Slate Star Codex post links swapped for archive links.
+
+    Every substring matched by slate_sturl_regex is looked up in the Internet
+    Archive and, when a snapshot URL is found, replaced by it. Links whose
+    lookup fails are left unchanged and a message is printed.
+
+    Each distinct URL is looked up only once per call. Matches that are the
+    tail of a longer URL (directly preceded by "/", as in an existing
+    web.archive.org link) are skipped, so running this over already-revised
+    text does not wrap archive links a second time.
+
+    content: text to scan (str).
+    Returns the revised text; equal to content when nothing was replaced.
+    """
+    archive_urls = {}  # link URL -> snapshot URL, or None if the lookup failed
+
+    def substitute(match):
+        link_url = match.group()
+        start = match.start()
+        if start > 0 and content[start - 1] == "/":
+            # Embedded in another URL's path: leave as is.
+            return link_url
+        if link_url not in archive_urls:
+            archive_url = _fetch_archive_url(link_url)
+            if archive_url:
+                print("replacing \033[93m{}\033[0m with \033[92m{}\033[0m".format(link_url, archive_url))
+            archive_urls[link_url] = archive_url
+        return archive_urls[link_url] or link_url
+
+    # Substituting by match position (rather than str.replace on the whole
+    # text) guarantees that only the occurrences vetted above are rewritten.
+    return slate_sturl_regex.sub(substitute, content)
+
+
+def tree(root):
+    """Rewrite Slate Star Codex links in every ".md" file under root, in place.
+
+    Walks root recursively with os.walk, passes the text of each file whose
+    name ends in ".md" through slate_starchive_post_content, and writes the
+    file back only when the text actually changed. Progress is printed to
+    stdout.
+
+    root: path of the directory to walk.
+    """
+    for path, _dirs, filenames in os.walk(root):
+        for filename in filenames:
+            if not filename.endswith(".md"):
+                continue
+            filepath = os.path.join(path, filename)
+            # Explicit UTF-8 avoids depending on the locale's default encoding;
+            # newline="" disables newline translation so the file's existing
+            # line endings survive the round trip untouched.
+            with open(filepath, encoding="utf-8", newline="") as f:
+                print("examining {}".format(filepath))
+                content = f.read()
+            # The rewrite runs after the read handle is closed.
+            revised = slate_starchive_post_content(content)
+            if revised != content:
+                with open(filepath, 'w', encoding="utf-8", newline="") as g:
+                    print("revising {}".format(filepath))
+                    g.write(revised)
+
+
+if __name__ == "__main__":
+    # The first command-line argument is the root directory to scan. Exit with
+    # a usage message (non-zero status) rather than an IndexError traceback
+    # when it is missing.
+    if len(sys.argv) < 2:
+        sys.exit("usage: {} ROOT_DIRECTORY".format(sys.argv[0]))
+    tree(sys.argv[1])