check in
[Ultimately_Untrue_Thought.git] / notes / slate_starchive.py
1 import os
2 import re
3 import requests
4 import sys
5
6
# Matches a Slate Star Codex post permalink: scheme, host, /YYYY/MM/DD/, slug,
# and trailing slash. The dots in the hostname are escaped so that they match
# a literal "." only, rather than any character.
slate_sturl_regex = re.compile(r"https?://slatestarcodex\.com/\d{4}/\d{2}/\d{2}/[-a-z0-9]+/")
8
9
def slate_starchive_post_content(content):
    """Return `content` with Slate Star Codex links swapped for archive links.

    Every URL matched by the module-level `slate_sturl_regex` is looked up
    through the archive.org "wayback/available" endpoint. If the response
    names a closest snapshot, the link is replaced by that snapshot's URL;
    otherwise the link is left untouched and the failure is printed.

    Each distinct link is looked up only once, and the substitution is done in
    a single pass so that inserted snapshot URLs (which themselves contain the
    original link) are never rewritten again. Links that already sit behind an
    "archive.org/web/<timestamp>/" prefix are skipped, which makes running the
    function on its own output a no-op.

    Args:
        content: Text to scan for links.

    Returns:
        The text with every successfully archived link replaced.
    """
    # Tail of a snapshot URL prefix, e.g. "...archive.org/web/20200623000000/".
    archived_prefix = re.compile(r"archive\.org/web/[^/\s]+/$")
    # link URL -> snapshot URL, or None when no snapshot could be obtained.
    lookups = {}

    def find_archive_url(link_url):
        # Ask archive.org for the closest snapshot; None means "leave as is".
        try:
            archive_response = requests.get(
                "http://archive.org/wayback/available?url={}".format(link_url),
                timeout=30,  # without a timeout a stalled connection hangs forever
            )
        except requests.RequestException as error:
            print(
                "didn't successfully get an archive link for {}: {}".format(
                    link_url, error
                )
            )
            return None
        try:
            archive_url = archive_response.json()['archived_snapshots']['closest']['url']
        except (KeyError, TypeError, ValueError):
            # KeyError: no snapshot listed; ValueError: body is not JSON;
            # TypeError: JSON of an unexpected shape.
            print(
                "didn't successfully get an archive link for {}: {}".format(
                    link_url, archive_response.text
                )
            )
            return None
        print("replacing \033[93m{}\033[0m with \033[92m{}\033[0m".format(link_url, archive_url))
        return archive_url

    def substitute(match):
        link_url = match.group()
        # Only a short window before the match is needed to spot the prefix.
        preceding = match.string[max(0, match.start() - 64):match.start()]
        if archived_prefix.search(preceding):
            return link_url
        if link_url not in lookups:
            lookups[link_url] = find_archive_url(link_url)
        return lookups[link_url] or link_url

    return slate_sturl_regex.sub(substitute, content)
30
31
def tree(root):
    """Rewrite Slate Star Codex links in every ".md" file under `root`.

    Walks `root` recursively with `os.walk`. Each file whose name ends in
    ".md" is read, passed through `slate_starchive_post_content`, and written
    back only if the returned text differs. Other files are ignored.

    Files are read and written as UTF-8 with newline translation disabled, so
    the result does not depend on the platform's default encoding and a
    rewritten file keeps its original line endings.

    Args:
        root: Directory to walk.
    """
    for path, _dirs, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith(".md"):
                continue
            filepath = os.path.join(path, filename)
            print("examining {}".format(filepath))
            with open(filepath, encoding="utf-8", newline="") as f:
                content = f.read()
            # The file is closed before the (slow, networked) link lookups run.
            revised = slate_starchive_post_content(content)
            if revised != content:
                print("revising {}".format(filepath))
                with open(filepath, 'w', encoding="utf-8", newline="") as g:
                    g.write(revised)
46
47
if __name__ == "__main__":
    # Exactly one argument is expected: the directory to process. Exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) != 2:
        sys.exit("usage: {} <directory>".format(sys.argv[0]))
    tree(sys.argv[1])