--- /dev/null
+import os
+import re
+import sys
+from collections import Counter
+from urllib.parse import urlparse
+
+# Matches inline Markdown links of the form [text](target).
+# Group 1 is the link text (word characters and whitespace only); group 2 is the
+# target, which must start with "/", "http://" or "https://".
+# Adapted from https://davidwells.io/snippets/regex-match-markdown-links, with the
+# target character class widened to accept `-` and others (thanks to Laurin Neff
+# in the comments there).
+url_regex = re.compile(r"\[([\w\s\d]+)\]\(((?:\/|https?:\/\/)[\w\d./?=#%+&-]+)\)")
+
+def tree(root):
+ text_counter = Counter()
+ link_counter = Counter()
+ domain_counter = Counter()
+ for path, _dirs, filenames in os.walk(root):
+ for filename in filenames:
+ if not filename.endswith(".md"):
+ continue
+ if "drafts" in path:
+ continue
+ filepath = os.path.join(path, filename)
+ with open(filepath) as f:
+ content = f.read()
+ matches = url_regex.finditer(content)
+ for match in matches:
+ text = match.group(1)
+ link = match.group(2)
+ parsed = urlparse(link)
+ text_counter[text] += 1
+ link_counter[link] += 1
+ domain_counter[parsed.netloc] += 1
+ print(text_counter)
+ print(link_counter)
+ print(domain_counter)
+ print(len(domain_counter))
+
+if __name__ == "__main__":
+ tree(sys.argv[1])