You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
3.7 KiB
Python

#!/usr/bin/env python3
"""Generate combined JSON data files for the hicalsoft.github.io web UI.
Reads individual JSON files from the letters/ and poetry/ directories and
produces the combined letters.json and poetry.json used by the web pages.

Usage:
    python3 generate_web_data.py            # Generate both
    python3 generate_web_data.py --letters  # Letters only
    python3 generate_web_data.py --poetry   # Poetry only
"""
import argparse
import glob
import json
import os
import sys
# Input directories holding the individual JSON files.
LETTERS_SRC = "letters"
POETRY_SRC = "poetry"

# Combined output files consumed by the web pages.
LETTERS_OUT = os.path.join(
    "hicalsoft.github.io", "letters", "data", "letters.json"
)
POETRY_OUT = os.path.join(
    "hicalsoft.github.io", "poetry", "data", "poetry.json"
)
def generate_letters(src_dir=None, out_path=None):
    """Combine individual letter JSON files into one for the web UI.

    Every ``*.json`` file directly inside the source directory is read in
    sorted path order. Each file is expected to contain a JSON array of
    objects; every object is reduced to a compact record with one-letter
    keys: ``h`` (heading), ``b`` (body), ``a`` (author), ``r`` (recipient),
    ``s`` (source) and ``p`` (period). The result is written as
    ``{"authors": {author: count}, "letters": [...]}`` using compact
    separators, and a short summary is printed to stdout.

    Args:
        src_dir: Directory containing the per-file letter JSON. Defaults to
            the module-level ``LETTERS_SRC``.
        out_path: Path of the combined file to write. Defaults to the
            module-level ``LETTERS_OUT``. Missing parent directories are
            created.

    Raises:
        KeyError: If an entry lacks ``"body"``, ``"author"`` or ``"source"``.
    """
    if src_dir is None:
        src_dir = LETTERS_SRC
    if out_path is None:
        out_path = LETTERS_OUT

    letters = []
    for path in sorted(glob.glob(os.path.join(src_dir, "*.json"))):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(path, encoding="utf-8") as fh:
            entries = json.load(fh)
        letters.extend(
            {
                "h": item.get("heading", ""),
                "b": item["body"],
                "a": item["author"],
                "r": item.get("recipient", ""),
                "s": item["source"],
                "p": item.get("period", ""),
            }
            for item in entries
        )

    # Letters per author, keyed in order of first appearance.
    authors = {}
    for letter in letters:
        authors[letter["a"]] = authors.get(letter["a"], 0) + 1

    out = {"authors": authors, "letters": letters}

    # dirname is "" for a bare filename, and os.makedirs("") would raise.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(out, fh, separators=(",", ":"))

    size_mb = os.path.getsize(out_path) / 1024 / 1024
    print(f"Letters: {len(letters)} letters from {len(authors)} authors ({size_mb:.2f} MB)")
    print(f"{out_path}")
def generate_poetry(src_dir=None, out_path=None):
    """Combine individual poetry JSON files into one for the web UI.

    Every ``*.json`` file directly inside the source directory is read in
    sorted path order. Each file is expected to contain a JSON array of
    objects; every object is reduced to a compact record with one-letter
    keys: ``t`` (title), ``b`` (body), ``a`` (author), ``s`` (source) and
    ``p`` (period). The result is written as
    ``{"authors": {author: count}, "poems": [...]}`` using compact
    separators, and a short summary is printed to stdout.

    Args:
        src_dir: Directory containing the per-file poetry JSON. Defaults to
            the module-level ``POETRY_SRC``.
        out_path: Path of the combined file to write. Defaults to the
            module-level ``POETRY_OUT``. Missing parent directories are
            created.

    Raises:
        KeyError: If an entry lacks ``"body"``, ``"author"`` or ``"source"``.
    """
    if src_dir is None:
        src_dir = POETRY_SRC
    if out_path is None:
        out_path = POETRY_OUT

    poems = []
    for path in sorted(glob.glob(os.path.join(src_dir, "*.json"))):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(path, encoding="utf-8") as fh:
            entries = json.load(fh)
        poems.extend(
            {
                "t": item.get("title", ""),
                "b": item["body"],
                "a": item["author"],
                "s": item["source"],
                "p": item.get("period", ""),
            }
            for item in entries
        )

    # Poems per author, keyed in order of first appearance.
    authors = {}
    for poem in poems:
        authors[poem["a"]] = authors.get(poem["a"], 0) + 1

    out = {"authors": authors, "poems": poems}

    # dirname is "" for a bare filename, and os.makedirs("") would raise.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(out, fh, separators=(",", ":"))

    size_mb = os.path.getsize(out_path) / 1024 / 1024
    print(f"Poetry: {len(poems)} poems from {len(authors)} poets ({size_mb:.2f} MB)")
    print(f"{out_path}")
def main():
    """Parse the command line and build the requested data file(s).

    With no flags (or with both flags) letters and poetry are both
    generated; ``--letters`` or ``--poetry`` alone restricts the run to that
    one. Letters are handled before poetry. If a required source directory
    does not exist, an error is printed to stderr and the process exits with
    status 1.
    """
    cli = argparse.ArgumentParser(
        description="Generate combined JSON data for the hicalsoft.github.io web UI."
    )
    cli.add_argument("--letters", action="store_true", help="Generate letters.json only")
    cli.add_argument("--poetry", action="store_true", help="Generate poetry.json only")
    opts = cli.parse_args()

    # No explicit selection means "build everything".
    build_all = not opts.letters and not opts.poetry

    if opts.letters or build_all:
        if os.path.isdir(LETTERS_SRC):
            generate_letters()
        else:
            print(f"Error: {LETTERS_SRC}/ directory not found. Run download_letters.py first.",
                  file=sys.stderr)
            sys.exit(1)

    if opts.poetry or build_all:
        if os.path.isdir(POETRY_SRC):
            generate_poetry()
        else:
            print(f"Error: {POETRY_SRC}/ directory not found. Run download_poetry.py first.",
                  file=sys.stderr)
            sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()