#!/usr/bin/env python3 """ Love Letters — Display random historic love letters from Project Gutenberg. Sources: • Henry VIII to Anne Boleyn (c. 1527–1528) • Mary Wollstonecraft to Gilbert Imlay (1793–1795) • Letters of Abelard and Heloise (12th century) • Napoleon Bonaparte to Josephine (1796–1812) • John Keats to Fanny Brawne (1819–1820) """ import json import os import random import re import sys import textwrap import urllib.request CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".letter_cache") SOURCES = [ { "id": "henry_viii", "title": "The Love Letters of Henry VIII to Anne Boleyn", "author": "Henry VIII", "recipient": "Anne Boleyn", "year": "c. 1527–1528", "url": "https://www.gutenberg.org/cache/epub/32155/pg32155.txt", "gutenberg_id": 32155, }, { "id": "wollstonecraft", "title": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay", "author": "Mary Wollstonecraft", "recipient": "Gilbert Imlay", "year": "1793–1795", "url": "https://www.gutenberg.org/cache/epub/34413/pg34413.txt", "gutenberg_id": 34413, }, { "id": "abelard_heloise", "title": "Letters of Abelard and Heloise", "author": "Abelard & Heloise", "recipient": "each other", "year": "12th century", "url": "https://www.gutenberg.org/cache/epub/35977/pg35977.txt", "gutenberg_id": 35977, }, { "id": "napoleon", "title": "Napoleon's Letters to Josephine", "author": "Napoleon Bonaparte", "recipient": "Josephine", "year": "1796–1812", "url": "https://www.gutenberg.org/cache/epub/37499/pg37499.txt", "gutenberg_id": 37499, }, { "id": "keats_brawne", "title": "Letters of John Keats to Fanny Brawne", "author": "John Keats", "recipient": "Fanny Brawne", "year": "1819–1820", "url": "https://www.gutenberg.org/cache/epub/60433/pg60433.txt", "gutenberg_id": 60433, }, ] SEPARATOR = "─" * 60 def download_text(url: str) -> str: """Download a plain-text file from Project Gutenberg.""" req = urllib.request.Request(url, headers={"User-Agent": "LoveLettersApp/1.0"}) with urllib.request.urlopen(req, timeout=30) as resp: return resp.read().decode("utf-8", errors="replace") def strip_gutenberg_header_footer(text: str) -> str: """Remove the Project Gutenberg header and footer boilerplate.""" start_markers = [ "*** START OF THE PROJECT GUTENBERG EBOOK", "*** START OF THIS PROJECT GUTENBERG EBOOK", "***START OF THE PROJECT GUTENBERG EBOOK", ] end_markers = [ "*** END OF THE PROJECT GUTENBERG EBOOK", "*** END OF THIS PROJECT GUTENBERG EBOOK", "***END OF THE PROJECT GUTENBERG EBOOK", "End of the Project Gutenberg EBook", "End of Project Gutenberg", ] for marker in start_markers: idx = text.find(marker) if idx != -1: text = text[idx + len(marker) :] nl = text.find("\n") if nl != -1: text = text[nl + 1 :] break for marker in end_markers: idx = text.find(marker) if idx != -1: text = text[:idx] break return text.strip() # --------------------------------------------------------------------------- # Per-source letter extraction # --------------------------------------------------------------------------- def extract_henry_viii(text: str) -> list[dict]: """Extract individual letters from the Henry VIII collection.""" text = strip_gutenberg_header_footer(text) text = text.replace("\r\n", "\n") # Letters use written-out ordinals: "Letter First", "Letter Second", etc. parts = re.split( r"\n{2,}(?=Letter\s+(?:First|Second|Third|Fourth|Fifth|Sixth|Seventh|" r"Eighth|Ninth|Tenth|Eleventh|Twelfth|Thirteenth|Fourteenth|" r"Fifteenth|Sixteenth|Seventeenth|Eighteenth)\b)", text, ) letters = [] for part in parts: part = part.strip() if not part or len(part) < 80: continue m = re.match(r"(Letter\s+\w+)(?:\s+.*?)?\n", part, re.IGNORECASE) if not m: continue heading = m.group(1) body = part[m.end():].strip() # Remove notes section at the end notes_idx = body.find("\nNotes\n") if notes_idx == -1: notes_idx = body.find("\nNOTES\n") if notes_idx != -1: body = body[:notes_idx].strip() author = "Henry VIII" recipient = "Anne Boleyn" if "Anne Boleyn to Wolsey" in part[:200] or "Boleyn to" in part[:200]: author = "Anne Boleyn" recipient = "Cardinal Wolsey" if len(body) > 50: letters.append({ "heading": heading, "body": body, "author": author, "recipient": recipient, "source": "The Love Letters of Henry VIII to Anne Boleyn", "period": "c. 1527–1528", }) return letters def extract_wollstonecraft(text: str) -> list[dict]: """Extract individual letters from the Wollstonecraft collection.""" text = strip_gutenberg_header_footer(text) text = text.replace("\r\n", "\n") parts = re.split(r"\n{2,}(?=LETTER\s+[IVXLC0-9]+\.?\s*\n)", text, flags=re.IGNORECASE) letters = [] for part in parts: part = part.strip() if not part or len(part) < 80: continue m = re.match(r"(LETTER\s+[IVXLC0-9]+\.?)\s*\n", part, re.IGNORECASE) heading = m.group(1) if m else "" body = part[m.end():].strip() if m else part if len(body) > 50: letters.append({ "heading": heading, "body": body, "author": "Mary Wollstonecraft", "recipient": "Gilbert Imlay", "source": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay", "period": "1793–1795", }) return letters def extract_abelard_heloise(text: str) -> list[dict]: """Extract individual letters from the Abelard & Heloise collection.""" text = strip_gutenberg_header_footer(text) text = text.replace("\r\n", "\n") parts = re.split(r"\n{2,}(?=LETTER\s+[IVXLC0-9]+[.:]?\s*\n)", text, flags=re.IGNORECASE) letters = [] for part in parts: part = part.strip() if not part or len(part) < 120: continue m = re.match(r"(LETTER\s+[IVXLC0-9]+[.:]?)\s*\n", part, re.IGNORECASE) if not m: continue heading = m.group(1) body = part[m.end():].strip() author = "Abelard & Heloise" recipient = "each other" lower = body[:300].lower() if "heloise to abelard" in lower: author = "Heloise" recipient = "Abelard" elif "abelard to heloise" in lower: author = "Abelard" recipient = "Heloise" if len(body) > 50: letters.append({ "heading": heading, "body": body, "author": author, "recipient": recipient, "source": "Letters of Abelard and Heloise", "period": "12th century", }) return letters def extract_napoleon(text: str) -> list[dict]: """Extract individual letters from Napoleon's letters to Josephine.""" text = strip_gutenberg_header_footer(text) text = text.replace("\r\n", "\n") # Letters are headed "No. 1.", "No. 2.", etc. on their own line parts = re.split(r"\n{2,}(?=No\.\s*\d+\.\s*\n)", text) letters = [] for part in parts: part = part.strip() if not part or len(part) < 100: continue m = re.match(r"(No\.\s*\d+\.)\s*\n", part) if not m: continue heading = m.group(1) body = part[m.end():].strip() # Skip table of contents entries (short lines with page numbers) if len(body) < 80: continue letters.append({ "heading": heading, "body": body, "author": "Napoleon Bonaparte", "recipient": "Josephine", "source": "Napoleon's Letters to Josephine, 1796–1812", "period": "1796–1812", }) return letters def extract_keats_brawne(text: str) -> list[dict]: """Extract individual letters from Keats to Fanny Brawne.""" text = strip_gutenberg_header_footer(text) text = text.replace("\r\n", "\n") # Letters are numbered with Roman numerals on their own line: "I.", "II.", etc. parts = re.split(r"\n{2,}(?=[IVXLC]+\.\s*\n)", text) letters = [] for part in parts: part = part.strip() if not part or len(part) < 100: continue m = re.match(r"([IVXLC]+)\.\s*\n", part) if not m: continue heading = f"Letter {m.group(1)}" body = part[m.end():].strip() # Remove editorial footnotes in brackets if len(body) > 50: letters.append({ "heading": heading, "body": body, "author": "John Keats", "recipient": "Fanny Brawne", "source": "Letters of John Keats to Fanny Brawne", "period": "1819–1820", }) return letters EXTRACTORS = { "henry_viii": extract_henry_viii, "wollstonecraft": extract_wollstonecraft, "abelard_heloise": extract_abelard_heloise, "napoleon": extract_napoleon, "keats_brawne": extract_keats_brawne, } # --------------------------------------------------------------------------- # Caching # --------------------------------------------------------------------------- def get_cache_path(source_id: str) -> str: return os.path.join(CACHE_DIR, f"{source_id}.json") def load_cached_letters(source_id: str) -> list[dict] | None: path = get_cache_path(source_id) if os.path.exists(path): with open(path, "r", encoding="utf-8") as f: return json.load(f) return None def save_cached_letters(source_id: str, letters: list[dict]) -> None: os.makedirs(CACHE_DIR, exist_ok=True) with open(get_cache_path(source_id), "w", encoding="utf-8") as f: json.dump(letters, f, ensure_ascii=False, indent=2) # --------------------------------------------------------------------------- # Main logic # --------------------------------------------------------------------------- def fetch_and_parse(source: dict) -> list[dict]: """Download, extract, and cache letters for a given source.""" cached = load_cached_letters(source["id"]) if cached is not None: return cached print(f" Downloading: {source['title']}…", flush=True) try: raw = download_text(source["url"]) except Exception as e: print(f" ⚠ Failed to download {source['title']}: {e}") return [] extractor = EXTRACTORS.get(source["id"]) if extractor is None: return [] letters = extractor(raw) if letters: save_cached_letters(source["id"], letters) return letters def load_all_letters() -> list[dict]: """Load letters from all sources, downloading as needed.""" all_letters: list[dict] = [] for source in SOURCES: letters = fetch_and_parse(source) all_letters.extend(letters) return all_letters def wrap_text(text: str, width: int = 78) -> str: """Word-wrap text while preserving paragraph breaks.""" paragraphs = re.split(r"\n\s*\n", text) wrapped = [] for para in paragraphs: para = " ".join(para.split()) wrapped.append(textwrap.fill(para, width=width)) return "\n\n".join(wrapped) def truncate_letter(body: str, max_chars: int = 3000) -> str: """Truncate very long letters with an ellipsis note.""" if len(body) <= max_chars: return body truncated = body[:max_chars] last_period = truncated.rfind(".") if last_period > max_chars // 2: truncated = truncated[: last_period + 1] return truncated + "\n\n […letter continues…]" def display_letter(letter: dict) -> None: """Pretty-print a single love letter to the terminal.""" print() print(SEPARATOR) print(f" ✉ {letter['author']} → {letter['recipient']}") if letter.get("heading"): print(f" {letter['heading']}") print(f" ({letter['period']})") print(SEPARATOR) print() body = truncate_letter(letter["body"]) print(wrap_text(body)) print() print(SEPARATOR) print(f" Source: {letter['source']}") print(f" Via Project Gutenberg • gutenberg.org") print(SEPARATOR) print() def list_sources() -> None: """Print available letter collections.""" print("\n Available collections:\n") for i, src in enumerate(SOURCES, 1): print(f" {i}. {src['title']}") print(f" {src['author']} → {src['recipient']} ({src['year']})") print(f" gutenberg.org/ebooks/{src['gutenberg_id']}") print() def main() -> None: import argparse parser = argparse.ArgumentParser( description="Display random historic love letters from Project Gutenberg.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=textwrap.dedent("""\ examples: %(prog)s Show a random love letter %(prog)s -n 3 Show 3 random love letters %(prog)s --list List available collections %(prog)s --refresh Re-download all sources """), ) parser.add_argument( "-n", "--count", type=int, default=1, metavar="N", help="number of letters to display (default: 1)", ) parser.add_argument( "--list", action="store_true", help="list available letter collections", ) parser.add_argument( "--refresh", action="store_true", help="clear cache and re-download all sources", ) parser.add_argument( "--source", type=str, metavar="ID", choices=[s["id"] for s in SOURCES], help="only show letters from a specific source", ) args = parser.parse_args() if args.list: list_sources() return if args.refresh: import shutil if os.path.isdir(CACHE_DIR): shutil.rmtree(CACHE_DIR) print(" Cache cleared.") print("\n 💌 Love Letters — loading collections…\n") all_letters = load_all_letters() if not all_letters: print(" No letters could be loaded. Check your internet connection.") sys.exit(1) if args.source: all_letters = [l for l in all_letters if any( s["id"] == args.source and l["source"] == s["title"] for s in SOURCES )] if not all_letters: print(f" No letters found for source '{args.source}'.") sys.exit(1) count = min(args.count, len(all_letters)) chosen = random.sample(all_letters, count) for letter in chosen: display_letter(letter) if __name__ == "__main__": main()