Add love letters app with Project Gutenberg sources

Python app that downloads and displays random historic love letters from five authentic collections on Project Gutenberg:

- Henry VIII to Anne Boleyn (c. 1527-1528)
- Mary Wollstonecraft to Gilbert Imlay (1793-1795)
- Abelard & Heloise (12th century)
- Napoleon Bonaparte to Josephine (1796-1812)
- John Keats to Fanny Brawne (1819-1820)

Features: local caching, source filtering, multi-letter display, text wrapping, and long letter truncation.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
4 months ago · 7fa79a1ba7
parent f45fe56f94
commit 7fa79a1ba7
3 changed files with 500 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,6 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

+# Love Letters app cache
+.letter_cache/
+
--- a/README.md
+++ b/README.md
@@ -1,2 +1,29 @@
 # letters

+A Python app that displays random historic love letters from authentic sources, downloaded from [Project Gutenberg](https://www.gutenberg.org/).
+
+## Usage
+
+```bash
+python3 love_letters.py              # Show a random love letter
+python3 love_letters.py -n 3         # Show 3 random love letters
+python3 love_letters.py --list       # List available collections
+python3 love_letters.py --source keats_brawne   # Filter by source
+python3 love_letters.py --refresh    # Re-download all sources
+```
+
+## Sources
+
+| Collection | Author → Recipient | Period |
+|---|---|---|
+| The Love Letters of Henry VIII to Anne Boleyn | Henry VIII → Anne Boleyn | c. 1527–1528 |
+| The Love Letters of Mary Wollstonecraft to Gilbert Imlay | Mary Wollstonecraft → Gilbert Imlay | 1793–1795 |
+| Letters of Abelard and Heloise | Abelard & Heloise → each other | 12th century |
+| Napoleon's Letters to Josephine | Napoleon Bonaparte → Josephine | 1796–1812 |
+| Letters of John Keats to Fanny Brawne | John Keats → Fanny Brawne | 1819–1820 |
+
+All texts are sourced from [Project Gutenberg](https://www.gutenberg.org/) and are in the public domain.
+
+## Requirements
+
+Python 3.10+ (no external dependencies). An internet connection is required on first run to download the letter collections; they are cached locally after that.
--- a/love_letters.py
+++ b/love_letters.py
@@ -0,0 +1,470 @@
+#!/usr/bin/env python3
+"""
+Love Letters — Display random historic love letters from Project Gutenberg.
+
+Sources:
+  • Henry VIII to Anne Boleyn (c. 1527–1528)
+  • Mary Wollstonecraft to Gilbert Imlay (1793–1795)
+  • Letters of Abelard and Heloise (12th century)
+  • Napoleon Bonaparte to Josephine (1796–1812)
+  • John Keats to Fanny Brawne (1819–1820)
+"""
+
+import json
+import os
+import random
+import re
+import sys
+import textwrap
+import urllib.request
+
+# Parsed letters are cached as JSON files in a hidden directory that sits
+# next to this script.
+CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".letter_cache")
+
+# Catalogue of the supported collections. Each entry's "id" is the key used
+# for the extractor lookup, the cache file name and the --source CLI choices;
+# "url" is the plain-text download location and "gutenberg_id" is shown in
+# the --list output.
+SOURCES = [
+    {
+        "id": "henry_viii",
+        "title": "The Love Letters of Henry VIII to Anne Boleyn",
+        "author": "Henry VIII",
+        "recipient": "Anne Boleyn",
+        "year": "c. 1527–1528",
+        "url": "https://www.gutenberg.org/cache/epub/32155/pg32155.txt",
+        "gutenberg_id": 32155,
+    },
+    {
+        "id": "wollstonecraft",
+        "title": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
+        "author": "Mary Wollstonecraft",
+        "recipient": "Gilbert Imlay",
+        "year": "1793–1795",
+        "url": "https://www.gutenberg.org/cache/epub/34413/pg34413.txt",
+        "gutenberg_id": 34413,
+    },
+    {
+        "id": "abelard_heloise",
+        "title": "Letters of Abelard and Heloise",
+        "author": "Abelard & Heloise",
+        "recipient": "each other",
+        "year": "12th century",
+        "url": "https://www.gutenberg.org/cache/epub/35977/pg35977.txt",
+        "gutenberg_id": 35977,
+    },
+    {
+        "id": "napoleon",
+        "title": "Napoleon's Letters to Josephine",
+        "author": "Napoleon Bonaparte",
+        "recipient": "Josephine",
+        "year": "1796–1812",
+        "url": "https://www.gutenberg.org/cache/epub/37499/pg37499.txt",
+        "gutenberg_id": 37499,
+    },
+    {
+        "id": "keats_brawne",
+        "title": "Letters of John Keats to Fanny Brawne",
+        "author": "John Keats",
+        "recipient": "Fanny Brawne",
+        "year": "1819–1820",
+        "url": "https://www.gutenberg.org/cache/epub/60433/pg60433.txt",
+        "gutenberg_id": 60433,
+    },
+]
+
+# Horizontal rule printed above and below the header and footer of each
+# displayed letter.
+SEPARATOR = "─" * 60
+
+
+def download_text(url: str) -> str:
+    """Fetch *url* and return the response body as text.
+
+    The request is sent with a custom User-Agent header and a 30 second
+    timeout. The payload is decoded as UTF-8; bytes that cannot be decoded
+    are replaced instead of raising.
+    """
+    request = urllib.request.Request(
+        url,
+        headers={"User-Agent": "LoveLettersApp/1.0"},
+    )
+    with urllib.request.urlopen(request, timeout=30) as response:
+        payload = response.read()
+    return payload.decode("utf-8", errors="replace")
+
+
+def strip_gutenberg_header_footer(text: str) -> str:
+    """Return *text* without the Project Gutenberg header and footer boilerplate.
+
+    Everything up to and including the line that holds the first recognised
+    start marker is dropped, and everything from the earliest recognised end
+    marker onwards is dropped. A side whose marker is missing is left
+    untouched. The result has leading and trailing whitespace stripped.
+    """
+    start_markers = [
+        "*** START OF THE PROJECT GUTENBERG EBOOK",
+        "*** START OF THIS PROJECT GUTENBERG EBOOK",
+        "***START OF THE PROJECT GUTENBERG EBOOK",
+    ]
+    end_markers = [
+        "*** END OF THE PROJECT GUTENBERG EBOOK",
+        "*** END OF THIS PROJECT GUTENBERG EBOOK",
+        "***END OF THE PROJECT GUTENBERG EBOOK",
+        "End of the Project Gutenberg EBook",
+        "End of Project Gutenberg",
+    ]
+
+    for marker in start_markers:
+        idx = text.find(marker)
+        if idx != -1:
+            text = text[idx + len(marker) :]
+            # Drop the rest of the marker line (book title, closing "***").
+            nl = text.find("\n")
+            if nl != -1:
+                text = text[nl + 1 :]
+            break
+
+    # Several end markers can occur in the same file (a plain "End of the
+    # Project Gutenberg EBook ..." line followed by a "*** END OF ..." line),
+    # so cut at whichever one appears first in the text rather than at the
+    # first one in the list; otherwise the earlier line leaks into the body.
+    end_positions = [pos for pos in map(text.find, end_markers) if pos != -1]
+    if end_positions:
+        text = text[: min(end_positions)]
+
+    return text.strip()
+
+
+# ---------------------------------------------------------------------------
+# Per-source letter extraction
+# ---------------------------------------------------------------------------
+
+def extract_henry_viii(text: str) -> list[dict]:
+    """Split the Henry VIII collection into one record per letter.
+
+    Letters are headed with written-out ordinals ("Letter First" through
+    "Letter Eighteenth") following at least one blank line. Chunks shorter
+    than 80 characters, chunks without a heading line, and letters whose
+    body is 50 characters or fewer are skipped. A trailing notes section
+    (introduced by a "Notes" or "NOTES" line) is cut from the body. A letter
+    is attributed to Anne Boleyn writing to Cardinal Wolsey when "Boleyn to"
+    occurs within the first 200 characters of its chunk; otherwise to
+    Henry VIII writing to Anne Boleyn.
+    """
+    boundary = (
+        r"\n{2,}(?=Letter\s+(?:First|Second|Third|Fourth|Fifth|Sixth|Seventh|"
+        r"Eighth|Ninth|Tenth|Eleventh|Twelfth|Thirteenth|Fourteenth|"
+        r"Fifteenth|Sixteenth|Seventeenth|Eighteenth)\b)"
+    )
+    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
+
+    records = []
+    for chunk in re.split(boundary, cleaned):
+        chunk = chunk.strip()
+        if len(chunk) < 80:
+            continue
+        header = re.match(r"(Letter\s+\w+)(?:\s+.*?)?\n", chunk, re.IGNORECASE)
+        if header is None:
+            continue
+
+        content = chunk[header.end():].strip()
+        # Cut the editorial notes that may follow the letter text.
+        for notes_marker in ("\nNotes\n", "\nNOTES\n"):
+            cut = content.find(notes_marker)
+            if cut != -1:
+                content = content[:cut].strip()
+                break
+        if len(content) <= 50:
+            continue
+
+        if "Boleyn to" in chunk[:200]:
+            writer, addressee = "Anne Boleyn", "Cardinal Wolsey"
+        else:
+            writer, addressee = "Henry VIII", "Anne Boleyn"
+
+        records.append({
+            "heading": header.group(1),
+            "body": content,
+            "author": writer,
+            "recipient": addressee,
+            "source": "The Love Letters of Henry VIII to Anne Boleyn",
+            "period": "c. 1527–1528",
+        })
+    return records
+
+
+def extract_wollstonecraft(text: str) -> list[dict]:
+    """Split the Wollstonecraft collection into one record per letter.
+
+    Letters are introduced by a "LETTER <number>" line (Roman or Arabic
+    numerals, matched case-insensitively) following at least one blank line.
+    Chunks shorter than 80 characters and letters whose body is 50
+    characters or fewer are skipped. A chunk without a recognisable heading
+    line is kept with an empty heading and its whole text as the body.
+    """
+    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
+    boundary = r"\n{2,}(?=LETTER\s+[IVXLC0-9]+\.?\s*\n)"
+
+    records = []
+    for chunk in re.split(boundary, cleaned, flags=re.IGNORECASE):
+        chunk = chunk.strip()
+        if len(chunk) < 80:
+            continue
+        header = re.match(r"(LETTER\s+[IVXLC0-9]+\.?)\s*\n", chunk, re.IGNORECASE)
+        if header is None:
+            title, content = "", chunk
+        else:
+            title = header.group(1)
+            content = chunk[header.end():].strip()
+        if len(content) <= 50:
+            continue
+        records.append({
+            "heading": title,
+            "body": content,
+            "author": "Mary Wollstonecraft",
+            "recipient": "Gilbert Imlay",
+            "source": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
+            "period": "1793–1795",
+        })
+    return records
+
+
+def extract_abelard_heloise(text: str) -> list[dict]:
+    """Split the Abelard & Heloise collection into one record per letter.
+
+    Letters are introduced by a "LETTER <number>" line (optionally ending in
+    "." or ":", matched case-insensitively) following at least one blank
+    line. Chunks shorter than 120 characters, chunks without a heading line,
+    and letters whose body is 50 characters or fewer are skipped. The writer
+    is taken from a "Heloise to Abelard" / "Abelard to Heloise" phrase in
+    the first 300 characters of the body; when neither occurs the record
+    keeps the generic "Abelard & Heloise" → "each other" attribution.
+    """
+    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
+    boundary = r"\n{2,}(?=LETTER\s+[IVXLC0-9]+[.:]?\s*\n)"
+
+    records = []
+    for chunk in re.split(boundary, cleaned, flags=re.IGNORECASE):
+        chunk = chunk.strip()
+        if len(chunk) < 120:
+            continue
+        header = re.match(r"(LETTER\s+[IVXLC0-9]+[.:]?)\s*\n", chunk, re.IGNORECASE)
+        if header is None:
+            continue
+        content = chunk[header.end():].strip()
+        if len(content) <= 50:
+            continue
+
+        opening = content[:300].lower()
+        if "heloise to abelard" in opening:
+            writer, addressee = "Heloise", "Abelard"
+        elif "abelard to heloise" in opening:
+            writer, addressee = "Abelard", "Heloise"
+        else:
+            writer, addressee = "Abelard & Heloise", "each other"
+
+        records.append({
+            "heading": header.group(1),
+            "body": content,
+            "author": writer,
+            "recipient": addressee,
+            "source": "Letters of Abelard and Heloise",
+            "period": "12th century",
+        })
+    return records
+
+
+def extract_napoleon(text: str) -> list[dict]:
+    """Extract individual letters from Napoleon's letters to Josephine.
+
+    Letters are headed "No. 1.", "No. 2.", ... on a line of their own
+    following at least one blank line. Chunks shorter than 100 characters,
+    chunks without such a heading, and letters whose body is shorter than
+    80 characters are skipped.
+
+    Returns a list of dicts with the keys "heading", "body", "author",
+    "recipient", "source" and "period".
+    """
+    text = strip_gutenberg_header_footer(text)
+    text = text.replace("\r\n", "\n")
+    # Letters are headed "No. 1.", "No. 2.", etc. on their own line
+    parts = re.split(r"\n{2,}(?=No\.\s*\d+\.\s*\n)", text)
+    letters = []
+    for part in parts:
+        part = part.strip()
+        if not part or len(part) < 100:
+            continue
+        m = re.match(r"(No\.\s*\d+\.)\s*\n", part)
+        if not m:
+            continue
+        heading = m.group(1)
+        body = part[m.end():].strip()
+        # Skip table of contents entries (short lines with page numbers)
+        if len(body) < 80:
+            continue
+        letters.append({
+            "heading": heading,
+            "body": body,
+            "author": "Napoleon Bonaparte",
+            "recipient": "Josephine",
+            # Kept identical to this collection's "title" in SOURCES (as the
+            # other extractors do) so records can be matched back to their
+            # catalogue entry by title.
+            "source": "Napoleon's Letters to Josephine",
+            "period": "1796–1812",
+        })
+    return letters
+
+
+def extract_keats_brawne(text: str) -> list[dict]:
+    """Split the Keats to Fanny Brawne collection into one record per letter.
+
+    Letters are numbered with Roman numerals on a line of their own ("I.",
+    "II.", ...) following at least one blank line; the numeral becomes the
+    heading "Letter <numeral>". Chunks shorter than 100 characters, chunks
+    without such a numeral line, and letters whose body is 50 characters or
+    fewer are skipped. The body is otherwise kept as found.
+    """
+    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
+
+    records = []
+    for chunk in re.split(r"\n{2,}(?=[IVXLC]+\.\s*\n)", cleaned):
+        chunk = chunk.strip()
+        if len(chunk) < 100:
+            continue
+        numeral = re.match(r"([IVXLC]+)\.\s*\n", chunk)
+        if numeral is None:
+            continue
+        content = chunk[numeral.end():].strip()
+        if len(content) <= 50:
+            continue
+        records.append({
+            "heading": f"Letter {numeral.group(1)}",
+            "body": content,
+            "author": "John Keats",
+            "recipient": "Fanny Brawne",
+            "source": "Letters of John Keats to Fanny Brawne",
+            "period": "1819–1820",
+        })
+    return records
+
+
+# Dispatch table mapping each source "id" to the function that splits that
+# collection's raw text into individual letter records.
+EXTRACTORS = {
+    "henry_viii": extract_henry_viii,
+    "wollstonecraft": extract_wollstonecraft,
+    "abelard_heloise": extract_abelard_heloise,
+    "napoleon": extract_napoleon,
+    "keats_brawne": extract_keats_brawne,
+}
+
+
+# ---------------------------------------------------------------------------
+# Caching
+# ---------------------------------------------------------------------------
+
+def get_cache_path(source_id: str) -> str:
+    """Return the path of the JSON cache file for *source_id* inside CACHE_DIR."""
+    filename = f"{source_id}.json"
+    return os.path.join(CACHE_DIR, filename)
+
+
+def load_cached_letters(source_id: str) -> list[dict] | None:
+    """Return the cached letters for *source_id*, or None if no usable cache exists.
+
+    None is returned both when the cache file is missing and when it cannot
+    be read or parsed (for example a truncated file left by an interrupted
+    write), so that a broken cache is treated as a cache miss instead of
+    crashing the program.
+    """
+    path = get_cache_path(source_id)
+    # Open directly instead of checking os.path.exists() first: this avoids
+    # the gap between the check and the open, and lets one handler cover
+    # every way the file can be unusable.
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except (OSError, ValueError):
+        # OSError covers a missing or unreadable file; ValueError covers
+        # json.JSONDecodeError and UnicodeDecodeError (both are subclasses).
+        return None
+
+
+def save_cached_letters(source_id: str, letters: list[dict]) -> None:
+    """Write *letters* to the JSON cache file for *source_id*.
+
+    CACHE_DIR is created if it does not exist yet. The file is written as
+    UTF-8 with non-ASCII characters kept verbatim and a two-space indent.
+    """
+    os.makedirs(CACHE_DIR, exist_ok=True)
+    target = get_cache_path(source_id)
+    serialized = json.dumps(letters, ensure_ascii=False, indent=2)
+    with open(target, "w", encoding="utf-8") as handle:
+        handle.write(serialized)
+
+
+# ---------------------------------------------------------------------------
+# Main logic
+# ---------------------------------------------------------------------------
+
+def fetch_and_parse(source: dict) -> list[dict]:
+    """Return the letters of *source*, using the cache when one is available.
+
+    On a cache miss the text is downloaded from the source's "url", split
+    into letters by the extractor registered for the source's "id", and the
+    result is cached when it is non-empty. A failed download is reported on
+    stdout and yields an empty list, as does a source without a registered
+    extractor.
+    """
+    source_id = source["id"]
+    letters = load_cached_letters(source_id)
+    if letters is not None:
+        return letters
+
+    title = source["title"]
+    print(f"  Downloading: {title}…", flush=True)
+    try:
+        raw_text = download_text(source["url"])
+    except Exception as exc:
+        # Best effort: one unreachable collection must not abort the run.
+        print(f"  ⚠  Failed to download {title}: {exc}")
+        return []
+
+    parse = EXTRACTORS.get(source_id)
+    if parse is None:
+        return []
+
+    letters = parse(raw_text)
+    if letters:
+        # Empty results are not cached, so the next run tries again.
+        save_cached_letters(source_id, letters)
+    return letters
+
+
+def load_all_letters() -> list[dict]:
+    """Return the letters of every entry in SOURCES as one flat list.
+
+    Sources are processed in catalogue order; each one is read from the
+    cache or downloaded by fetch_and_parse().
+    """
+    return [
+        letter
+        for source in SOURCES
+        for letter in fetch_and_parse(source)
+    ]
+
+
+def wrap_text(text: str, width: int = 78) -> str:
+    """Re-flow *text* to at most *width* columns, keeping paragraph breaks.
+
+    Paragraphs are separated by a blank (or whitespace-only) line. Inside a
+    paragraph every run of whitespace, including line breaks, collapses to
+    a single space before the paragraph is re-wrapped. Paragraphs are
+    joined with exactly one empty line.
+    """
+    filled = (
+        textwrap.fill(" ".join(chunk.split()), width=width)
+        for chunk in re.split(r"\n\s*\n", text)
+    )
+    return "\n\n".join(filled)
+
+
+def truncate_letter(body: str, max_chars: int = 3000) -> str:
+    """Shorten *body* to roughly *max_chars* characters and flag the cut.
+
+    Text that already fits is returned unchanged. Otherwise the first
+    *max_chars* characters are kept, trimmed back to the last full stop when
+    that full stop lies beyond the halfway point, and a "letter continues"
+    notice is appended.
+    """
+    notice = "\n\n  […letter continues…]"
+    if len(body) <= max_chars:
+        return body
+
+    head = body[:max_chars]
+    sentence_end = head.rfind(".")
+    # Only end on a sentence boundary if that keeps more than half the budget.
+    if sentence_end > max_chars // 2:
+        head = head[: sentence_end + 1]
+    return head + notice
+
+
+def display_letter(letter: dict) -> None:
+    """Print one letter to stdout: header, wrapped body, then source footer.
+
+    The header shows author, recipient, the heading (when the record has a
+    non-empty one) and the period. The body is truncated and word-wrapped
+    before printing.
+    """
+    header = [
+        "",
+        SEPARATOR,
+        f"  ✉  {letter['author']}  →  {letter['recipient']}",
+    ]
+    heading = letter.get("heading")
+    if heading:
+        header.append(f"     {heading}")
+    header += [f"     ({letter['period']})", SEPARATOR, ""]
+    print(*header, sep="\n")
+
+    print(wrap_text(truncate_letter(letter["body"])))
+
+    print(
+        "",
+        SEPARATOR,
+        f"  Source: {letter['source']}",
+        "  Via Project Gutenberg  •  gutenberg.org",
+        SEPARATOR,
+        "",
+        sep="\n",
+    )
+
+
+def list_sources() -> None:
+    """Print a numbered overview of every collection in SOURCES.
+
+    Each entry shows the title, author → recipient with the period, and the
+    gutenberg.org ebook path, followed by an empty line.
+    """
+    print("\n  Available collections:\n")
+    for number, entry in enumerate(SOURCES, start=1):
+        print(
+            f"  {number}. {entry['title']}",
+            f"     {entry['author']} → {entry['recipient']} ({entry['year']})",
+            f"     gutenberg.org/ebooks/{entry['gutenberg_id']}",
+            "",
+            sep="\n",
+        )
+
+
+def main() -> None:
+    """Command-line entry point: parse options, load letters, print a random pick.
+
+    Options:
+      -n / --count N   number of letters to display (default 1); capped at
+                       the number of letters available
+      --list           print the available collections and exit
+      --refresh        delete the cache directory before loading
+      --source ID      only load and show letters from that collection
+
+    Exits with status 1 when no letters could be loaded, and with an
+    argparse usage error when --count is negative.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Display random historic love letters from Project Gutenberg.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=textwrap.dedent("""\
+            examples:
+              %(prog)s              Show a random love letter
+              %(prog)s -n 3         Show 3 random love letters
+              %(prog)s --list       List available collections
+              %(prog)s --refresh    Re-download all sources
+        """),
+    )
+    parser.add_argument(
+        "-n", "--count", type=int, default=1, metavar="N",
+        help="number of letters to display (default: 1)",
+    )
+    parser.add_argument(
+        "--list", action="store_true",
+        help="list available letter collections",
+    )
+    parser.add_argument(
+        "--refresh", action="store_true",
+        help="clear cache and re-download all sources",
+    )
+    parser.add_argument(
+        "--source", type=str, metavar="ID",
+        choices=[s["id"] for s in SOURCES],
+        help="only show letters from a specific source",
+    )
+
+    args = parser.parse_args()
+
+    if args.list:
+        list_sources()
+        return
+
+    # random.sample() raises ValueError for a negative sample size; report
+    # that as a normal usage error instead of a traceback.
+    if args.count < 0:
+        parser.error("-n/--count must not be negative")
+
+    if args.refresh:
+        import shutil
+        if os.path.isdir(CACHE_DIR):
+            shutil.rmtree(CACHE_DIR)
+        print("  Cache cleared.")
+
+    print("\n  💌 Love Letters — loading collections…\n")
+    if args.source:
+        # Resolve the collection by id rather than by comparing titles, and
+        # load only that collection so the others are not downloaded.
+        # argparse's `choices` guarantees that the id exists in SOURCES.
+        selected = next(s for s in SOURCES if s["id"] == args.source)
+        letters = fetch_and_parse(selected)
+    else:
+        letters = load_all_letters()
+
+    if not letters:
+        if args.source:
+            print(f"  No letters found for source '{args.source}'.")
+        else:
+            print("  No letters could be loaded. Check your internet connection.")
+        sys.exit(1)
+
+    # Never ask for more letters than are available.
+    count = min(args.count, len(letters))
+    for letter in random.sample(letters, count):
+        display_letter(letter)
+
+
+# Run the command-line interface only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    main()