diff --git a/.gitignore b/.gitignore index 5d381cc..63fde26 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,6 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +# Love Letters app cache +.letter_cache/ + diff --git a/README.md b/README.md index 286c9d9..4bf2447 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,29 @@ # letters +A Python app that displays random historic love letters from authentic sources, downloaded from [Project Gutenberg](https://www.gutenberg.org/). + +## Usage + +```bash +python3 love_letters.py # Show a random love letter +python3 love_letters.py -n 3 # Show 3 random love letters +python3 love_letters.py --list # List available collections +python3 love_letters.py --source keats_brawne # Filter by source +python3 love_letters.py --refresh # Re-download all sources +``` + +## Sources + +| Collection | Author → Recipient | Period | +|---|---|---| +| The Love Letters of Henry VIII to Anne Boleyn | Henry VIII → Anne Boleyn | c. 1527–1528 | +| The Love Letters of Mary Wollstonecraft to Gilbert Imlay | Mary Wollstonecraft → Gilbert Imlay | 1793–1795 | +| Letters of Abelard and Heloise | Abelard & Heloise → each other | 12th century | +| Napoleon's Letters to Josephine | Napoleon Bonaparte → Josephine | 1796–1812 | +| Letters of John Keats to Fanny Brawne | John Keats → Fanny Brawne | 1819–1820 | + +All texts are sourced from [Project Gutenberg](https://www.gutenberg.org/) and are in the public domain. + +## Requirements + +Python 3.10+ (no external dependencies). An internet connection is required on first run to download the letter collections; they are cached locally after that. \ No newline at end of file diff --git a/love_letters.py b/love_letters.py new file mode 100644 index 0000000..866b95b --- /dev/null +++ b/love_letters.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python3 +""" +Love Letters — Display random historic love letters from Project Gutenberg. + +Sources: + • Henry VIII to Anne Boleyn (c. 1527–1528) + • Mary Wollstonecraft to Gilbert Imlay (1793–1795) + • Letters of Abelard and Heloise (12th century) + • Napoleon Bonaparte to Josephine (1796–1812) + • John Keats to Fanny Brawne (1819–1820) +""" + +import json +import os +import random +import re +import sys +import textwrap +import urllib.request + +CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".letter_cache") + +SOURCES = [ + { + "id": "henry_viii", + "title": "The Love Letters of Henry VIII to Anne Boleyn", + "author": "Henry VIII", + "recipient": "Anne Boleyn", + "year": "c. 1527–1528", + "url": "https://www.gutenberg.org/cache/epub/32155/pg32155.txt", + "gutenberg_id": 32155, + }, + { + "id": "wollstonecraft", + "title": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay", + "author": "Mary Wollstonecraft", + "recipient": "Gilbert Imlay", + "year": "1793–1795", + "url": "https://www.gutenberg.org/cache/epub/34413/pg34413.txt", + "gutenberg_id": 34413, + }, + { + "id": "abelard_heloise", + "title": "Letters of Abelard and Heloise", + "author": "Abelard & Heloise", + "recipient": "each other", + "year": "12th century", + "url": "https://www.gutenberg.org/cache/epub/35977/pg35977.txt", + "gutenberg_id": 35977, + }, + { + "id": "napoleon", + "title": "Napoleon's Letters to Josephine", + "author": "Napoleon Bonaparte", + "recipient": "Josephine", + "year": "1796–1812", + "url": "https://www.gutenberg.org/cache/epub/37499/pg37499.txt", + "gutenberg_id": 37499, + }, + { + "id": "keats_brawne", + "title": "Letters of John Keats to Fanny Brawne", + "author": "John Keats", + "recipient": "Fanny Brawne", + "year": "1819–1820", + "url": "https://www.gutenberg.org/cache/epub/60433/pg60433.txt", + "gutenberg_id": 60433, + }, +] + +SEPARATOR = "─" * 60 + + +def download_text(url: str) -> str: + """Download a plain-text file from Project Gutenberg.""" + req = urllib.request.Request(url, headers={"User-Agent": "LoveLettersApp/1.0"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return resp.read().decode("utf-8", errors="replace") + + +def strip_gutenberg_header_footer(text: str) -> str: + """Remove the Project Gutenberg header and footer boilerplate.""" + start_markers = [ + "*** START OF THE PROJECT GUTENBERG EBOOK", + "*** START OF THIS PROJECT GUTENBERG EBOOK", + "***START OF THE PROJECT GUTENBERG EBOOK", + ] + end_markers = [ + "*** END OF THE PROJECT GUTENBERG EBOOK", + "*** END OF THIS PROJECT GUTENBERG EBOOK", + "***END OF THE PROJECT GUTENBERG EBOOK", + "End of the Project Gutenberg EBook", + "End of Project Gutenberg", + ] + + for marker in start_markers: + idx = text.find(marker) + if idx != -1: + text = text[idx + len(marker) :] + nl = text.find("\n") + if nl != -1: + text = text[nl + 1 :] + break + + for marker in end_markers: + idx = text.find(marker) + if idx != -1: + text = text[:idx] + break + + return text.strip() + + +# --------------------------------------------------------------------------- +# Per-source letter extraction +# --------------------------------------------------------------------------- + +def extract_henry_viii(text: str) -> list[dict]: + """Extract individual letters from the Henry VIII collection.""" + text = strip_gutenberg_header_footer(text) + text = text.replace("\r\n", "\n") + # Letters use written-out ordinals: "Letter First", "Letter Second", etc. + parts = re.split( + r"\n{2,}(?=Letter\s+(?:First|Second|Third|Fourth|Fifth|Sixth|Seventh|" + r"Eighth|Ninth|Tenth|Eleventh|Twelfth|Thirteenth|Fourteenth|" + r"Fifteenth|Sixteenth|Seventeenth|Eighteenth)\b)", + text, + ) + letters = [] + for part in parts: + part = part.strip() + if not part or len(part) < 80: + continue + m = re.match(r"(Letter\s+\w+)(?:\s+.*?)?\n", part, re.IGNORECASE) + if not m: + continue + heading = m.group(1) + body = part[m.end():].strip() + # Remove notes section at the end + notes_idx = body.find("\nNotes\n") + if notes_idx == -1: + notes_idx = body.find("\nNOTES\n") + if notes_idx != -1: + body = body[:notes_idx].strip() + author = "Henry VIII" + recipient = "Anne Boleyn" + if "Anne Boleyn to Wolsey" in part[:200] or "Boleyn to" in part[:200]: + author = "Anne Boleyn" + recipient = "Cardinal Wolsey" + if len(body) > 50: + letters.append({ + "heading": heading, + "body": body, + "author": author, + "recipient": recipient, + "source": "The Love Letters of Henry VIII to Anne Boleyn", + "period": "c. 1527–1528", + }) + return letters + + +def extract_wollstonecraft(text: str) -> list[dict]: + """Extract individual letters from the Wollstonecraft collection.""" + text = strip_gutenberg_header_footer(text) + text = text.replace("\r\n", "\n") + parts = re.split(r"\n{2,}(?=LETTER\s+[IVXLC0-9]+\.?\s*\n)", text, flags=re.IGNORECASE) + letters = [] + for part in parts: + part = part.strip() + if not part or len(part) < 80: + continue + m = re.match(r"(LETTER\s+[IVXLC0-9]+\.?)\s*\n", part, re.IGNORECASE) + heading = m.group(1) if m else "" + body = part[m.end():].strip() if m else part + if len(body) > 50: + letters.append({ + "heading": heading, + "body": body, + "author": "Mary Wollstonecraft", + "recipient": "Gilbert Imlay", + "source": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay", + "period": "1793–1795", + }) + return letters + + +def extract_abelard_heloise(text: str) -> list[dict]: + """Extract individual letters from the Abelard & Heloise collection.""" + text = strip_gutenberg_header_footer(text) + text = text.replace("\r\n", "\n") + parts = re.split(r"\n{2,}(?=LETTER\s+[IVXLC0-9]+[.:]?\s*\n)", text, flags=re.IGNORECASE) + letters = [] + for part in parts: + part = part.strip() + if not part or len(part) < 120: + continue + m = re.match(r"(LETTER\s+[IVXLC0-9]+[.:]?)\s*\n", part, re.IGNORECASE) + if not m: + continue + heading = m.group(1) + body = part[m.end():].strip() + author = "Abelard & Heloise" + recipient = "each other" + lower = body[:300].lower() + if "heloise to abelard" in lower: + author = "Heloise" + recipient = "Abelard" + elif "abelard to heloise" in lower: + author = "Abelard" + recipient = "Heloise" + if len(body) > 50: + letters.append({ + "heading": heading, + "body": body, + "author": author, + "recipient": recipient, + "source": "Letters of Abelard and Heloise", + "period": "12th century", + }) + return letters + + +def extract_napoleon(text: str) -> list[dict]: + """Extract individual letters from Napoleon's letters to Josephine.""" + text = strip_gutenberg_header_footer(text) + text = text.replace("\r\n", "\n") + # Letters are headed "No. 1.", "No. 2.", etc. on their own line + parts = re.split(r"\n{2,}(?=No\.\s*\d+\.\s*\n)", text) + letters = [] + for part in parts: + part = part.strip() + if not part or len(part) < 100: + continue + m = re.match(r"(No\.\s*\d+\.)\s*\n", part) + if not m: + continue + heading = m.group(1) + body = part[m.end():].strip() + # Skip table of contents entries (short lines with page numbers) + if len(body) < 80: + continue + letters.append({ + "heading": heading, + "body": body, + "author": "Napoleon Bonaparte", + "recipient": "Josephine", + "source": "Napoleon's Letters to Josephine, 1796–1812", + "period": "1796–1812", + }) + return letters + + +def extract_keats_brawne(text: str) -> list[dict]: + """Extract individual letters from Keats to Fanny Brawne.""" + text = strip_gutenberg_header_footer(text) + text = text.replace("\r\n", "\n") + # Letters are numbered with Roman numerals on their own line: "I.", "II.", etc. + parts = re.split(r"\n{2,}(?=[IVXLC]+\.\s*\n)", text) + letters = [] + for part in parts: + part = part.strip() + if not part or len(part) < 100: + continue + m = re.match(r"([IVXLC]+)\.\s*\n", part) + if not m: + continue + heading = f"Letter {m.group(1)}" + body = part[m.end():].strip() + # Remove editorial footnotes in brackets + if len(body) > 50: + letters.append({ + "heading": heading, + "body": body, + "author": "John Keats", + "recipient": "Fanny Brawne", + "source": "Letters of John Keats to Fanny Brawne", + "period": "1819–1820", + }) + return letters + + +EXTRACTORS = { + "henry_viii": extract_henry_viii, + "wollstonecraft": extract_wollstonecraft, + "abelard_heloise": extract_abelard_heloise, + "napoleon": extract_napoleon, + "keats_brawne": extract_keats_brawne, +} + + +# --------------------------------------------------------------------------- +# Caching +# --------------------------------------------------------------------------- + +def get_cache_path(source_id: str) -> str: + return os.path.join(CACHE_DIR, f"{source_id}.json") + + +def load_cached_letters(source_id: str) -> list[dict] | None: + path = get_cache_path(source_id) + if os.path.exists(path): + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + return None + + +def save_cached_letters(source_id: str, letters: list[dict]) -> None: + os.makedirs(CACHE_DIR, exist_ok=True) + with open(get_cache_path(source_id), "w", encoding="utf-8") as f: + json.dump(letters, f, ensure_ascii=False, indent=2) + + +# --------------------------------------------------------------------------- +# Main logic +# --------------------------------------------------------------------------- + +def fetch_and_parse(source: dict) -> list[dict]: + """Download, extract, and cache letters for a given source.""" + cached = load_cached_letters(source["id"]) + if cached is not None: + return cached + + print(f" Downloading: {source['title']}…", flush=True) + try: + raw = download_text(source["url"]) + except Exception as e: + print(f" ⚠ Failed to download {source['title']}: {e}") + return [] + + extractor = EXTRACTORS.get(source["id"]) + if extractor is None: + return [] + + letters = extractor(raw) + if letters: + save_cached_letters(source["id"], letters) + return letters + + +def load_all_letters() -> list[dict]: + """Load letters from all sources, downloading as needed.""" + all_letters: list[dict] = [] + for source in SOURCES: + letters = fetch_and_parse(source) + all_letters.extend(letters) + return all_letters + + +def wrap_text(text: str, width: int = 78) -> str: + """Word-wrap text while preserving paragraph breaks.""" + paragraphs = re.split(r"\n\s*\n", text) + wrapped = [] + for para in paragraphs: + para = " ".join(para.split()) + wrapped.append(textwrap.fill(para, width=width)) + return "\n\n".join(wrapped) + + +def truncate_letter(body: str, max_chars: int = 3000) -> str: + """Truncate very long letters with an ellipsis note.""" + if len(body) <= max_chars: + return body + truncated = body[:max_chars] + last_period = truncated.rfind(".") + if last_period > max_chars // 2: + truncated = truncated[: last_period + 1] + return truncated + "\n\n […letter continues…]" + + +def display_letter(letter: dict) -> None: + """Pretty-print a single love letter to the terminal.""" + print() + print(SEPARATOR) + print(f" ✉ {letter['author']} → {letter['recipient']}") + if letter.get("heading"): + print(f" {letter['heading']}") + print(f" ({letter['period']})") + print(SEPARATOR) + print() + + body = truncate_letter(letter["body"]) + print(wrap_text(body)) + + print() + print(SEPARATOR) + print(f" Source: {letter['source']}") + print(f" Via Project Gutenberg • gutenberg.org") + print(SEPARATOR) + print() + + +def list_sources() -> None: + """Print available letter collections.""" + print("\n Available collections:\n") + for i, src in enumerate(SOURCES, 1): + print(f" {i}. {src['title']}") + print(f" {src['author']} → {src['recipient']} ({src['year']})") + print(f" gutenberg.org/ebooks/{src['gutenberg_id']}") + print() + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser( + description="Display random historic love letters from Project Gutenberg.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent("""\ + examples: + %(prog)s Show a random love letter + %(prog)s -n 3 Show 3 random love letters + %(prog)s --list List available collections + %(prog)s --refresh Re-download all sources + """), + ) + parser.add_argument( + "-n", "--count", type=int, default=1, metavar="N", + help="number of letters to display (default: 1)", + ) + parser.add_argument( + "--list", action="store_true", + help="list available letter collections", + ) + parser.add_argument( + "--refresh", action="store_true", + help="clear cache and re-download all sources", + ) + parser.add_argument( + "--source", type=str, metavar="ID", + choices=[s["id"] for s in SOURCES], + help="only show letters from a specific source", + ) + + args = parser.parse_args() + + if args.list: + list_sources() + return + + if args.refresh: + import shutil + if os.path.isdir(CACHE_DIR): + shutil.rmtree(CACHE_DIR) + print(" Cache cleared.") + + print("\n 💌 Love Letters — loading collections…\n") + all_letters = load_all_letters() + + if not all_letters: + print(" No letters could be loaded. Check your internet connection.") + sys.exit(1) + + if args.source: + all_letters = [l for l in all_letters if any( + s["id"] == args.source and l["source"] == s["title"] + for s in SOURCES + )] + if not all_letters: + print(f" No letters found for source '{args.source}'.") + sys.exit(1) + + count = min(args.count, len(all_letters)) + chosen = random.sample(all_letters, count) + + for letter in chosen: + display_letter(letter) + + +if __name__ == "__main__": + main()