You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

471 lines
15 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
"""
Love Letters — Display random historic love letters from Project Gutenberg.
Sources:
• Henry VIII to Anne Boleyn (c. 1527–1528)
• Mary Wollstonecraft to Gilbert Imlay (1793–1795)
• Letters of Abelard and Heloise (12th century)
• Napoleon Bonaparte to Josephine (1796–1812)
• John Keats to Fanny Brawne (1819–1820)
"""
import json
import os
import random
import re
import sys
import textwrap
import urllib.request
# Parsed letters are cached as JSON next to this script so that later runs
# do not have to hit the network again.
CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".letter_cache")

# One entry per Project Gutenberg collection. "id" is the key used for the
# cache file name, the extractor lookup, and the --source command-line option.
SOURCES = [
    {
        "id": "henry_viii",
        "title": "The Love Letters of Henry VIII to Anne Boleyn",
        "author": "Henry VIII",
        "recipient": "Anne Boleyn",
        "year": "c. 1527–1528",
        "url": "https://www.gutenberg.org/cache/epub/32155/pg32155.txt",
        "gutenberg_id": 32155,
    },
    {
        "id": "wollstonecraft",
        "title": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
        "author": "Mary Wollstonecraft",
        "recipient": "Gilbert Imlay",
        "year": "1793–1795",
        "url": "https://www.gutenberg.org/cache/epub/34413/pg34413.txt",
        "gutenberg_id": 34413,
    },
    {
        "id": "abelard_heloise",
        "title": "Letters of Abelard and Heloise",
        "author": "Abelard & Heloise",
        "recipient": "each other",
        "year": "12th century",
        "url": "https://www.gutenberg.org/cache/epub/35977/pg35977.txt",
        "gutenberg_id": 35977,
    },
    {
        "id": "napoleon",
        "title": "Napoleon's Letters to Josephine",
        "author": "Napoleon Bonaparte",
        "recipient": "Josephine",
        "year": "1796–1812",
        "url": "https://www.gutenberg.org/cache/epub/37499/pg37499.txt",
        "gutenberg_id": 37499,
    },
    {
        "id": "keats_brawne",
        "title": "Letters of John Keats to Fanny Brawne",
        "author": "John Keats",
        "recipient": "Fanny Brawne",
        "year": "1819–1820",
        "url": "https://www.gutenberg.org/cache/epub/60433/pg60433.txt",
        "gutenberg_id": 60433,
    },
]

# Horizontal rule printed above and below each letter. It must be a visible
# character: an empty string times 60 is still empty and prints blank lines.
SEPARATOR = "─" * 60
def download_text(url: str) -> str:
    """Fetch *url* and return its body decoded as UTF-8 text.

    The request carries a custom User-Agent header and gives up after
    30 seconds. Bytes that are not valid UTF-8 are replaced rather than
    raising a decode error.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "LoveLettersApp/1.0"})
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = response.read()
    return payload.decode("utf-8", errors="replace")
def strip_gutenberg_header_footer(text: str) -> str:
    """Return *text* without the Project Gutenberg preamble and licence tail.

    Start markers are tried in the order listed; for the first one present,
    everything up to and including the rest of that marker's line is dropped.
    End markers are then tried in order; for the first one present,
    everything from the marker onwards is dropped. The result is
    whitespace-stripped. Text containing no markers is only stripped.
    """
    opening_banners = (
        "*** START OF THE PROJECT GUTENBERG EBOOK",
        "*** START OF THIS PROJECT GUTENBERG EBOOK",
        "***START OF THE PROJECT GUTENBERG EBOOK",
    )
    closing_banners = (
        "*** END OF THE PROJECT GUTENBERG EBOOK",
        "*** END OF THIS PROJECT GUTENBERG EBOOK",
        "***END OF THE PROJECT GUTENBERG EBOOK",
        "End of the Project Gutenberg EBook",
        "End of Project Gutenberg",
    )

    for banner in opening_banners:
        position = text.find(banner)
        if position == -1:
            continue
        text = text[position + len(banner):]
        # The banner line continues with the book title and trailing
        # asterisks; discard through the end of that line when there is one.
        _, newline, remainder = text.partition("\n")
        if newline:
            text = remainder
        break

    for banner in closing_banners:
        position = text.find(banner)
        if position == -1:
            continue
        text = text[:position]
        break

    return text.strip()
# ---------------------------------------------------------------------------
# Per-source letter extraction
# ---------------------------------------------------------------------------
def extract_henry_viii(text: str) -> list[dict]:
    """Extract individual letters from the Henry VIII collection.

    Args:
        text: Raw Project Gutenberg text of the collection.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``. Chunks shorter than
        80 characters, chunks without a "Letter <word>" heading line, and
        bodies of 50 characters or fewer are skipped.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")
    # Letters use written-out ordinals: "Letter First", "Letter Second", etc.
    parts = re.split(
        r"\n{2,}(?=Letter\s+(?:First|Second|Third|Fourth|Fifth|Sixth|Seventh|"
        r"Eighth|Ninth|Tenth|Eleventh|Twelfth|Thirteenth|Fourteenth|"
        r"Fifteenth|Sixteenth|Seventeenth|Eighteenth)\b)",
        text,
    )
    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 80:
            continue
        m = re.match(r"(Letter\s+\w+)(?:\s+.*?)?\n", part, re.IGNORECASE)
        if not m:
            continue
        heading = m.group(1)
        body = part[m.end():].strip()
        # Cut the body at the first notes heading found, trying "Notes"
        # before "NOTES".
        for notes_marker in ("\nNotes\n", "\nNOTES\n"):
            notes_idx = body.find(notes_marker)
            if notes_idx != -1:
                body = body[:notes_idx].strip()
                break
        author = "Henry VIII"
        recipient = "Anne Boleyn"
        # A chunk whose opening 200 characters mention "Boleyn to" is
        # attributed to Anne Boleyn writing to Wolsey. (This also covers the
        # longer phrase "Anne Boleyn to Wolsey".)
        if "Boleyn to" in part[:200]:
            author = "Anne Boleyn"
            recipient = "Cardinal Wolsey"
        if len(body) > 50:
            letters.append({
                "heading": heading,
                "body": body,
                "author": author,
                "recipient": recipient,
                "source": "The Love Letters of Henry VIII to Anne Boleyn",
                "period": "c. 1527–1528",
            })
    return letters
def extract_wollstonecraft(text: str) -> list[dict]:
    """Extract individual letters from the Wollstonecraft collection.

    Args:
        text: Raw Project Gutenberg text of the collection.

    Returns:
        One dict per chunk with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``. Chunks shorter than
        80 characters and bodies of 50 characters or fewer are skipped.
        A chunk that does not start with a "LETTER <numeral>" line is still
        kept, with an empty heading and the whole chunk as its body.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")
    parts = re.split(r"\n{2,}(?=LETTER\s+[IVXLC0-9]+\.?\s*\n)", text, flags=re.IGNORECASE)
    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 80:
            continue
        m = re.match(r"(LETTER\s+[IVXLC0-9]+\.?)\s*\n", part, re.IGNORECASE)
        if m:
            heading = m.group(1)
            body = part[m.end():].strip()
        else:
            heading = ""
            body = part
        if len(body) > 50:
            letters.append({
                "heading": heading,
                "body": body,
                "author": "Mary Wollstonecraft",
                "recipient": "Gilbert Imlay",
                "source": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
                "period": "1793–1795",
            })
    return letters
def extract_abelard_heloise(text: str) -> list[dict]:
    """Split the Abelard & Heloise collection into one record per letter.

    The text is cut wherever a blank line is followed by a
    "LETTER <numeral>" heading on its own line (case-insensitive). Chunks
    under 120 characters, chunks not starting with such a heading, and
    bodies of 50 characters or fewer are dropped. The writer is inferred
    from the phrases "heloise to abelard" / "abelard to heloise" in the
    first 300 characters of the body; otherwise the letter is attributed to
    both correspondents.
    """
    normalised = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
    splitter = re.compile(r"\n{2,}(?=LETTER\s+[IVXLC0-9]+[.:]?\s*\n)", re.IGNORECASE)
    heading_re = re.compile(r"(LETTER\s+[IVXLC0-9]+[.:]?)\s*\n", re.IGNORECASE)

    collected = []
    for chunk in splitter.split(normalised):
        chunk = chunk.strip()
        if len(chunk) < 120:
            continue
        found = heading_re.match(chunk)
        if found is None:
            continue
        content = chunk[found.end():].strip()
        if len(content) <= 50:
            continue

        opening = content[:300].lower()
        if "heloise to abelard" in opening:
            writer, addressee = "Heloise", "Abelard"
        elif "abelard to heloise" in opening:
            writer, addressee = "Abelard", "Heloise"
        else:
            writer, addressee = "Abelard & Heloise", "each other"

        collected.append({
            "heading": found.group(1),
            "body": content,
            "author": writer,
            "recipient": addressee,
            "source": "Letters of Abelard and Heloise",
            "period": "12th century",
        })
    return collected
def extract_napoleon(text: str) -> list[dict]:
    """Extract individual letters from Napoleon's letters to Josephine.

    Args:
        text: Raw Project Gutenberg text of the collection.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``. Chunks shorter than
        100 characters, chunks without a "No. <digits>." heading line, and
        bodies shorter than 80 characters are skipped.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")
    # Letters are headed "No. 1.", "No. 2.", etc. on their own line
    parts = re.split(r"\n{2,}(?=No\.\s*\d+\.\s*\n)", text)
    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 100:
            continue
        m = re.match(r"(No\.\s*\d+\.)\s*\n", part)
        if not m:
            continue
        heading = m.group(1)
        body = part[m.end():].strip()
        # Skip table of contents entries (short lines with page numbers)
        if len(body) < 80:
            continue
        letters.append({
            "heading": heading,
            "body": body,
            "author": "Napoleon Bonaparte",
            "recipient": "Josephine",
            "source": "Napoleon's Letters to Josephine, 1796–1812",
            "period": "1796–1812",
        })
    return letters
def extract_keats_brawne(text: str) -> list[dict]:
    """Extract individual letters from Keats to Fanny Brawne.

    Args:
        text: Raw Project Gutenberg text of the collection.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``. The heading is rendered as
        "Letter <numeral>". Chunks shorter than 100 characters, chunks that
        do not start with a Roman-numeral line, and bodies of 50 characters
        or fewer are skipped.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")
    # Letters are numbered with Roman numerals on their own line: "I.", "II.", etc.
    parts = re.split(r"\n{2,}(?=[IVXLC]+\.\s*\n)", text)
    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 100:
            continue
        m = re.match(r"([IVXLC]+)\.\s*\n", part)
        if not m:
            continue
        heading = f"Letter {m.group(1)}"
        body = part[m.end():].strip()
        # NOTE: bracketed editorial footnotes are left in the body as-is;
        # nothing here strips them.
        if len(body) > 50:
            letters.append({
                "heading": heading,
                "body": body,
                "author": "John Keats",
                "recipient": "Fanny Brawne",
                "source": "Letters of John Keats to Fanny Brawne",
                "period": "1819–1820",
            })
    return letters
# Dispatch table: maps each source "id" from SOURCES to the function that
# splits that collection's raw text into letter dicts. fetch_and_parse()
# looks the extractor up here by source id.
EXTRACTORS = {
"henry_viii": extract_henry_viii,
"wollstonecraft": extract_wollstonecraft,
"abelard_heloise": extract_abelard_heloise,
"napoleon": extract_napoleon,
"keats_brawne": extract_keats_brawne,
}
# ---------------------------------------------------------------------------
# Caching
# ---------------------------------------------------------------------------
def get_cache_path(source_id: str) -> str:
    """Return the path of the JSON cache file for *source_id* inside CACHE_DIR."""
    filename = f"{source_id}.json"
    return os.path.join(CACHE_DIR, filename)
def load_cached_letters(source_id: str) -> list[dict] | None:
    """Return the cached letters for *source_id*, or ``None`` on a cache miss.

    A missing, unreadable, or corrupt cache file (for example one truncated
    by an interrupted write) is treated as a miss so the caller re-downloads
    instead of crashing on every run. Content that is valid JSON but not a
    list is likewise treated as a miss.
    """
    try:
        with open(get_cache_path(source_id), "r", encoding="utf-8") as f:
            cached = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError.
        return None
    return cached if isinstance(cached, list) else None
def save_cached_letters(source_id: str, letters: list[dict]) -> None:
    """Write *letters* to the cache file for *source_id* as indented UTF-8 JSON.

    The cache directory is created first if it does not exist yet.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    destination = get_cache_path(source_id)
    with open(destination, "w", encoding="utf-8") as handle:
        json.dump(letters, handle, ensure_ascii=False, indent=2)
# ---------------------------------------------------------------------------
# Main logic
# ---------------------------------------------------------------------------
def fetch_and_parse(source: dict) -> list[dict]:
    """Download, extract, and cache letters for a given source.

    Args:
        source: An entry of SOURCES; the keys ``id``, ``title`` and ``url``
            are read.

    Returns:
        The letters for the source. Cached letters are returned without any
        network access. An empty list is returned when the source has no
        registered extractor or when the download fails; a failed download
        is reported on stdout rather than raised.
    """
    cached = load_cached_letters(source["id"])
    if cached is not None:
        return cached

    # Look the extractor up before touching the network: without one the
    # downloaded text could not be used anyway.
    extractor = EXTRACTORS.get(source["id"])
    if extractor is None:
        return []

    print(f" Downloading: {source['title']}", flush=True)
    try:
        raw = download_text(source["url"])
    except Exception as e:
        # Deliberate best-effort boundary: one unreachable collection must
        # not stop the others from loading.
        print(f" ⚠ Failed to download {source['title']}: {e}")
        return []

    letters = extractor(raw)
    # Only cache non-empty results so a failed parse is retried next run.
    if letters:
        save_cached_letters(source["id"], letters)
    return letters
def load_all_letters() -> list[dict]:
    """Return the letters of every collection in SOURCES as one flat list.

    Each collection is obtained through fetch_and_parse(), in SOURCES order.
    """
    return [letter for source in SOURCES for letter in fetch_and_parse(source)]
def wrap_text(text: str, width: int = 78) -> str:
    """Re-flow *text* to *width* columns, keeping paragraphs separate.

    Paragraphs are delimited by blank (or whitespace-only) lines. Inside a
    paragraph every run of whitespace, including single newlines, collapses
    to one space before wrapping. Paragraphs are rejoined with one blank
    line between them.
    """
    blocks = re.split(r"\n\s*\n", text)
    return "\n\n".join(
        textwrap.fill(" ".join(block.split()), width=width) for block in blocks
    )
def truncate_letter(body: str, max_chars: int = 3000) -> str:
    """Shorten *body* to at most *max_chars* characters plus a continuation note.

    Bodies within the limit are returned unchanged. Longer ones are cut at
    the limit, then trimmed back to the last full stop when that full stop
    lies beyond the midpoint of the limit, and a "letter continues" marker
    is appended.
    """
    if len(body) <= max_chars:
        return body

    head = body[:max_chars]
    sentence_end = head.rfind(".")
    # Only snap back to a sentence boundary if that keeps more than half of
    # the allowed length.
    if sentence_end > max_chars // 2:
        head = head[: sentence_end + 1]
    return f"{head}\n\n […letter continues…]"
def display_letter(letter: dict) -> None:
    """Pretty-print a single love letter to the terminal.

    Args:
        letter: A letter dict as produced by the extractors. The keys
            ``author``, ``recipient``, ``period``, ``body`` and ``source``
            are required; ``heading`` is printed only when present and
            non-empty.

    The body is truncated with truncate_letter() and re-flowed with
    wrap_text() before printing.
    """
    print()
    print(SEPARATOR)
    # Keep a visible separator between the two names; without it they run
    # together ("Henry VIIIAnne Boleyn").
    print(f"{letter['author']} → {letter['recipient']}")
    if letter.get("heading"):
        print(f" {letter['heading']}")
    print(f" ({letter['period']})")
    print(SEPARATOR)
    print()
    body = truncate_letter(letter["body"])
    print(wrap_text(body))
    print()
    print(SEPARATOR)
    print(f" Source: {letter['source']}")
    print(" Via Project Gutenberg • gutenberg.org")
    print(SEPARATOR)
    print()
def list_sources() -> None:
    """Print available letter collections.

    For every entry in SOURCES this prints a numbered title line, an
    "author → recipient (year)" line, and the gutenberg.org ebook path.
    """
    print("\n Available collections:\n")
    for i, src in enumerate(SOURCES, 1):
        print(f" {i}. {src['title']}")
        # Keep a visible separator between the two names; without it they
        # run together.
        print(f" {src['author']} → {src['recipient']} ({src['year']})")
        print(f" gutenberg.org/ebooks/{src['gutenberg_id']}")
        print()
def main() -> None:
    """Command-line entry point: parse options and print random letters.

    Options:
        -n/--count N   number of letters to display (default 1)
        --list         list the available collections and exit
        --refresh      delete the cache directory before loading
        --source ID    restrict the selection to one collection

    Exits with status 1 when no letters could be loaded. A negative count
    is rejected as a usage error.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Display random historic love letters from Project Gutenberg.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent("""\
            examples:
            %(prog)s Show a random love letter
            %(prog)s -n 3 Show 3 random love letters
            %(prog)s --list List available collections
            %(prog)s --refresh Re-download all sources
            """),
    )
    parser.add_argument(
        "-n", "--count", type=int, default=1, metavar="N",
        help="number of letters to display (default: 1)",
    )
    parser.add_argument(
        "--list", action="store_true",
        help="list available letter collections",
    )
    parser.add_argument(
        "--refresh", action="store_true",
        help="clear cache and re-download all sources",
    )
    parser.add_argument(
        "--source", type=str, metavar="ID",
        choices=[s["id"] for s in SOURCES],
        help="only show letters from a specific source",
    )
    args = parser.parse_args()

    # random.sample() raises ValueError for a negative sample size; report
    # it as a usage error instead of a traceback.
    if args.count < 0:
        parser.error("N must not be negative")

    if args.list:
        list_sources()
        return

    if args.refresh:
        import shutil

        if os.path.isdir(CACHE_DIR):
            shutil.rmtree(CACHE_DIR)
            print(" Cache cleared.")

    print("\n 💌 Love Letters — loading collections…\n")

    # Select by source id *before* loading. Matching a letter's "source"
    # text against a collection title is unreliable (the two strings are
    # not guaranteed to be equal), and loading everything would download
    # collections that are never shown.
    selected = [s for s in SOURCES if args.source is None or s["id"] == args.source]
    all_letters = [letter for src in selected for letter in fetch_and_parse(src)]

    if not all_letters:
        if args.source:
            print(f" No letters found for source '{args.source}'.")
        else:
            print(" No letters could be loaded. Check your internet connection.")
        sys.exit(1)

    count = min(args.count, len(all_letters))
    chosen = random.sample(all_letters, count)
    for letter in chosen:
        display_letter(letter)


if __name__ == "__main__":
    main()