|
|
#!/usr/bin/env python3
|
|
|
"""
Love Letters — Display random historic love letters from Project Gutenberg.

Sources:
  • Henry VIII to Anne Boleyn (c. 1527–1528)
  • Mary Wollstonecraft to Gilbert Imlay (1793–1795)
  • Letters of Abelard and Heloise (12th century)
  • Napoleon Bonaparte to Josephine (1796–1812)
  • John Keats to Fanny Brawne (1819–1820)
"""
|
|
|
|
|
|
import json
|
|
|
import os
|
|
|
import random
|
|
|
import re
|
|
|
import sys
|
|
|
import textwrap
|
|
|
import urllib.request
|
|
|
|
|
|
# Parsed letters are cached as JSON files in a hidden directory that sits
# next to this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CACHE_DIR = os.path.join(_SCRIPT_DIR, ".letter_cache")

# Column order of each row in _SOURCE_ROWS; also the key order of every
# dictionary in SOURCES.
_SOURCE_FIELDS = (
    "id",
    "title",
    "author",
    "recipient",
    "year",
    "url",
    "gutenberg_id",
)

# One row per Project Gutenberg collection the program knows about.
_SOURCE_ROWS = (
    (
        "henry_viii",
        "The Love Letters of Henry VIII to Anne Boleyn",
        "Henry VIII",
        "Anne Boleyn",
        "c. 1527–1528",
        "https://www.gutenberg.org/cache/epub/32155/pg32155.txt",
        32155,
    ),
    (
        "wollstonecraft",
        "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
        "Mary Wollstonecraft",
        "Gilbert Imlay",
        "1793–1795",
        "https://www.gutenberg.org/cache/epub/34413/pg34413.txt",
        34413,
    ),
    (
        "abelard_heloise",
        "Letters of Abelard and Heloise",
        "Abelard & Heloise",
        "each other",
        "12th century",
        "https://www.gutenberg.org/cache/epub/35977/pg35977.txt",
        35977,
    ),
    (
        "napoleon",
        "Napoleon's Letters to Josephine",
        "Napoleon Bonaparte",
        "Josephine",
        "1796–1812",
        "https://www.gutenberg.org/cache/epub/37499/pg37499.txt",
        37499,
    ),
    (
        "keats_brawne",
        "Letters of John Keats to Fanny Brawne",
        "John Keats",
        "Fanny Brawne",
        "1819–1820",
        "https://www.gutenberg.org/cache/epub/60433/pg60433.txt",
        60433,
    ),
)

# Public catalogue: a list of dictionaries keyed by _SOURCE_FIELDS.
SOURCES = [dict(zip(_SOURCE_FIELDS, row)) for row in _SOURCE_ROWS]

# Horizontal rule printed around each displayed letter.
_SEPARATOR_WIDTH = 60
SEPARATOR = "─" * _SEPARATOR_WIDTH
|
|
|
|
|
|
|
|
|
def download_text(url: str) -> str:
    """Fetch *url* and return its content decoded as UTF-8 text.

    The request carries a custom ``User-Agent`` header and times out after
    30 seconds.  Bytes that are not valid UTF-8 are substituted with the
    replacement character rather than raising.
    """
    request = urllib.request.Request(
        url,
        headers={"User-Agent": "LoveLettersApp/1.0"},
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = response.read()
    return payload.decode("utf-8", errors="replace")
|
|
|
|
|
|
|
|
|
def strip_gutenberg_header_footer(text: str) -> str:
    """Remove the Project Gutenberg header and footer boilerplate.

    The header is everything up to and including the line that holds the
    earliest start marker.  The footer is everything from the earliest end
    marker onwards.  Using the earliest position (instead of the first
    marker in the list that happens to match) matters for files that carry
    an "End of the Project Gutenberg EBook ..." line *before* the
    "*** END OF ..." line: cutting at the later marker would leave the
    former inside the returned text.

    Args:
        text: Raw text of a Project Gutenberg plain-text file.

    Returns:
        The text between the markers with surrounding whitespace stripped.
        A side whose markers are all absent is left untouched.
    """
    start_markers = (
        "*** START OF THE PROJECT GUTENBERG EBOOK",
        "*** START OF THIS PROJECT GUTENBERG EBOOK",
        "***START OF THE PROJECT GUTENBERG EBOOK",
    )
    end_markers = (
        "*** END OF THE PROJECT GUTENBERG EBOOK",
        "*** END OF THIS PROJECT GUTENBERG EBOOK",
        "***END OF THE PROJECT GUTENBERG EBOOK",
        "End of the Project Gutenberg EBook",
        "End of Project Gutenberg",
    )

    start_hits = [
        (pos, len(marker))
        for marker in start_markers
        if (pos := text.find(marker)) != -1
    ]
    if start_hits:
        pos, marker_len = min(start_hits)
        text = text[pos + marker_len:]
        # The marker line continues with the book title and a closing
        # "***"; drop the remainder of that line as well.
        newline = text.find("\n")
        if newline != -1:
            text = text[newline + 1:]

    end_hits = [
        pos for marker in end_markers if (pos := text.find(marker)) != -1
    ]
    if end_hits:
        text = text[:min(end_hits)]

    return text.strip()
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Per-source letter extraction
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def extract_henry_viii(text: str) -> list[dict]:
    """Split the Henry VIII collection into one dictionary per letter.

    Letters are introduced by written-out ordinals ("Letter First",
    "Letter Second", ...).  Each returned dictionary has the keys
    ``heading``, ``body``, ``author``, ``recipient``, ``source`` and
    ``period``.
    """
    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")

    boundary = (
        r"\n{2,}(?=Letter\s+(?:First|Second|Third|Fourth|Fifth|Sixth|Seventh|"
        r"Eighth|Ninth|Tenth|Eleventh|Twelfth|Thirteenth|Fourteenth|"
        r"Fifteenth|Sixteenth|Seventeenth|Eighteenth)\b)"
    )

    letters = []
    for chunk in re.split(boundary, cleaned):
        chunk = chunk.strip()
        # An empty chunk is also shorter than 80 characters.
        if len(chunk) < 80:
            continue

        head = re.match(r"(Letter\s+\w+)(?:\s+.*?)?\n", chunk, re.IGNORECASE)
        if head is None:
            continue

        body = chunk[head.end():].strip()

        # Cut the body at the first notes heading, preferring "Notes" over
        # "NOTES" when both are present.
        for notes_heading in ("\nNotes\n", "\nNOTES\n"):
            cut = body.find(notes_heading)
            if cut != -1:
                body = body[:cut].strip()
                break

        if len(body) <= 50:
            continue

        # "Boleyn to" near the top of the chunk marks a letter written by
        # Anne Boleyn rather than by Henry.
        if "Boleyn to" in chunk[:200]:
            author, recipient = "Anne Boleyn", "Cardinal Wolsey"
        else:
            author, recipient = "Henry VIII", "Anne Boleyn"

        letters.append({
            "heading": head.group(1),
            "body": body,
            "author": author,
            "recipient": recipient,
            "source": "The Love Letters of Henry VIII to Anne Boleyn",
            "period": "c. 1527–1528",
        })
    return letters
|
|
|
|
|
|
|
|
|
def extract_wollstonecraft(text: str) -> list[dict]:
    """Extract individual letters from the Wollstonecraft collection.

    The text is split in front of every "LETTER <numeral>" line (Roman or
    Arabic numerals, optional trailing period, case-insensitive).

    Args:
        text: Raw Project Gutenberg text of the collection.

    Returns:
        One dictionary per letter with the keys ``heading``, ``body``,
        ``author``, ``recipient``, ``source`` and ``period``.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")
    parts = re.split(
        r"\n{2,}(?=LETTER\s+[IVXLC0-9]+\.?\s*\n)", text, flags=re.IGNORECASE
    )

    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 80:
            continue

        m = re.match(r"(LETTER\s+[IVXLC0-9]+\.?)\s*\n", part, re.IGNORECASE)
        if not m:
            # A chunk without a letter heading is whatever precedes the
            # first letter.  Skip it, as the other extractors in this file
            # do, instead of presenting it as a letter by Wollstonecraft.
            continue

        body = part[m.end():].strip()
        if len(body) > 50:
            letters.append({
                "heading": m.group(1),
                "body": body,
                "author": "Mary Wollstonecraft",
                "recipient": "Gilbert Imlay",
                "source": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
                "period": "1793–1795",
            })
    return letters
|
|
|
|
|
|
|
|
|
def extract_abelard_heloise(text: str) -> list[dict]:
    """Split the Abelard & Heloise collection into one dictionary per letter.

    Chunks start at "LETTER <numeral>" lines (optional "." or ":" suffix,
    case-insensitive).  The writer is taken from a "Heloise to Abelard" /
    "Abelard to Heloise" phrase within the first 300 characters of the
    body; when neither phrase occurs the letter is attributed to both.
    """
    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
    chunks = re.split(
        r"\n{2,}(?=LETTER\s+[IVXLC0-9]+[.:]?\s*\n)", cleaned, flags=re.IGNORECASE
    )

    letters = []
    for chunk in chunks:
        chunk = chunk.strip()
        # An empty chunk is also shorter than 120 characters.
        if len(chunk) < 120:
            continue

        head = re.match(r"(LETTER\s+[IVXLC0-9]+[.:]?)\s*\n", chunk, re.IGNORECASE)
        if head is None:
            continue

        body = chunk[head.end():].strip()
        if len(body) <= 50:
            continue

        author, recipient = "Abelard & Heloise", "each other"
        opening = body[:300].lower()
        # Checked in this order: "heloise to abelard" wins if both occur.
        for sender, addressee in (("Heloise", "Abelard"), ("Abelard", "Heloise")):
            if f"{sender} to {addressee}".lower() in opening:
                author, recipient = sender, addressee
                break

        letters.append({
            "heading": head.group(1),
            "body": body,
            "author": author,
            "recipient": recipient,
            "source": "Letters of Abelard and Heloise",
            "period": "12th century",
        })
    return letters
|
|
|
|
|
|
|
|
|
def extract_napoleon(text: str) -> list[dict]:
    """Split Napoleon's letters to Josephine into one dictionary per letter.

    Each letter is headed by "No. <digits>." on a line of its own.  Chunks
    whose body is shorter than 80 characters are dropped; the original
    author's note says these are table-of-contents entries.
    """
    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")

    letters = []
    for chunk in re.split(r"\n{2,}(?=No\.\s*\d+\.\s*\n)", cleaned):
        chunk = chunk.strip()
        # An empty chunk is also shorter than 100 characters.
        if len(chunk) < 100:
            continue

        head = re.match(r"(No\.\s*\d+\.)\s*\n", chunk)
        if head is None:
            continue

        body = chunk[head.end():].strip()
        if len(body) >= 80:
            letters.append({
                "heading": head.group(1),
                "body": body,
                "author": "Napoleon Bonaparte",
                "recipient": "Josephine",
                "source": "Napoleon's Letters to Josephine, 1796–1812",
                "period": "1796–1812",
            })
    return letters
|
|
|
|
|
|
|
|
|
def extract_keats_brawne(text: str) -> list[dict]:
    """Split Keats's letters to Fanny Brawne into one dictionary per letter.

    Each letter is introduced by an upper-case Roman numeral followed by a
    period on a line of its own ("I.", "II.", ...).  The reported heading
    is "Letter <numeral>".
    """
    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")

    letters = []
    for chunk in re.split(r"\n{2,}(?=[IVXLC]+\.\s*\n)", cleaned):
        chunk = chunk.strip()
        # An empty chunk is also shorter than 100 characters.
        if len(chunk) < 100:
            continue

        numeral = re.match(r"([IVXLC]+)\.\s*\n", chunk)
        if numeral is None:
            continue

        # NOTE: the body is kept verbatim; editorial footnotes in brackets
        # are not stripped here.
        body = chunk[numeral.end():].strip()
        if len(body) <= 50:
            continue

        letters.append({
            "heading": f"Letter {numeral.group(1)}",
            "body": body,
            "author": "John Keats",
            "recipient": "Fanny Brawne",
            "source": "Letters of John Keats to Fanny Brawne",
            "period": "1819–1820",
        })
    return letters
|
|
|
|
|
|
|
|
|
# Maps each source id (the "id" field of a SOURCES entry) to the function
# that splits that collection's raw text into letters.
EXTRACTORS = dict(
    henry_viii=extract_henry_viii,
    wollstonecraft=extract_wollstonecraft,
    abelard_heloise=extract_abelard_heloise,
    napoleon=extract_napoleon,
    keats_brawne=extract_keats_brawne,
)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Caching
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def get_cache_path(source_id: str) -> str:
    """Return the path of the JSON cache file for *source_id* inside CACHE_DIR."""
    filename = f"{source_id}.json"
    return os.path.join(CACHE_DIR, filename)
|
|
|
|
|
|
|
|
|
def load_cached_letters(source_id: str) -> list[dict] | None:
    """Return the cached letters for *source_id*, or ``None`` on a cache miss.

    A missing, unreadable, or corrupt cache file is treated as a miss so
    the caller can fall back to downloading instead of crashing.  The same
    applies when the file holds valid JSON that is not a list.

    Args:
        source_id: Identifier of the source whose cache should be read.

    Returns:
        The list stored in the cache file, or ``None`` if no usable cache
        exists.
    """
    path = get_cache_path(source_id)
    try:
        with open(path, "r", encoding="utf-8") as f:
            letters = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.  ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (truncated or
        # otherwise damaged cache file).
        return None
    return letters if isinstance(letters, list) else None
|
|
|
|
|
|
|
|
|
def save_cached_letters(source_id: str, letters: list[dict]) -> None:
    """Write *letters* to the JSON cache file for *source_id*.

    The data is first written to a temporary file next to the target and
    then moved into place with ``os.replace``.  An interrupted or failed
    write therefore never leaves a truncated cache file behind.

    Args:
        source_id: Identifier of the source being cached.
        letters: Letter dictionaries to serialize.

    Raises:
        OSError: If the cache directory or file cannot be written.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = get_cache_path(source_id)
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(letters, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a partial temporary file behind; the original error
        # is what the caller needs to see.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Main logic
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def fetch_and_parse(source: dict) -> list[dict]:
    """Download, extract, and cache letters for a given source.

    Cached letters are returned directly when available.  Otherwise the
    source text is downloaded, split by the extractor registered for the
    source id, and the result is cached if it is non-empty.

    Args:
        source: A source entry; the keys ``id``, ``title`` and ``url`` are
            read.

    Returns:
        The letters of the source.  An empty list is returned when the
        source has no registered extractor or the download fails.
    """
    cached = load_cached_letters(source["id"])
    if cached is not None:
        return cached

    # Resolve the extractor before touching the network so that a source
    # without one does not cost a pointless download.
    extractor = EXTRACTORS.get(source["id"])
    if extractor is None:
        return []

    print(f" Downloading: {source['title']}…", flush=True)
    try:
        raw = download_text(source["url"])
    except Exception as e:
        # Deliberate best-effort boundary: one unavailable source must not
        # stop the remaining ones from loading.
        print(f" ⚠ Failed to download {source['title']}: {e}")
        return []

    letters = extractor(raw)
    if letters:
        try:
            save_cached_letters(source["id"], letters)
        except OSError as e:
            # Caching is an optimisation; the letters were fetched fine and
            # are still returned when the cache cannot be written.
            print(f" ⚠ Could not cache {source['title']}: {e}")
    return letters
|
|
|
|
|
|
|
|
|
def load_all_letters() -> list[dict]:
    """Return the letters of every entry in SOURCES as one flat list.

    Sources are processed in SOURCES order through ``fetch_and_parse``.
    """
    return [
        letter
        for source in SOURCES
        for letter in fetch_and_parse(source)
    ]
|
|
|
|
|
|
|
|
|
def wrap_text(text: str, width: int = 78) -> str:
    """Re-flow *text* to *width* columns, keeping paragraphs separate.

    Paragraphs are delimited by blank (or whitespace-only) lines.  Inside
    a paragraph all whitespace runs collapse to single spaces before the
    paragraph is wrapped; paragraphs are re-joined with one blank line.
    """
    return "\n\n".join(
        textwrap.fill(" ".join(block.split()), width=width)
        for block in re.split(r"\n\s*\n", text)
    )
|
|
|
|
|
|
|
|
|
def truncate_letter(body: str, max_chars: int = 3000) -> str:
    """Shorten *body* to at most *max_chars* characters plus a continuation note.

    Bodies within the limit are returned unchanged.  Longer bodies are cut
    at the limit and, when the last period lies in the second half of the
    cut text, trimmed back to end on that period.
    """
    if len(body) > max_chars:
        excerpt = body[:max_chars]
        sentence_end = excerpt.rfind(".")
        if sentence_end > max_chars // 2:
            excerpt = excerpt[: sentence_end + 1]
        return excerpt + "\n\n […letter continues…]"
    return body
|
|
|
|
|
|
|
|
|
def display_letter(letter: dict) -> None:
    """Print one letter to standard output.

    The output consists of a header (author, recipient, optional heading
    and period), the truncated and word-wrapped body, and a source
    footer.  All three parts are delimited by SEPARATOR rules.
    """
    # Header.
    print(f"\n{SEPARATOR}")
    print(f" ✉ {letter['author']} → {letter['recipient']}")
    heading = letter.get("heading")
    if heading:
        print(f" {heading}")
    print(f" ({letter['period']})")
    print(SEPARATOR, end="\n\n")

    # Body.
    excerpt = wrap_text(truncate_letter(letter["body"]))
    print(excerpt, end="\n\n")

    # Footer.
    print(SEPARATOR)
    print(f" Source: {letter['source']}")
    print(" Via Project Gutenberg • gutenberg.org")
    print(SEPARATOR, end="\n\n")
|
|
|
|
|
|
|
|
|
def list_sources() -> None:
    """Print a numbered overview of every collection in SOURCES.

    Each entry shows the title, author, recipient, year and the Project
    Gutenberg ebook address, followed by a blank line.
    """
    print("\n Available collections:\n")
    for number, entry in enumerate(SOURCES, start=1):
        print(
            f" {number}. {entry['title']}",
            f" {entry['author']} → {entry['recipient']} ({entry['year']})",
            f" gutenberg.org/ebooks/{entry['gutenberg_id']}",
            "",
            sep="\n",
        )
|
|
|
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: parse arguments and display random letters.

    Options:
        -n/--count N   number of letters to display (default 1)
        --list         print the available collections and exit
        --refresh      delete the cache directory before loading
        --source ID    restrict the selection to one collection

    Exits with status 1 when no letters could be loaded, and with an
    argparse usage error when ``--count`` is negative.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Display random historic love letters from Project Gutenberg.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent("""\
            examples:
              %(prog)s Show a random love letter
              %(prog)s -n 3 Show 3 random love letters
              %(prog)s --list List available collections
              %(prog)s --refresh Re-download all sources
            """),
    )
    parser.add_argument(
        "-n", "--count", type=int, default=1, metavar="N",
        help="number of letters to display (default: 1)",
    )
    parser.add_argument(
        "--list", action="store_true",
        help="list available letter collections",
    )
    parser.add_argument(
        "--refresh", action="store_true",
        help="clear cache and re-download all sources",
    )
    parser.add_argument(
        "--source", type=str, metavar="ID",
        choices=[s["id"] for s in SOURCES],
        help="only show letters from a specific source",
    )

    args = parser.parse_args()

    if args.list:
        list_sources()
        return

    # random.sample() raises ValueError for a negative sample size; report
    # it as a usage error instead of a traceback.
    if args.count < 0:
        parser.error("--count must not be negative")

    if args.refresh:
        import shutil

        if os.path.isdir(CACHE_DIR):
            shutil.rmtree(CACHE_DIR)
        print(" Cache cleared.")

    print("\n 💌 Love Letters — loading collections…\n")

    if args.source:
        # Select by source id.  Matching a letter's "source" text against
        # the SOURCES titles is unreliable because an extractor may use a
        # different string there (the Napoleon one does), which made the
        # filter return nothing.  `choices` above guarantees the id exists.
        selected = next(s for s in SOURCES if s["id"] == args.source)
        all_letters = fetch_and_parse(selected)
        if not all_letters:
            print(f" No letters found for source '{args.source}'.")
            sys.exit(1)
    else:
        all_letters = load_all_letters()
        if not all_letters:
            print(" No letters could be loaded. Check your internet connection.")
            sys.exit(1)

    # Never ask for more letters than are available.
    count = min(args.count, len(all_letters))
    for letter in random.sample(all_letters, count):
        display_letter(letter)
|
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|