Add love letters app with Project Gutenberg sources

Python app that downloads and displays random historic love letters from
five authentic collections on Project Gutenberg:

- Henry VIII to Anne Boleyn (c. 1527-1528)
- Mary Wollstonecraft to Gilbert Imlay (1793-1795)
- Abelard & Heloise (12th century)
- Napoleon Bonaparte to Josephine (1796-1812)
- John Keats to Fanny Brawne (1819-1820)

Features: local caching, source filtering, multi-letter display,
text wrapping, and long letter truncation.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
main
Ibraheem Saleh 1 week ago
parent f45fe56f94
commit 7fa79a1ba7

3
.gitignore vendored

@ -160,3 +160,6 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Love Letters app cache
.letter_cache/

@ -1,2 +1,29 @@
# letters
A Python app that displays random historic love letters from authentic sources, downloaded from [Project Gutenberg](https://www.gutenberg.org/).
## Usage
```bash
python3 love_letters.py # Show a random love letter
python3 love_letters.py -n 3 # Show 3 random love letters
python3 love_letters.py --list # List available collections
python3 love_letters.py --source keats_brawne # Filter by source
python3 love_letters.py --refresh # Re-download all sources
```
## Sources
| Collection | Author → Recipient | Period |
|---|---|---|
| The Love Letters of Henry VIII to Anne Boleyn | Henry VIII → Anne Boleyn | c. 1527–1528 |
| The Love Letters of Mary Wollstonecraft to Gilbert Imlay | Mary Wollstonecraft → Gilbert Imlay | 1793–1795 |
| Letters of Abelard and Heloise | Abelard & Heloise → each other | 12th century |
| Napoleon's Letters to Josephine | Napoleon Bonaparte → Josephine | 1796–1812 |
| Letters of John Keats to Fanny Brawne | John Keats → Fanny Brawne | 1819–1820 |
All texts are sourced from [Project Gutenberg](https://www.gutenberg.org/) and are in the public domain.
## Requirements
Python 3.10+ (no external dependencies). An internet connection is required on first run to download the letter collections; they are cached locally after that.

@ -0,0 +1,470 @@
#!/usr/bin/env python3
"""
Love Letters – Display random historic love letters from Project Gutenberg.

Sources:
    Henry VIII to Anne Boleyn (c. 1527–1528)
    Mary Wollstonecraft to Gilbert Imlay (1793–1795)
    Letters of Abelard and Heloise (12th century)
    Napoleon Bonaparte to Josephine (1796–1812)
    John Keats to Fanny Brawne (1819–1820)
"""
import json
import os
import random
import re
import sys
import textwrap
import urllib.request
# Directory holding the per-source JSON caches: a ".letter_cache" folder
# located next to this script file.
CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".letter_cache")
# Catalogue of letter collections. Each entry carries the id used for the
# cache file name and the extractor lookup, display metadata, and the
# plain-text download URL for the e-book.
SOURCES = [
    {
        "id": "henry_viii",
        "title": "The Love Letters of Henry VIII to Anne Boleyn",
        "author": "Henry VIII",
        "recipient": "Anne Boleyn",
        "year": "c. 1527–1528",
        "url": "https://www.gutenberg.org/cache/epub/32155/pg32155.txt",
        "gutenberg_id": 32155,
    },
    {
        "id": "wollstonecraft",
        "title": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
        "author": "Mary Wollstonecraft",
        "recipient": "Gilbert Imlay",
        "year": "1793–1795",
        "url": "https://www.gutenberg.org/cache/epub/34413/pg34413.txt",
        "gutenberg_id": 34413,
    },
    {
        "id": "abelard_heloise",
        "title": "Letters of Abelard and Heloise",
        "author": "Abelard & Heloise",
        "recipient": "each other",
        "year": "12th century",
        "url": "https://www.gutenberg.org/cache/epub/35977/pg35977.txt",
        "gutenberg_id": 35977,
    },
    {
        "id": "napoleon",
        "title": "Napoleon's Letters to Josephine",
        "author": "Napoleon Bonaparte",
        "recipient": "Josephine",
        "year": "1796–1812",
        "url": "https://www.gutenberg.org/cache/epub/37499/pg37499.txt",
        "gutenberg_id": 37499,
    },
    {
        "id": "keats_brawne",
        "title": "Letters of John Keats to Fanny Brawne",
        "author": "John Keats",
        "recipient": "Fanny Brawne",
        "year": "1819–1820",
        "url": "https://www.gutenberg.org/cache/epub/60433/pg60433.txt",
        "gutenberg_id": 60433,
    },
]
# Horizontal rule printed above and below each letter. Repeating an empty
# string yields "", which would print blank lines instead of a rule.
SEPARATOR = "─" * 60
def download_text(url: str) -> str:
    """Fetch *url* and return the response body decoded as UTF-8.

    The request carries a custom User-Agent header and times out after
    30 seconds. Bytes that are not valid UTF-8 are replaced rather than
    raising a decode error.
    """
    request = urllib.request.Request(
        url, headers={"User-Agent": "LoveLettersApp/1.0"}
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        return response.read().decode("utf-8", errors="replace")
def strip_gutenberg_header_footer(text: str) -> str:
    """Remove the Project Gutenberg header and footer boilerplate.

    Everything up to and including the line containing the first matching
    start marker is dropped. The text is then cut at the *earliest* end
    marker present, so a plain "End of the Project Gutenberg EBook ..."
    line that precedes the "*** END OF ..." banner is removed as well.

    Args:
        text: Full text of a downloaded e-book.

    Returns:
        The text between the markers with surrounding whitespace stripped.
        Text without any marker is returned stripped but otherwise intact.
    """
    start_markers = (
        "*** START OF THE PROJECT GUTENBERG EBOOK",
        "*** START OF THIS PROJECT GUTENBERG EBOOK",
        "***START OF THE PROJECT GUTENBERG EBOOK",
    )
    end_markers = (
        "*** END OF THE PROJECT GUTENBERG EBOOK",
        "*** END OF THIS PROJECT GUTENBERG EBOOK",
        "***END OF THE PROJECT GUTENBERG EBOOK",
        "End of the Project Gutenberg EBook",
        "End of Project Gutenberg",
    )

    for marker in start_markers:
        idx = text.find(marker)
        if idx != -1:
            tail = text[idx + len(marker):]
            # Drop the remainder of the marker line (title, trailing "***").
            _, newline, rest = tail.partition("\n")
            text = rest if newline else tail
            break

    # Cut at whichever end marker occurs first in the text, not whichever
    # happens to be listed first.
    cut_points = [pos for marker in end_markers if (pos := text.find(marker)) != -1]
    if cut_points:
        text = text[:min(cut_points)]

    return text.strip()
# ---------------------------------------------------------------------------
# Per-source letter extraction
# ---------------------------------------------------------------------------
def extract_henry_viii(text: str) -> list[dict]:
    """Extract individual letters from the Henry VIII collection.

    The Gutenberg boilerplate is stripped, the text is split at headings of
    the form "Letter First" through "Letter Eighteenth", and a trailing
    "Notes"/"NOTES" section is removed from each letter body.

    Args:
        text: Raw text of the downloaded e-book.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")

    # Letters use written-out ordinals: "Letter First", "Letter Second", etc.
    parts = re.split(
        r"\n{2,}(?=Letter\s+(?:First|Second|Third|Fourth|Fifth|Sixth|Seventh|"
        r"Eighth|Ninth|Tenth|Eleventh|Twelfth|Thirteenth|Fourteenth|"
        r"Fifteenth|Sixteenth|Seventeenth|Eighteenth)\b)",
        text,
    )

    letters = []
    for part in parts:
        part = part.strip()
        # Fragments this short are not real letters.
        if len(part) < 80:
            continue

        m = re.match(r"(Letter\s+\w+)(?:\s+.*?)?\n", part, re.IGNORECASE)
        if not m:
            continue

        heading = m.group(1)
        body = part[m.end():].strip()

        # Remove notes section at the end
        for notes_marker in ("\nNotes\n", "\nNOTES\n"):
            notes_idx = body.find(notes_marker)
            if notes_idx != -1:
                body = body[:notes_idx].strip()
                break

        author = "Henry VIII"
        recipient = "Anne Boleyn"
        # "Boleyn to" near the top of the part marks a letter written by
        # Anne Boleyn (this also covers "Anne Boleyn to Wolsey").
        if "Boleyn to" in part[:200]:
            author = "Anne Boleyn"
            recipient = "Cardinal Wolsey"

        if len(body) > 50:
            letters.append({
                "heading": heading,
                "body": body,
                "author": author,
                "recipient": recipient,
                "source": "The Love Letters of Henry VIII to Anne Boleyn",
                "period": "c. 1527–1528",
            })
    return letters
def extract_wollstonecraft(text: str) -> list[dict]:
    """Extract individual letters from the Wollstonecraft collection.

    The Gutenberg boilerplate is stripped and the text is split at
    "LETTER <numeral>" headings standing on their own line. Parts that do
    not begin with such a heading (whatever precedes the first letter) are
    skipped, matching the other extractors in this file.

    Args:
        text: Raw text of the downloaded e-book.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")

    parts = re.split(
        r"\n{2,}(?=LETTER\s+[IVXLC0-9]+\.?\s*\n)", text, flags=re.IGNORECASE
    )

    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 80:
            continue

        m = re.match(r"(LETTER\s+[IVXLC0-9]+\.?)\s*\n", part, re.IGNORECASE)
        if not m:
            # Not headed by "LETTER n": front matter, not a letter.
            continue

        body = part[m.end():].strip()
        if len(body) > 50:
            letters.append({
                "heading": m.group(1),
                "body": body,
                "author": "Mary Wollstonecraft",
                "recipient": "Gilbert Imlay",
                "source": "The Love Letters of Mary Wollstonecraft to Gilbert Imlay",
                "period": "1793–1795",
            })
    return letters
def extract_abelard_heloise(text: str) -> list[dict]:
    """Split the Abelard & Heloise e-book into individual letters.

    After the Gutenberg boilerplate is stripped, the text is divided at
    "LETTER <numeral>" headings. The first 300 characters of each body are
    checked for "heloise to abelard" / "abelard to heloise" to decide who
    wrote the letter; otherwise the joint attribution is kept.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``.
    """
    cleaned = strip_gutenberg_header_footer(text).replace("\r\n", "\n")
    chunks = re.split(
        r"\n{2,}(?=LETTER\s+[IVXLC0-9]+[.:]?\s*\n)", cleaned, flags=re.IGNORECASE
    )

    found = []
    for chunk in (raw.strip() for raw in chunks):
        if len(chunk) < 120:
            continue

        header = re.match(r"(LETTER\s+[IVXLC0-9]+[.:]?)\s*\n", chunk, re.IGNORECASE)
        if header is None:
            continue

        content = chunk[header.end():].strip()
        if len(content) <= 50:
            continue

        opening = content[:300].lower()
        if "heloise to abelard" in opening:
            writer, addressee = "Heloise", "Abelard"
        elif "abelard to heloise" in opening:
            writer, addressee = "Abelard", "Heloise"
        else:
            writer, addressee = "Abelard & Heloise", "each other"

        found.append({
            "heading": header.group(1),
            "body": content,
            "author": writer,
            "recipient": addressee,
            "source": "Letters of Abelard and Heloise",
            "period": "12th century",
        })
    return found
def extract_napoleon(text: str) -> list[dict]:
    """Extract individual letters from Napoleon's letters to Josephine.

    The Gutenberg boilerplate is stripped and the text is split at
    "No. <digits>." headings standing on their own line.

    Args:
        text: Raw text of the downloaded e-book.

    Returns:
        One dict per letter with the keys ``heading``, ``body``, ``author``,
        ``recipient``, ``source`` and ``period``. ``source`` equals the
        collection title used in ``SOURCES`` so that filtering letters by
        collection title matches this collection too.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")

    # Letters are headed "No. 1.", "No. 2.", etc. on their own line
    parts = re.split(r"\n{2,}(?=No\.\s*\d+\.\s*\n)", text)

    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 100:
            continue

        m = re.match(r"(No\.\s*\d+\.)\s*\n", part)
        if not m:
            continue

        body = part[m.end():].strip()
        # Skip table of contents entries (short lines with page numbers)
        if len(body) < 80:
            continue

        letters.append({
            "heading": m.group(1),
            "body": body,
            "author": "Napoleon Bonaparte",
            "recipient": "Josephine",
            "source": "Napoleon's Letters to Josephine",
            "period": "1796–1812",
        })
    return letters
def extract_keats_brawne(text: str) -> list[dict]:
    """Extract individual letters from Keats to Fanny Brawne.

    The Gutenberg boilerplate is stripped and the text is split at Roman
    numeral headings ("I.", "II.", ...) standing on their own line. Letter
    bodies are returned as found; nothing inside them is filtered out.

    Args:
        text: Raw text of the downloaded e-book.

    Returns:
        One dict per letter with the keys ``heading`` ("Letter <numeral>"),
        ``body``, ``author``, ``recipient``, ``source`` and ``period``.
    """
    text = strip_gutenberg_header_footer(text)
    text = text.replace("\r\n", "\n")

    # Letters are numbered with Roman numerals on their own line: "I.", "II.", etc.
    parts = re.split(r"\n{2,}(?=[IVXLC]+\.\s*\n)", text)

    letters = []
    for part in parts:
        part = part.strip()
        if len(part) < 100:
            continue

        m = re.match(r"([IVXLC]+)\.\s*\n", part)
        if not m:
            continue

        body = part[m.end():].strip()
        if len(body) > 50:
            letters.append({
                "heading": f"Letter {m.group(1)}",
                "body": body,
                "author": "John Keats",
                "recipient": "Fanny Brawne",
                "source": "Letters of John Keats to Fanny Brawne",
                "period": "1819–1820",
            })
    return letters
# Dispatch table mapping a source "id" (as used in SOURCES) to the function
# that splits that collection's raw text into individual letters.
EXTRACTORS = {
    "henry_viii": extract_henry_viii,
    "wollstonecraft": extract_wollstonecraft,
    "abelard_heloise": extract_abelard_heloise,
    "napoleon": extract_napoleon,
    "keats_brawne": extract_keats_brawne,
}
# ---------------------------------------------------------------------------
# Caching
# ---------------------------------------------------------------------------
def get_cache_path(source_id: str) -> str:
    """Return the path of the JSON cache file for *source_id* in CACHE_DIR."""
    filename = f"{source_id}.json"
    return os.path.join(CACHE_DIR, filename)
def load_cached_letters(source_id: str) -> list[dict] | None:
    """Return the cached letters for *source_id*, or ``None`` on a cache miss.

    A missing, unreadable, or corrupt cache file is treated as a miss, so
    the caller can fall back to downloading instead of crashing on a
    damaged file.

    Args:
        source_id: Identifier of the collection whose cache should be read.

    Returns:
        The list stored in the cache file, or ``None`` if no usable cache
        exists.
    """
    try:
        with open(get_cache_path(source_id), encoding="utf-8") as f:
            letters = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError: invalid JSON
        # (json.JSONDecodeError) or invalid UTF-8 (UnicodeDecodeError).
        return None
    # Anything other than a list is not a cache this program wrote.
    return letters if isinstance(letters, list) else None
def save_cached_letters(source_id: str, letters: list[dict]) -> None:
    """Write *letters* as indented UTF-8 JSON to the cache file of *source_id*.

    The cache directory is created first if it does not exist yet.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    destination = get_cache_path(source_id)
    with open(destination, "w", encoding="utf-8") as handle:
        json.dump(letters, handle, ensure_ascii=False, indent=2)
# ---------------------------------------------------------------------------
# Main logic
# ---------------------------------------------------------------------------
def fetch_and_parse(source: dict) -> list[dict]:
    """Download, extract, and cache letters for a given source.

    Cached letters are returned directly when available. Otherwise the
    source text is downloaded, split into letters by the extractor
    registered for the source id, and a non-empty result is written to the
    cache.

    Args:
        source: An entry of ``SOURCES``; the keys ``id``, ``title`` and
            ``url`` are read.

    Returns:
        The letters of the source, or an empty list if no extractor is
        registered or the download fails.
    """
    cached = load_cached_letters(source["id"])
    if cached is not None:
        return cached

    # Look the extractor up before touching the network: without one the
    # downloaded text could not be used anyway.
    extractor = EXTRACTORS.get(source["id"])
    if extractor is None:
        return []

    print(f" Downloading: {source['title']}", flush=True)
    try:
        raw = download_text(source["url"])
    except Exception as e:
        # Deliberately broad: a failed download of one collection is
        # reported and skipped so the remaining collections still load.
        print(f" ⚠ Failed to download {source['title']}: {e}")
        return []

    letters = extractor(raw)
    if letters:
        save_cached_letters(source["id"], letters)
    return letters
def load_all_letters() -> list[dict]:
    """Return the letters of every entry in SOURCES as one flat list.

    Sources are processed in their listed order; each is fetched (or read
    from the cache) via ``fetch_and_parse``.
    """
    return [
        letter
        for source in SOURCES
        for letter in fetch_and_parse(source)
    ]
def wrap_text(text: str, width: int = 78) -> str:
    """Re-flow *text* to *width* columns, keeping blank-line paragraph breaks.

    Paragraphs are the pieces separated by one or more blank lines. Inside
    each paragraph all runs of whitespace (including single newlines) are
    collapsed to one space before wrapping.
    """
    blocks = re.split(r"\n\s*\n", text)
    return "\n\n".join(
        textwrap.fill(" ".join(block.split()), width=width)
        for block in blocks
    )
def truncate_letter(body: str, max_chars: int = 3000) -> str:
    """Shorten *body* to at most *max_chars* characters plus a continuation note.

    Text within the limit is returned unchanged. Longer text is cut at the
    limit and then, if the last full stop lies in the second half of the
    kept text, trimmed back to end on that full stop.
    """
    if len(body) <= max_chars:
        return body

    head = body[:max_chars]
    cut = head.rfind(".")
    if cut > max_chars // 2:
        head = head[:cut + 1]
    return f"{head}\n\n […letter continues…]"
def display_letter(letter: dict) -> None:
    """Pretty-print a single love letter to the terminal.

    Prints a header (author → recipient, optional heading, period), the
    truncated and word-wrapped body, and a footer naming the source.

    Args:
        letter: A letter dict with the keys ``author``, ``recipient``,
            ``period``, ``body`` and ``source``; ``heading`` is optional.
    """
    print()
    print(SEPARATOR)
    # Keep author and recipient visibly apart instead of fusing the names.
    print(f"{letter['author']} → {letter['recipient']}")
    if letter.get("heading"):
        print(f" {letter['heading']}")
    print(f" ({letter['period']})")
    print(SEPARATOR)
    print()

    body = truncate_letter(letter["body"])
    print(wrap_text(body))

    print()
    print(SEPARATOR)
    print(f" Source: {letter['source']}")
    print(" Via Project Gutenberg • gutenberg.org")
    print(SEPARATOR)
    print()
def list_sources() -> None:
    """Print available letter collections.

    For each entry of ``SOURCES`` this prints a numbered title, the
    author → recipient pair with the period, and the Gutenberg e-book
    address.
    """
    print("\n Available collections:\n")
    for i, src in enumerate(SOURCES, 1):
        print(f" {i}. {src['title']}")
        # Keep author and recipient visibly apart instead of fusing the names.
        print(f" {src['author']} → {src['recipient']} ({src['year']})")
        print(f" gutenberg.org/ebooks/{src['gutenberg_id']}")
        print()
def main() -> None:
    """Parse the command line and display random love letters.

    Options:
        -n/--count N   number of letters to show (must be at least 1)
        --list         list the available collections and exit
        --refresh      delete the cache directory before loading
        --source ID    restrict the selection to one collection

    Exits with status 1 when no letters can be loaded.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Display random historic love letters from Project Gutenberg.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent("""\
            examples:
              %(prog)s             Show a random love letter
              %(prog)s -n 3        Show 3 random love letters
              %(prog)s --list      List available collections
              %(prog)s --refresh   Re-download all sources
            """),
    )
    parser.add_argument(
        "-n", "--count", type=int, default=1, metavar="N",
        help="number of letters to display (default: 1)",
    )
    parser.add_argument(
        "--list", action="store_true",
        help="list available letter collections",
    )
    parser.add_argument(
        "--refresh", action="store_true",
        help="clear cache and re-download all sources",
    )
    parser.add_argument(
        "--source", type=str, metavar="ID",
        choices=[s["id"] for s in SOURCES],
        help="only show letters from a specific source",
    )
    args = parser.parse_args()

    # random.sample() raises ValueError for a negative sample size; reject
    # non-positive counts with a usage error instead of a traceback.
    if args.count < 1:
        parser.error("-n/--count must be at least 1")

    if args.list:
        list_sources()
        return

    if args.refresh:
        import shutil

        if os.path.isdir(CACHE_DIR):
            shutil.rmtree(CACHE_DIR)
            print(" Cache cleared.")

    print("\n 💌 Love Letters — loading collections…\n")

    # Select by source id rather than comparing each letter's "source" text
    # with the collection title: the two strings are not guaranteed to be
    # identical, and only the requested collection needs to be loaded.
    selected = [s for s in SOURCES if args.source in (None, s["id"])]
    all_letters = [
        letter for source in selected for letter in fetch_and_parse(source)
    ]

    if not all_letters:
        if args.source:
            print(f" No letters found for source '{args.source}'.")
        else:
            print(" No letters could be loaded. Check your internet connection.")
        sys.exit(1)

    count = min(args.count, len(all_letters))
    for letter in random.sample(all_letters, count):
        display_letter(letter)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Loading…
Cancel
Save