Spaces:

dssjon
/

biblos-cf-api

Sleeping

rdmlx committed on Oct 15, 2025

Commit

a356e85

1 Parent(s): f027991

Expand to include OT books and add Jerome, John Chrysostom, and Ambrose of Milan

- Add OLD_TESTAMENT_BOOKS (39 books) to both prepare_data.py and app.py
- Create ALL_BOOKS = OT + NT (66 books total)
- Add 3 new Church Fathers: Jerome, John Chrysostom, Ambrose of Milan
- Total of 12 Church Fathers now included
- Backup NT-only data to data-nt-only-backup-9fathers/

Files changed (2) hide show

app.py +15 -1
prepare_data.py +19 -5

app.py CHANGED Viewed

@@ -66,6 +66,15 @@ commentary_embeddings = {}
 commentary_metadata = {}
 # Book and Father mappings
 NEW_TESTAMENT_BOOKS = [
     "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
     "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
@@ -73,6 +82,8 @@ NEW_TESTAMENT_BOOKS = [
     "2peter", "1john", "2john", "3john", "jude", "revelation"
 ]
 CHURCH_FATHERS = [
     "Augustine of Hippo",
     "Athanasius of Alexandria",
@@ -82,7 +93,10 @@ CHURCH_FATHERS = [
     "Cyril of Alexandria",
     "Irenaeus",
     "Cyprian",
-    "Origen of Alexandria"
 ]

 commentary_metadata = {}
 # Book and Father mappings
+OLD_TESTAMENT_BOOKS = [
+    "genesis", "exodus", "leviticus", "numbers", "deuteronomy", "joshua", "judges", "ruth",
+    "1samuel", "2samuel", "1kings", "2kings", "1chronicles", "2chronicles", "ezra",
+    "nehemiah", "esther", "job", "psalms", "proverbs", "ecclesiastes", "songofsolomon",
+    "isaiah", "jeremiah", "lamentations", "ezekiel", "daniel", "hosea", "joel", "amos",
+    "obadiah", "jonah", "micah", "nahum", "habakkuk", "zephaniah", "haggai", "zechariah",
+    "malachi"
+]
 NEW_TESTAMENT_BOOKS = [
     "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
     "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
     "2peter", "1john", "2john", "3john", "jude", "revelation"
 ]
+ALL_BOOKS = OLD_TESTAMENT_BOOKS + NEW_TESTAMENT_BOOKS
 CHURCH_FATHERS = [
     "Augustine of Hippo",
     "Athanasius of Alexandria",
     "Cyril of Alexandria",
     "Irenaeus",
     "Cyprian",
+    "Origen of Alexandria",
+    "Jerome",
+    "John Chrysostom",
+    "Ambrose of Milan"
 ]

prepare_data.py CHANGED Viewed

@@ -9,6 +9,15 @@ from pathlib import Path
 import argparse
 NEW_TESTAMENT_BOOKS = [
     "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
     "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
@@ -16,6 +25,8 @@ NEW_TESTAMENT_BOOKS = [
     "2peter", "1john", "2john", "3john", "jude", "revelation"
 ]
 def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
     """
@@ -37,7 +48,7 @@ def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
     print(f"Output directory: {output_dir}")
     print("-" * 60)
-    for book in NEW_TESTAMENT_BOOKS:
         book_dir = source_dir / book
         if not book_dir.exists():
@@ -95,7 +106,7 @@ def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
     print(f"\nCopy complete:")
     print(f"  Total files copied: {copied_count}")
     print(f"  Total entries: {total_entries}")
-    print(f"  Books processed: {len(NEW_TESTAMENT_BOOKS) - len(missing_books)}/{len(NEW_TESTAMENT_BOOKS)}")
     if missing_books:
         print(f"  Missing books: {', '.join(missing_books)}")
@@ -148,7 +159,10 @@ def generate_embeddings_from_db(db_file: Path, output_dir: Path, model_name: str
         "Cyril of Alexandria",
         "Irenaeus",
         "Cyprian",
-        "Origen of Alexandria"
     ]
     query = """
@@ -163,10 +177,10 @@ def generate_embeddings_from_db(db_file: Path, output_dir: Path, model_name: str
         AND source_title != ''
     """.format(
         ','.join('?' * len(top_authors)),
-        ','.join('?' * len(NEW_TESTAMENT_BOOKS))
     )
-    cursor.execute(query, top_authors + NEW_TESTAMENT_BOOKS)
     rows = cursor.fetchall()
     print(f"Found {len(rows)} commentary entries to process")

 import argparse
+OLD_TESTAMENT_BOOKS = [
+    "genesis", "exodus", "leviticus", "numbers", "deuteronomy", "joshua", "judges", "ruth",
+    "1samuel", "2samuel", "1kings", "2kings", "1chronicles", "2chronicles", "ezra",
+    "nehemiah", "esther", "job", "psalms", "proverbs", "ecclesiastes", "songofsolomon",
+    "isaiah", "jeremiah", "lamentations", "ezekiel", "daniel", "hosea", "joel", "amos",
+    "obadiah", "jonah", "micah", "nahum", "habakkuk", "zephaniah", "haggai", "zechariah",
+    "malachi"
+]
 NEW_TESTAMENT_BOOKS = [
     "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
     "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
     "2peter", "1john", "2john", "3john", "jude", "revelation"
 ]
+ALL_BOOKS = OLD_TESTAMENT_BOOKS + NEW_TESTAMENT_BOOKS
 def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
     """
     print(f"Output directory: {output_dir}")
     print("-" * 60)
+    for book in ALL_BOOKS:
         book_dir = source_dir / book
         if not book_dir.exists():
     print(f"\nCopy complete:")
     print(f"  Total files copied: {copied_count}")
     print(f"  Total entries: {total_entries}")
+    print(f"  Books processed: {len(ALL_BOOKS) - len(missing_books)}/{len(ALL_BOOKS)}")
     if missing_books:
         print(f"  Missing books: {', '.join(missing_books)}")
         "Cyril of Alexandria",
         "Irenaeus",
         "Cyprian",
+        "Origen of Alexandria",
+        "Jerome",
+        "John Chrysostom",
+        "Ambrose of Milan"
     ]
     query = """
         AND source_title != ''
     """.format(
         ','.join('?' * len(top_authors)),
+        ','.join('?' * len(ALL_BOOKS))
     )
+    cursor.execute(query, top_authors + ALL_BOOKS)
     rows = cursor.fetchall()
     print(f"Found {len(rows)} commentary entries to process")