Spaces:
Sleeping
Sleeping
rdmlx committed on
Commit ·
a356e85
1
Parent(s): f027991
Expand to include OT books and add Jerome, John Chrysostom, and Ambrose of Milan
Browse files
- Add OLD_TESTAMENT_BOOKS (39 books) to both prepare_data.py and app.py
- Create ALL_BOOKS = OT + NT (66 books total)
- Add 3 new Church Fathers: Jerome, John Chrysostom, Ambrose of Milan
- Total of 12 Church Fathers now included
- Backup NT-only data to data-nt-only-backup-9fathers/
- app.py +15 -1
- prepare_data.py +19 -5
app.py
CHANGED
|
@@ -66,6 +66,15 @@ commentary_embeddings = {}
|
|
| 66 |
commentary_metadata = {}
|
| 67 |
|
| 68 |
# Book and Father mappings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
NEW_TESTAMENT_BOOKS = [
|
| 70 |
"matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
|
| 71 |
"galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
|
|
@@ -73,6 +82,8 @@ NEW_TESTAMENT_BOOKS = [
|
|
| 73 |
"2peter", "1john", "2john", "3john", "jude", "revelation"
|
| 74 |
]
|
| 75 |
|
|
|
|
|
|
|
| 76 |
CHURCH_FATHERS = [
|
| 77 |
"Augustine of Hippo",
|
| 78 |
"Athanasius of Alexandria",
|
|
@@ -82,7 +93,10 @@ CHURCH_FATHERS = [
|
|
| 82 |
"Cyril of Alexandria",
|
| 83 |
"Irenaeus",
|
| 84 |
"Cyprian",
|
| 85 |
-
"Origen of Alexandria"
|
|
|
|
|
|
|
|
|
|
| 86 |
]
|
| 87 |
|
| 88 |
|
|
|
|
| 66 |
commentary_metadata = {}
|
| 67 |
|
| 68 |
# Book and Father mappings
|
| 69 |
+
OLD_TESTAMENT_BOOKS = [
|
| 70 |
+
"genesis", "exodus", "leviticus", "numbers", "deuteronomy", "joshua", "judges", "ruth",
|
| 71 |
+
"1samuel", "2samuel", "1kings", "2kings", "1chronicles", "2chronicles", "ezra",
|
| 72 |
+
"nehemiah", "esther", "job", "psalms", "proverbs", "ecclesiastes", "songofsolomon",
|
| 73 |
+
"isaiah", "jeremiah", "lamentations", "ezekiel", "daniel", "hosea", "joel", "amos",
|
| 74 |
+
"obadiah", "jonah", "micah", "nahum", "habakkuk", "zephaniah", "haggai", "zechariah",
|
| 75 |
+
"malachi"
|
| 76 |
+
]
|
| 77 |
+
|
| 78 |
NEW_TESTAMENT_BOOKS = [
|
| 79 |
"matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
|
| 80 |
"galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
|
|
|
|
| 82 |
"2peter", "1john", "2john", "3john", "jude", "revelation"
|
| 83 |
]
|
| 84 |
|
| 85 |
+
ALL_BOOKS = OLD_TESTAMENT_BOOKS + NEW_TESTAMENT_BOOKS
|
| 86 |
+
|
| 87 |
CHURCH_FATHERS = [
|
| 88 |
"Augustine of Hippo",
|
| 89 |
"Athanasius of Alexandria",
|
|
|
|
| 93 |
"Cyril of Alexandria",
|
| 94 |
"Irenaeus",
|
| 95 |
"Cyprian",
|
| 96 |
+
"Origen of Alexandria",
|
| 97 |
+
"Jerome",
|
| 98 |
+
"John Chrysostom",
|
| 99 |
+
"Ambrose of Milan"
|
| 100 |
]
|
| 101 |
|
| 102 |
|
prepare_data.py
CHANGED
|
@@ -9,6 +9,15 @@ from pathlib import Path
|
|
| 9 |
import argparse
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
NEW_TESTAMENT_BOOKS = [
|
| 13 |
"matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
|
| 14 |
"galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
|
|
@@ -16,6 +25,8 @@ NEW_TESTAMENT_BOOKS = [
|
|
| 16 |
"2peter", "1john", "2john", "3john", "jude", "revelation"
|
| 17 |
]
|
| 18 |
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
|
| 21 |
"""
|
|
@@ -37,7 +48,7 @@ def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
|
|
| 37 |
print(f"Output directory: {output_dir}")
|
| 38 |
print("-" * 60)
|
| 39 |
|
| 40 |
-
for book in NEW_TESTAMENT_BOOKS:
|
| 41 |
book_dir = source_dir / book
|
| 42 |
|
| 43 |
if not book_dir.exists():
|
|
@@ -95,7 +106,7 @@ def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
|
|
| 95 |
print(f"\nCopy complete:")
|
| 96 |
print(f" Total files copied: {copied_count}")
|
| 97 |
print(f" Total entries: {total_entries}")
|
| 98 |
-
print(f" Books processed: {len(NEW_TESTAMENT_BOOKS) - len(missing_books)}/{len(NEW_TESTAMENT_BOOKS)}")
|
| 99 |
|
| 100 |
if missing_books:
|
| 101 |
print(f" Missing books: {', '.join(missing_books)}")
|
|
@@ -148,7 +159,10 @@ def generate_embeddings_from_db(db_file: Path, output_dir: Path, model_name: str
|
|
| 148 |
"Cyril of Alexandria",
|
| 149 |
"Irenaeus",
|
| 150 |
"Cyprian",
|
| 151 |
-
"Origen of Alexandria"
|
|
|
|
|
|
|
|
|
|
| 152 |
]
|
| 153 |
|
| 154 |
query = """
|
|
@@ -163,10 +177,10 @@ def generate_embeddings_from_db(db_file: Path, output_dir: Path, model_name: str
|
|
| 163 |
AND source_title != ''
|
| 164 |
""".format(
|
| 165 |
','.join('?' * len(top_authors)),
|
| 166 |
-
','.join('?' * len(NEW_TESTAMENT_BOOKS))
|
| 167 |
)
|
| 168 |
|
| 169 |
-
cursor.execute(query, top_authors + NEW_TESTAMENT_BOOKS)
|
| 170 |
rows = cursor.fetchall()
|
| 171 |
|
| 172 |
print(f"Found {len(rows)} commentary entries to process")
|
|
|
|
| 9 |
import argparse
|
| 10 |
|
| 11 |
|
| 12 |
+
OLD_TESTAMENT_BOOKS = [
|
| 13 |
+
"genesis", "exodus", "leviticus", "numbers", "deuteronomy", "joshua", "judges", "ruth",
|
| 14 |
+
"1samuel", "2samuel", "1kings", "2kings", "1chronicles", "2chronicles", "ezra",
|
| 15 |
+
"nehemiah", "esther", "job", "psalms", "proverbs", "ecclesiastes", "songofsolomon",
|
| 16 |
+
"isaiah", "jeremiah", "lamentations", "ezekiel", "daniel", "hosea", "joel", "amos",
|
| 17 |
+
"obadiah", "jonah", "micah", "nahum", "habakkuk", "zephaniah", "haggai", "zechariah",
|
| 18 |
+
"malachi"
|
| 19 |
+
]
|
| 20 |
+
|
| 21 |
NEW_TESTAMENT_BOOKS = [
|
| 22 |
"matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
|
| 23 |
"galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
|
|
|
|
| 25 |
"2peter", "1john", "2john", "3john", "jude", "revelation"
|
| 26 |
]
|
| 27 |
|
| 28 |
+
ALL_BOOKS = OLD_TESTAMENT_BOOKS + NEW_TESTAMENT_BOOKS
|
| 29 |
+
|
| 30 |
|
| 31 |
def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
|
| 32 |
"""
|
|
|
|
| 48 |
print(f"Output directory: {output_dir}")
|
| 49 |
print("-" * 60)
|
| 50 |
|
| 51 |
+
for book in ALL_BOOKS:
|
| 52 |
book_dir = source_dir / book
|
| 53 |
|
| 54 |
if not book_dir.exists():
|
|
|
|
| 106 |
print(f"\nCopy complete:")
|
| 107 |
print(f" Total files copied: {copied_count}")
|
| 108 |
print(f" Total entries: {total_entries}")
|
| 109 |
+
print(f" Books processed: {len(ALL_BOOKS) - len(missing_books)}/{len(ALL_BOOKS)}")
|
| 110 |
|
| 111 |
if missing_books:
|
| 112 |
print(f" Missing books: {', '.join(missing_books)}")
|
|
|
|
| 159 |
"Cyril of Alexandria",
|
| 160 |
"Irenaeus",
|
| 161 |
"Cyprian",
|
| 162 |
+
"Origen of Alexandria",
|
| 163 |
+
"Jerome",
|
| 164 |
+
"John Chrysostom",
|
| 165 |
+
"Ambrose of Milan"
|
| 166 |
]
|
| 167 |
|
| 168 |
query = """
|
|
|
|
| 177 |
AND source_title != ''
|
| 178 |
""".format(
|
| 179 |
','.join('?' * len(top_authors)),
|
| 180 |
+
','.join('?' * len(ALL_BOOKS))
|
| 181 |
)
|
| 182 |
|
| 183 |
+
cursor.execute(query, top_authors + ALL_BOOKS)
|
| 184 |
rows = cursor.fetchall()
|
| 185 |
|
| 186 |
print(f"Found {len(rows)} commentary entries to process")
|