rdmlx committed on
Commit
a356e85
·
1 Parent(s): f027991

Expand to include OT books and add Jerome, John Chrysostom, and Ambrose of Milan

Browse files

- Add OLD_TESTAMENT_BOOKS (39 books) to both prepare_data.py and app.py
- Create ALL_BOOKS = OT + NT (66 books total)
- Add 3 new Church Fathers: Jerome, John Chrysostom, Ambrose of Milan
- Total of 12 Church Fathers now included
- Back up NT-only data to data-nt-only-backup-9fathers/

Files changed (2) hide show
  1. app.py +15 -1
  2. prepare_data.py +19 -5
app.py CHANGED
@@ -66,6 +66,15 @@ commentary_embeddings = {}
66
  commentary_metadata = {}
67
 
68
  # Book and Father mappings
 
 
 
 
 
 
 
 
 
69
  NEW_TESTAMENT_BOOKS = [
70
  "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
71
  "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
@@ -73,6 +82,8 @@ NEW_TESTAMENT_BOOKS = [
73
  "2peter", "1john", "2john", "3john", "jude", "revelation"
74
  ]
75
 
 
 
76
  CHURCH_FATHERS = [
77
  "Augustine of Hippo",
78
  "Athanasius of Alexandria",
@@ -82,7 +93,10 @@ CHURCH_FATHERS = [
82
  "Cyril of Alexandria",
83
  "Irenaeus",
84
  "Cyprian",
85
- "Origen of Alexandria"
 
 
 
86
  ]
87
 
88
 
 
66
  commentary_metadata = {}
67
 
68
  # Book and Father mappings
69
+ OLD_TESTAMENT_BOOKS = [
70
+ "genesis", "exodus", "leviticus", "numbers", "deuteronomy", "joshua", "judges", "ruth",
71
+ "1samuel", "2samuel", "1kings", "2kings", "1chronicles", "2chronicles", "ezra",
72
+ "nehemiah", "esther", "job", "psalms", "proverbs", "ecclesiastes", "songofsolomon",
73
+ "isaiah", "jeremiah", "lamentations", "ezekiel", "daniel", "hosea", "joel", "amos",
74
+ "obadiah", "jonah", "micah", "nahum", "habakkuk", "zephaniah", "haggai", "zechariah",
75
+ "malachi"
76
+ ]
77
+
78
  NEW_TESTAMENT_BOOKS = [
79
  "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
80
  "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
 
82
  "2peter", "1john", "2john", "3john", "jude", "revelation"
83
  ]
84
 
85
+ ALL_BOOKS = OLD_TESTAMENT_BOOKS + NEW_TESTAMENT_BOOKS
86
+
87
  CHURCH_FATHERS = [
88
  "Augustine of Hippo",
89
  "Athanasius of Alexandria",
 
93
  "Cyril of Alexandria",
94
  "Irenaeus",
95
  "Cyprian",
96
+ "Origen of Alexandria",
97
+ "Jerome",
98
+ "John Chrysostom",
99
+ "Ambrose of Milan"
100
  ]
101
 
102
 
prepare_data.py CHANGED
@@ -9,6 +9,15 @@ from pathlib import Path
9
  import argparse
10
 
11
 
 
 
 
 
 
 
 
 
 
12
  NEW_TESTAMENT_BOOKS = [
13
  "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
14
  "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
@@ -16,6 +25,8 @@ NEW_TESTAMENT_BOOKS = [
16
  "2peter", "1john", "2john", "3john", "jude", "revelation"
17
  ]
18
 
 
 
19
 
20
  def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
21
  """
@@ -37,7 +48,7 @@ def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
37
  print(f"Output directory: {output_dir}")
38
  print("-" * 60)
39
 
40
- for book in NEW_TESTAMENT_BOOKS:
41
  book_dir = source_dir / book
42
 
43
  if not book_dir.exists():
@@ -95,7 +106,7 @@ def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
95
  print(f"\nCopy complete:")
96
  print(f" Total files copied: {copied_count}")
97
  print(f" Total entries: {total_entries}")
98
- print(f" Books processed: {len(NEW_TESTAMENT_BOOKS) - len(missing_books)}/{len(NEW_TESTAMENT_BOOKS)}")
99
 
100
  if missing_books:
101
  print(f" Missing books: {', '.join(missing_books)}")
@@ -148,7 +159,10 @@ def generate_embeddings_from_db(db_file: Path, output_dir: Path, model_name: str
148
  "Cyril of Alexandria",
149
  "Irenaeus",
150
  "Cyprian",
151
- "Origen of Alexandria"
 
 
 
152
  ]
153
 
154
  query = """
@@ -163,10 +177,10 @@ def generate_embeddings_from_db(db_file: Path, output_dir: Path, model_name: str
163
  AND source_title != ''
164
  """.format(
165
  ','.join('?' * len(top_authors)),
166
- ','.join('?' * len(NEW_TESTAMENT_BOOKS))
167
  )
168
 
169
- cursor.execute(query, top_authors + NEW_TESTAMENT_BOOKS)
170
  rows = cursor.fetchall()
171
 
172
  print(f"Found {len(rows)} commentary entries to process")
 
9
  import argparse
10
 
11
 
12
+ OLD_TESTAMENT_BOOKS = [
13
+ "genesis", "exodus", "leviticus", "numbers", "deuteronomy", "joshua", "judges", "ruth",
14
+ "1samuel", "2samuel", "1kings", "2kings", "1chronicles", "2chronicles", "ezra",
15
+ "nehemiah", "esther", "job", "psalms", "proverbs", "ecclesiastes", "songofsolomon",
16
+ "isaiah", "jeremiah", "lamentations", "ezekiel", "daniel", "hosea", "joel", "amos",
17
+ "obadiah", "jonah", "micah", "nahum", "habakkuk", "zephaniah", "haggai", "zechariah",
18
+ "malachi"
19
+ ]
20
+
21
  NEW_TESTAMENT_BOOKS = [
22
  "matthew", "mark", "luke", "john", "acts", "romans", "1corinthians", "2corinthians",
23
  "galatians", "ephesians", "philippians", "colossians", "1thessalonians", "2thessalonians",
 
25
  "2peter", "1john", "2john", "3john", "jude", "revelation"
26
  ]
27
 
28
+ ALL_BOOKS = OLD_TESTAMENT_BOOKS + NEW_TESTAMENT_BOOKS
29
+
30
 
31
  def copy_embeddings_from_source(source_dir: Path, output_dir: Path):
32
  """
 
48
  print(f"Output directory: {output_dir}")
49
  print("-" * 60)
50
 
51
+ for book in ALL_BOOKS:
52
  book_dir = source_dir / book
53
 
54
  if not book_dir.exists():
 
106
  print(f"\nCopy complete:")
107
  print(f" Total files copied: {copied_count}")
108
  print(f" Total entries: {total_entries}")
109
+ print(f" Books processed: {len(ALL_BOOKS) - len(missing_books)}/{len(ALL_BOOKS)}")
110
 
111
  if missing_books:
112
  print(f" Missing books: {', '.join(missing_books)}")
 
159
  "Cyril of Alexandria",
160
  "Irenaeus",
161
  "Cyprian",
162
+ "Origen of Alexandria",
163
+ "Jerome",
164
+ "John Chrysostom",
165
+ "Ambrose of Milan"
166
  ]
167
 
168
  query = """
 
177
  AND source_title != ''
178
  """.format(
179
  ','.join('?' * len(top_authors)),
180
+ ','.join('?' * len(ALL_BOOKS))
181
  )
182
 
183
+ cursor.execute(query, top_authors + ALL_BOOKS)
184
  rows = cursor.fetchall()
185
 
186
  print(f"Found {len(rows)} commentary entries to process")