LoveJesus committed on
Commit
258fc23
·
verified ·
1 Parent(s): 3b2806f

Upgrade to MiniLM-L12-v2: NDCG@10 0.1831 (+9.3% over L6)

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ # For God so loved the world that he gave his only begotten Son,
3
+ # that whoever believes in him should not perish but have eternal life. - John 3:16
4
+ tags:
5
+ - sentence-transformers
6
+ - sentence-similarity
7
+ - feature-extraction
8
+ - bible
9
+ - scripture
10
+ - topical-search
11
+ license: mit
12
+ language: en
13
+ base_model: sentence-transformers/all-MiniLM-L12-v2
14
+ datasets:
15
+ - LoveJesus/biblical-topical-dataset-chirho
16
+ metrics:
17
+ - ndcg_at_10
18
+ - mrr_at_10
19
+ - map_at_10
20
+ model-index:
21
+ - name: biblical-topical-search-chirho
22
+ results:
23
+ - task:
24
+ type: information-retrieval
25
+ name: Information Retrieval
26
+ metrics:
27
+ - name: NDCG@10
28
+ type: ndcg_at_10
29
+ value: 0.1831
30
+ - name: MRR@10
31
+ type: mrr_at_10
32
+ value: 0.1415
33
+ - name: MAP@10
34
+ type: map_at_10
35
+ value: 0.1415
36
+ - name: Accuracy@10
37
+ type: accuracy_at_10
38
+ value: 0.3188
39
+ ---
40
+
41
+ # Biblical Topical Search - Chirho
42
+
43
+ Semantic search model for finding biblical passages by topic. Fine-tuned on Nave's Topical Bible (30,000+ entries) and Treasury of Scripture Knowledge cross-references to retrieve relevant KJV verses for any theological query.
44
+
45
+ ## Model Details
46
+
47
+ - **Base Model**: sentence-transformers/all-MiniLM-L12-v2 (33M params, 384-dim)
48
+ - **Training Data**: 136,000 query-passage pairs from Nave's Topical Bible + TSK
49
+ - **Loss**: MultipleNegativesRankingLoss
50
+ - **Training**: 5 epochs, batch 16, lr 2e-5, cosine scheduler
51
+ - **Hardware**: Apple M4 Pro (MPS), ~2 hours
52
+
53
+ ## Metrics (v2 - MiniLM-L12-v2 upgrade)
54
+
55
+ | Metric | v1 (L6-v2) | v2 (L12-v2) | Change |
56
+ |--------|-----------|-------------|--------|
57
+ | **NDCG@10** | 0.1676 | **0.1831** | +9.3% |
58
+ | Accuracy@10 | 0.2882 | **0.3188** | +10.6% |
59
+ | MRR@10 | 0.1305 | **0.1415** | +8.4% |
60
+ | MAP@10 | 0.1305 | **0.1415** | +8.4% |
61
+
62
+ ### Training Trajectory (v2)
63
+
64
+ | Epoch | NDCG@10 | Accuracy@10 |
65
+ |-------|---------|-------------|
66
+ | 1 | 0.1514 | 0.257 |
67
+ | 2 | 0.1654 | 0.286 |
68
+ | 3 | 0.1749 | 0.302 |
69
+ | 4 | 0.1795 | 0.313 |
70
+ | 5 | **0.1831** | **0.319** |
71
+
72
+ ## Usage
73
+
74
+ ```python
75
+ from sentence_transformers import SentenceTransformer
76
+ import numpy as np
77
+
78
+ model = SentenceTransformer("LoveJesus/biblical-topical-search-chirho")
79
+
80
+ query = "What does the Bible say about forgiveness?"
81
+ verses = [
82
+ "For if ye forgive men their trespasses, your heavenly Father will also forgive you. - Matthew 6:14",
83
+ "In the beginning God created the heaven and the earth. - Genesis 1:1",
84
+ "As far as the east is from the west, so far hath he removed our transgressions from us. - Psalm 103:12",
85
+ ]
86
+
87
+ query_emb = model.encode([query])
88
+ verse_embs = model.encode(verses)
89
+
90
+ scores = np.dot(verse_embs, query_emb.T).flatten()
91
+ ranked = sorted(zip(scores, verses), reverse=True)
92
+ for score, verse in ranked:
93
+     print(f" {score:.3f}: {verse}")
94
+ ```
95
+
96
+ ## Part of Bible.Systems
97
+
98
+ This model is part of the [Bible.Systems](https://bible.systems) AI models project — open-source machine learning for biblical scholarship.
99
+
100
+ - **Live Demo**: [HuggingFace Space](https://huggingface.co/spaces/LoveJesus/biblical-topical-search-chirho)
101
+ - **Dataset**: [LoveJesus/biblical-topical-dataset-chirho](https://huggingface.co/datasets/LoveJesus/biblical-topical-dataset-chirho)
102
+ - **All Models**: [LoveJesus on HuggingFace](https://huggingface.co/LoveJesus)
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 384,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 1536,
17
+ "is_decoder": false,
18
+ "layer_norm_eps": 1e-12,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "bert",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 0,
24
+ "position_embedding_type": "absolute",
25
+ "tie_word_embeddings": true,
26
+ "transformers_version": "5.1.0",
27
+ "type_vocab_size": 2,
28
+ "use_cache": true,
29
+ "vocab_size": 30522
30
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.2.2",
4
+ "transformers": "5.1.0",
5
+ "pytorch": "2.10.0"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
eval/Information-Retrieval_evaluation_val-topical-chirho_results.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@10
2
+ 1.0,2125,0.063,0.1414,0.1826,0.2472,0.063,0.063,0.04713333333333333,0.1414,0.036520000000000004,0.1826,0.02472,0.2472,0.11534984126984148,0.14651173930780936,0.11534984126984127
3
+ 2.0,4250,0.0686,0.1512,0.1966,0.2656,0.0686,0.0686,0.05039999999999999,0.1512,0.03932000000000001,0.1966,0.026560000000000004,0.2656,0.12370682539682545,0.1572021694578249,0.12370682539682538
4
+ 3.0,6375,0.0682,0.1584,0.2042,0.2782,0.0682,0.0682,0.0528,0.1584,0.040839999999999994,0.2042,0.027820000000000004,0.2782,0.12690301587301606,0.16260026203280784,0.12690301587301586
5
+ 4.0,8500,0.0696,0.1594,0.2062,0.2846,0.0696,0.0696,0.053133333333333324,0.1594,0.04124,0.2062,0.028460000000000003,0.2846,0.12954928571428573,0.16605838585267033,0.1295492857142857
6
+ 5.0,10625,0.0702,0.1584,0.2042,0.2882,0.0702,0.0702,0.0528,0.1584,0.04084,0.2042,0.028820000000000005,0.2882,0.1304676984126985,0.16755473492351874,0.1304676984126984
7
+ 1.0,8500,0.064,0.1458,0.1926,0.257,0.064,0.064,0.0486,0.1458,0.03852,0.1926,0.0257,0.257,0.11865857142857139,0.15137766715248815,0.11865857142857145
8
+ 2.0,17000,0.067,0.1586,0.206,0.286,0.067,0.067,0.052866666666666666,0.1586,0.0412,0.206,0.0286,0.286,0.12828412698412714,0.16544290848995974,0.128284126984127
9
+ 3.0,25500,0.073,0.1676,0.2158,0.3016,0.073,0.073,0.05586666666666666,0.1676,0.043160000000000004,0.2158,0.030160000000000003,0.3016,0.13592452380952405,0.17488514026229263,0.13592452380952383
10
+ 4.0,34000,0.0726,0.1706,0.2236,0.3128,0.0726,0.0726,0.05686666666666666,0.1706,0.04472,0.2236,0.03128000000000001,0.3128,0.1384629365079366,0.17946596649694194,0.1384629365079365
11
+ 5.0,42500,0.0764,0.1708,0.2234,0.3188,0.0764,0.0764,0.05693333333333333,0.1708,0.044680000000000004,0.2234,0.03188,0.3188,0.14151626984126994,0.18311836451970726,0.14151626984126986
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ee032913bf1067aeb6d04e9e0734fc2edf20594cd275a805b56cc867376a94d
3
+ size 133462104
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 128,
3
+ "do_lower_case": false
4
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "is_local": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 128,
9
+ "never_split": null,
10
+ "pad_token": "[PAD]",
11
+ "sep_token": "[SEP]",
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "unk_token": "[UNK]"
16
+ }