EphAsad committed on
Commit
b437116
·
verified ·
1 Parent(s): 1bce166

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitattributes +43 -35
  2. .space_config.json +4 -0
  3. README.md +10 -10
  4. app.py +978 -0
  5. requirements.txt +25 -0
.gitattributes CHANGED
@@ -1,35 +1,43 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/bacteria_db.xlsx filter=lfs diff=lfs merge=lfs -text
37
+ models/genus_xgb.json filter=lfs diff=lfs merge=lfs -text
38
+ eph filter=lfs diff=lfs merge=lfs -text
39
+ eph.jpeg filter=lfs diff=lfs merge=lfs -text
40
+ static/eph.jpeg filter=lfs diff=lfs merge=lfs -text
41
+ data/gold_tests.json filter=lfs diff=lfs merge=lfs -text
42
+ training/gold_tests.json filter=lfs diff=lfs merge=lfs -text
43
+ data/rag/index/kb_index.json filter=lfs diff=lfs merge=lfs -text
.space_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sdk": "streamlit",
3
+ "python": "3.10"
4
+ }
README.md CHANGED
@@ -1,10 +1,10 @@
1
- ---
2
- title: BactKing
3
- emoji: 💻
4
- colorFrom: green
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
- ---
 
1
+ ---
2
+ title: BactKing
3
+ emoji: 💻
4
+ colorFrom: green
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
app.py ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # ============================================================
3
+ # BactAI-D — Microbiology Identification (LLM-Toggle + RAG)
4
+ #
5
+ # - LLM parser OFF by default (safe for HF Spaces)
6
+ # - Checkbox to enable LLM parser:
7
+ # "Enable LLM Parser (Phi-3 Mini — Only Applicable Locally)"
8
+ # - Tri-Fusion + ML hybrid identification
9
+ # - Hybrid weighting:
10
+ # * If ML >= 0.90 → 0.3 * Tri-Fusion + 0.7 * ML
11
+ # * Else → 0.5 * Tri-Fusion + 0.5 * ML
12
+ # - Confidence bands:
13
+ # <65% → Low Discrimination
14
+ # 65–79 → Acceptable Identification
15
+ # 80–89 → Good Identification
16
+ # ≥90 → Excellent Identification
17
+ # - RAG (Mistral-7B-Instruct) always enabled for top genera
18
+ # - Commit-to-HF kept with all key artefacts
19
+ #
20
+ # TOP-5 TABLE (DECISION AID) RULE:
21
+ # ✅ Confidence is assigned AFTER unified scoring.
22
+ # ✅ Only Rank #1 may be Acceptable/Good/Excellent.
23
+ # ✅ If Rank #1 is Low Discrimination, ALL ranks are Low Discrimination.
24
+ # ✅ Ranks #2–#5 are always Low Discrimination (even if their % is high).
25
+ #
26
+ # TOP-5 TABLE (DECISION AID) COLUMNS:
27
+ # ✅ Genus
28
+ # ✅ Probability % (within TOP-5, sums to 100%)
29
+ # ✅ Probability (Odds) — human-friendly ("1 in X")
30
+ # ✅ Confidence (decision_band logic above)
31
+ # ============================================================
32
+
33
+ from __future__ import annotations
34
+
35
+ import os
36
+ from datetime import datetime
37
+ from typing import Dict, Any, List, Tuple
38
+
39
+ import pandas as pd
40
+ import gradio as gr
41
+
42
+ # ============================================================
43
+ # ENGINE IMPORTS
44
+ # ============================================================
45
+
46
+ from engine.bacteria_identifier import BacteriaIdentifier
47
+ from engine.parser_rules import parse_text_rules
48
+ from engine.parser_ext import parse_text_extended
49
+ from engine.parser_fusion import parse_text_fused
50
+
51
+ # We will *not* import parser_llm directly here.
52
+ # LLM usage is controlled via the `use_llm` flag passed into parse_text_fused
53
+
54
+ HAS_LLM = True # Architecturally supported; UI toggle decides whether to use it.
55
+
56
+ # ============================================================
57
+ # ML GENUS PREDICTOR
58
+ # ============================================================
59
+
60
+ try:
61
+ from engine.genus_predictor import predict_genus_from_fused
62
+ HAS_GENUS_ML = True
63
+ except Exception as e:
64
+ print(f"[app] ML predictor unavailable: {type(e).__name__}: {e}")
65
+ HAS_GENUS_ML = False
66
+
67
+ # ============================================================
68
+ # TRAINING MODULES
69
+ # ============================================================
70
+
71
+ try:
72
+ from training.parser_eval import run_parser_eval
73
+ HAS_PARSER_EVAL = True
74
+ except Exception as e:
75
+ print(f"[app] parser_eval unavailable: {type(e).__name__}: {e}")
76
+ HAS_PARSER_EVAL = False
77
+
78
+ try:
79
+ from training.gold_trainer import train_from_gold
80
+ HAS_GOLD_TRAINER = True
81
+ except Exception as e:
82
+ print(f"[app] gold_trainer unavailable: {type(e).__name__}: {e}")
83
+ HAS_GOLD_TRAINER = False
84
+
85
+ try:
86
+ from training.field_weight_trainer import train_field_weights
87
+ HAS_FIELD_WEIGHT_TRAINER = True
88
+ except Exception as e:
89
+ print(f"[app] field_weight_trainer unavailable: {type(e).__name__}: {e}")
90
+ HAS_FIELD_WEIGHT_TRAINER = False
91
+
92
+ try:
93
+ from engine.train_genus_model import train_genus_model
94
+ HAS_GENUS_TRAINER = True
95
+ except Exception as e:
96
+ print(f"[app] genus trainer unavailable: {type(e).__name__}: {e}")
97
+ HAS_GENUS_TRAINER = False
98
+
99
+ # ============================================================
100
+ # RAG INDEX BUILDER
101
+ # ============================================================
102
+
103
+ try:
104
+ from training.rag_index_builder import build_rag_index
105
+ HAS_RAG_INDEX_BUILDER = True
106
+ except Exception as e:
107
+ print(f"[app] rag_index_builder unavailable: {type(e).__name__}: {e}")
108
+ HAS_RAG_INDEX_BUILDER = False
109
+
110
+ # ============================================================
111
+ # PHASE 1 — OVERALL RANKER
112
+ # ============================================================
113
+
114
+ from scoring.overall_ranker import compute_overall_scores
115
+
116
+ # ============================================================
117
+ # DIAGNOSTIC ANCHORS (OVERRIDES)
118
+ # ============================================================
119
+
120
+ from scoring.diagnostic_anchors import apply_diagnostic_overrides
121
+
122
+ # ============================================================
123
+ # RAG IMPORTS (Mistral + Retriever)
124
+ # ============================================================
125
+
126
+ from rag.rag_retriever import retrieve_rag_context
127
+ from rag.rag_generator import generate_genus_rag_explanation
128
+ from rag.species_scorer import score_species_for_genus
129
+
130
+ # ============================================================
131
+ # DATA LOADING
132
+ # ============================================================
133
+
134
def load_db() -> Tuple[pd.DataFrame, str]:
    """Load the bacteria reference workbook and report its modification date.

    ``data/bacteria_db.xlsx`` is tried first, then ``bacteria_db.xlsx`` in the
    current working directory.

    Returns:
        A ``(frame, last_updated)`` pair: the workbook as read by
        ``pandas.read_excel`` (string column labels stripped of surrounding
        whitespace) and the file's modification date as ``YYYY-MM-DD``.

    Raises:
        FileNotFoundError: If neither candidate path exists.
    """
    candidates = (os.path.join("data", "bacteria_db.xlsx"), "bacteria_db.xlsx")
    path = next((p for p in candidates if os.path.exists(p)), None)
    if path is None:
        raise FileNotFoundError(
            "bacteria_db.xlsx not found in 'data/' or project root."
        )

    df = pd.read_excel(path)
    # Header cells are not guaranteed to be text (a numeric or date header
    # yields a non-str label), so only real strings are stripped.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
    mtime = os.path.getmtime(path)
    return df, datetime.fromtimestamp(mtime).strftime("%Y-%m-%d")
151
+
152
+
153
# Module-level singletons: the reference table is read once at import time
# and the identifier engine is constructed from it.
DB, DB_LAST_UPDATED = load_db()
ENG = BacteriaIdentifier(DB)
155
+
156
+ # ============================================================
157
+ # CONFIDENCE BANDS (FINAL CONTRACT)
158
+ # ============================================================
159
+
160
+ def _confidence_band_local(p: float) -> str:
161
+ """
162
+ Confidence band based on the FINAL contract:
163
+ <0.65 -> Low Discrimination
164
+ 0.65-0.79 -> Acceptable Identification
165
+ 0.80-0.89 -> Good Identification
166
+ >=0.90 -> Excellent Identification
167
+ """
168
+ if p >= 0.90:
169
+ return "Excellent Identification"
170
+ if p >= 0.80:
171
+ return "Good Identification"
172
+ if p >= 0.65:
173
+ return "Acceptable Identification"
174
+ return "Low Discrimination"
175
+
176
+
177
+ def _apply_top5_decision_confidence(unified_ranking: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
178
+ """
179
+ TOP-5 TABLE DECISION RULE:
180
+ - Only rank #1 can be Acceptable/Good/Excellent.
181
+ - If rank #1 is Low Discrimination -> ALL ranks Low Discrimination.
182
+ - Ranks #2-#5 ALWAYS Low Discrimination.
183
+ We store this as:
184
+ item["decision_band"] (for the top-5 table + UI labels if desired)
185
+ """
186
+ if not unified_ranking:
187
+ return unified_ranking
188
+
189
+ # Determine rank-1 band based on unified combined_score
190
+ top = unified_ranking[0]
191
+ top_score = float(top.get("combined_score", 0.0) or 0.0)
192
+ top_band = _confidence_band_local(top_score)
193
+
194
+ if top_band == "Low Discrimination":
195
+ # All LD
196
+ for item in unified_ranking:
197
+ item["decision_band"] = "Low Discrimination"
198
+ return unified_ranking
199
+
200
+ # Rank1 gets its true band; everyone else forced LD
201
+ unified_ranking[0]["decision_band"] = top_band
202
+ for item in unified_ranking[1:]:
203
+ item["decision_band"] = "Low Discrimination"
204
+ return unified_ranking
205
+
206
+
207
+ def _format_odds_human_friendly(odds_1000: int) -> str:
208
+ """
209
+ Convert odds per 1000 into a human-friendly "1 in X".
210
+ Example:
211
+ odds_1000 = 500 -> 1 in 2
212
+ odds_1000 = 333 -> 1 in 3
213
+ odds_1000 = 125 -> 1 in 8
214
+ """
215
+ try:
216
+ o = int(odds_1000)
217
+ except Exception:
218
+ o = 0
219
+
220
+ if o <= 0:
221
+ return "—"
222
+ # 1000/o gives expected "1 in X"
223
+ x = int(round(1000.0 / float(o)))
224
+ if x <= 1:
225
+ return "1 in 1"
226
+ return f"1 in {x}"
227
+
228
+
229
+ def _safe_float(x, default: float = 0.0) -> float:
230
+ try:
231
+ return float(x)
232
+ except Exception:
233
+ return default
234
+
235
+
236
+ # ============================================================
237
+ # CORE IDENTIFICATION PIPELINE
238
+ # ============================================================
239
+
240
def _blend_tri_fusion_and_ml(tf: float, ml: float, tf_gate: float = 0.30) -> float:
    """Blend a Tri-Fusion score and an ML probability (both as fractions).

    Weighting:
        * ML <= 0.01 -> 0.01 * TF + 0.99 * ML
        * ML >= 0.90 -> 0.3 * TF + 0.7 * ML
        * otherwise  -> 0.5 * TF + 0.5 * ML

    TF gate: when ``tf <= tf_gate`` the blended score is capped at ``tf``.
    """
    if ml <= 0.01:
        combined = 0.01 * tf + 0.99 * ml
    elif ml >= 0.90:
        combined = 0.3 * tf + 0.7 * ml
    else:
        combined = 0.5 * tf + 0.5 * ml

    if tf <= tf_gate:
        combined = min(combined, tf)
    return combined


def compute_trifusion_and_ml(text: str, use_llm_parser: bool = False) -> Dict[str, Any]:
    """Run the identification pipeline for one free-text description.

    Steps: fused parsing, Tri-Fusion identification via ``ENG``, optional ML
    genus prediction, hybrid blending into a unified ranking (diagnostic
    overrides, then the rank-1-only decision-band rule), and the top-5
    overall ranker.

    Args:
        text: Description to analyse; ``None`` or whitespace-only input
            short-circuits with an ``error`` message and empty results.
        use_llm_parser: Forwarded to ``parse_text_fused`` as ``use_llm``.

    Returns:
        A dict with keys ``error``, ``fused_fields``, ``tri_fusion_results``,
        ``tri_fusion_summary_markdown``, ``ml_genus_results``,
        ``ml_summary_markdown``, ``unified_summary_markdown``,
        ``unified_ranking``, ``overall_scores`` and ``raw``.
    """
    text = text or ""
    if not text.strip():
        return {
            "error": "Please enter a description.",
            "fused_fields": {},
            "tri_fusion_results": [],
            "tri_fusion_summary_markdown": "",
            "ml_genus_results": [],
            "ml_summary_markdown": "",
            "unified_summary_markdown": "",
            "unified_ranking": [],
            "overall_scores": {},
            "raw": {},
        }

    # Shared by the ML summary and the unified summary.
    band_emoji = {
        "Excellent Identification": "🟢",
        "Good Identification": "🟡",
        "Acceptable Identification": "🟠",
        "Low Discrimination": "🔴",
    }

    # 1) Tri-Fusion
    try:
        fusion = parse_text_fused(text, use_llm=use_llm_parser)
    except TypeError:
        # Retry without the keyword in case parse_text_fused does not take it.
        fusion = parse_text_fused(text)

    fused_fields = fusion.get("fused_fields", {})
    results = ENG.identify(fused_fields)

    # Tri-Fusion summary
    tri_lines: List[str] = []
    if not results:
        tri_lines.append("No matches found.")
    else:
        tri_lines.append("Tri-Fusion Identification Results:\n")
        for r in results:
            blended = r.blended_confidence_percent()
            core = r.confidence_percent()
            true = r.true_confidence()
            emoji = "🟢" if blended >= 75 else "🟡" if blended >= 50 else "🔴"
            tri_lines.append(
                f"- **{r.genus}** — {emoji} {blended}% "
                f"(Core: {core}%, True: {true}%)"
            )
    tri_md = "\n".join(tri_lines)

    # 2) ML GENUS MODEL
    ml_results_raw: List[Dict[str, Any]] = []
    ml_lines: List[str] = []

    if not HAS_GENUS_ML:
        ml_lines.append("ML genus model not available.")
    else:
        try:
            preds = predict_genus_from_fused(fused_fields, top_k=10)
            if preds:
                ml_lines.append("ML Genus Model Results (XGBoost, Stage 12D):\n")
                for rank, (genus, prob, band) in enumerate(preds, start=1):
                    # Plain floats keep the payload independent of whatever
                    # numeric type the predictor hands back.
                    prob = float(prob)
                    perc = prob * 100.0
                    emo = band_emoji.get(band, "⚪")
                    ml_lines.append(
                        f"{rank}. **{genus}** — {emo} {perc:.1f}% ({band})"
                    )
                    ml_results_raw.append(
                        {
                            "genus": genus,
                            "probability": prob,
                            "probability_percent": perc,
                            "confidence_band": band,
                        }
                    )
            else:
                ml_lines.append("ML model returned no predictions.")
        except Exception as e:
            ml_lines.append(f"ML genus model error: {type(e).__name__}: {e}")

    ml_md = "\n".join(ml_lines)

    # 3) UNIFIED HYBRID RANKING
    unified_lines: List[str] = []
    unified_ranking: List[Dict[str, Any]] = []

    # Best blended Tri-Fusion score per genus, as a fraction.
    tri_blended_by_genus: Dict[str, float] = {}
    for r in results:
        g = str(r.genus)
        s = (r.blended_confidence_percent() or 0.0) / 100.0
        if s > tri_blended_by_genus.get(g, 0.0):
            tri_blended_by_genus[g] = s

    ml_by_genus: Dict[str, float] = {
        item["genus"]: float(item["probability"]) for item in ml_results_raw
    }

    all_genera = set(tri_blended_by_genus) | set(ml_by_genus)

    if all_genera:
        # Build raw unified scores. Sorted iteration keeps tie order stable
        # between runs (plain set order depends on string hashing).
        for g in sorted(all_genera):
            tf = tri_blended_by_genus.get(g, 0.0)
            ml = ml_by_genus.get(g, 0.0)
            combined = _blend_tri_fusion_and_ml(tf, ml)

            unified_ranking.append(
                {
                    "genus": g,
                    "combined_score": combined,
                    "combined_percent": combined * 100.0,
                    "tri_fusion_blended_percent": tf * 100.0,
                    "ml_prob_percent": ml * 100.0,
                    "ml_band": _confidence_band_local(combined),  # band based on combined score
                }
            )

        # Apply diagnostic anchor overrides
        unified_ranking = apply_diagnostic_overrides(text, unified_ranking)

        # Sort after overrides
        unified_ranking.sort(
            key=lambda d: d.get("combined_score", 0.0), reverse=True
        )

        # NOTE(review): overrides may change combined_score; re-derive the
        # percent so cards (combined_percent) and this summary
        # (combined_score) cannot disagree. Confirm against
        # scoring.diagnostic_anchors.
        for item in unified_ranking:
            if "combined_score" in item:
                item["combined_percent"] = _safe_float(item["combined_score"]) * 100.0

        # Apply TOP-5 decision confidence rule (rank1-only)
        unified_ranking = _apply_top5_decision_confidence(unified_ranking)

        # Build markdown summary
        unified_lines.append("Unified Hybrid Ranking (Tri-Fusion + ML Genus Model):\n")
        for rank, item in enumerate(unified_ranking[:10], start=1):
            g = item["genus"]
            combined = item["combined_score"]
            band = item.get("decision_band") or item.get("ml_band") or "Low Discrimination"
            emo = band_emoji.get(band, "⚪")
            tf_pct = item["tri_fusion_blended_percent"]
            ml_pct = item["ml_prob_percent"]
            unified_lines.append(
                f"{rank}. **{g}** — {emo} Combined: {combined*100:.1f}% "
                f"(Tri-Fusion: {tf_pct:.1f}% | ML: {ml_pct:.1f}% — {band})"
            )

    unified_md = "\n".join(unified_lines)

    # 4) OVERALL RANKER (TOP-5 NORMALISATION)
    try:
        # NOTE: keep this contract stable for now; we will refactor overall_ranker next.
        # The "tri_scores" argument receives the unified combined scores.
        tri_scores_map = {
            item["genus"]: float(item.get("combined_score", 0.0) or 0.0)
            for item in unified_ranking
        }

        overall_scores = compute_overall_scores(
            ml_scores=ml_results_raw,
            tri_scores=tri_scores_map,
            top_k=5,
        )
    except Exception as e:
        overall_scores = {
            "error": f"overall_ranker failed: {type(e).__name__}: {e}",
            "overall": [],
            "normalized_share_percent": [],
            "probabilities_1000": [],
        }

    return {
        "error": None,
        "fused_fields": fused_fields,
        "tri_fusion_results": results,
        "tri_fusion_summary_markdown": tri_md,
        "ml_genus_results": ml_results_raw,
        "ml_summary_markdown": ml_md,
        "unified_summary_markdown": unified_md,
        "unified_ranking": unified_ranking,
        "overall_scores": overall_scores,
        "raw": fusion,
    }
433
+
434
+
435
+ # ============================================================
436
+ # GENUS CARD RENDERER
437
+ # ============================================================
438
+
439
+ def _genus_card_markdown(
440
+ item: Dict[str, Any],
441
+ rank: int,
442
+ rag_text: str | None = None,
443
+ ) -> str:
444
+ genus = item["genus"]
445
+ combined = item["combined_percent"]
446
+ tf = item["tri_fusion_blended_percent"]
447
+ ml = item["ml_prob_percent"]
448
+
449
+ # Show the DECISION confidence band (rank1-only rule)
450
+ decision_band = item.get("decision_band") or item.get("ml_band") or "Low Discrimination"
451
+
452
+ if combined >= 80:
453
+ bar_color = "#1e88e5"
454
+ elif combined >= 65:
455
+ bar_color = "#43a047"
456
+ elif combined >= 50:
457
+ bar_color = "#fb8c00"
458
+ else:
459
+ bar_color = "#e53935"
460
+
461
+ bar_html = f"""
462
+ <div style="background:rgba(255,255,255,0.08); border-radius:6px; padding:4px; margin-top:4px; margin-bottom:8px;">
463
+ <div style="height:12px; width:{combined:.1f}%; max-width:100%; background:{bar_color}; border-radius:4px;"></div>
464
+ </div>
465
+ """
466
+
467
+ rag_section = ""
468
+ if rag_text:
469
+ rag_section = f"""
470
+ #### RAG Interpretation (Genus-Level)
471
+
472
+ {rag_text}
473
+ """
474
+
475
+ return f"""
476
+ ### Rank {rank}: **{genus}**
477
+
478
+ {bar_html}
479
+
480
+ - **Combined Score:** {combined:.1f}%
481
+ - **Tri-Fusion (Blended):** {tf:.1f}%
482
+ - **ML Probability:** {ml:.1f}%
483
+ - **Decision Confidence:** {decision_band}
484
+
485
+ {rag_section}
486
+ """
487
+
488
+
489
+ # ============================================================
490
+ # IDENTIFICATION CALLBACK
491
+ # ============================================================
492
+
493
def run_identification(text: str, use_llm_parser: bool):
    """Gradio callback: identify, build the top-5 table and the genus cards.

    Args:
        text: Free-text description.
        use_llm_parser: Forwarded to ``compute_trifusion_and_ml``.

    Returns:
        A flat tuple: the debug payload dict, the top-5 table rows
        (``[genus, probability %, odds, confidence]``), five accordion
        updates and five markdown strings (one pair per rank slot; unused
        slots are hidden / empty).
    """
    result = compute_trifusion_and_ml(text, use_llm_parser=use_llm_parser)

    # DEBUG payload. ``error`` is included so that pipeline messages
    # (e.g. empty input) are not silently dropped.
    debug_payload = {
        "error": result.get("error"),
        "fused_fields": result["fused_fields"],
        "tri_fusion_summary_markdown": result["tri_fusion_summary_markdown"],
        "ml_genus_results": result["ml_genus_results"],
        "unified_summary_markdown": result["unified_summary_markdown"],
        "unified_ranking": result["unified_ranking"],
        "overall_scores": result["overall_scores"],
        "raw": result["raw"],
    }

    ranking = result["unified_ranking"] or []

    # ------------------------------------------------------------
    # Top-5 Decision Table (ROBUST, APP-SIDE)
    # ------------------------------------------------------------
    # We do NOT trust overall_ranker yet.
    # We defensively reconstruct probabilities so the table always fills.
    # ------------------------------------------------------------

    top5_rows: List[List[str]] = []

    overall = result.get("overall_scores") or {}
    overall_list = overall.get("overall") or []
    probs_1000_list = overall.get("probabilities_1000") or []

    share_by_genus: Dict[str, float] = {}
    odds_by_genus: Dict[str, int] = {}

    # 1) Normalized share
    for it in overall_list:
        if not isinstance(it, dict):
            continue
        g = str(it.get("genus") or "").strip()
        if not g:
            continue

        share = (
            it.get("normalized_share")
            or it.get("share")
            or it.get("normalized_share_percent")
        )

        if share is not None:
            s = _safe_float(share)
            # Heuristic: values above 1 are taken to be percentages.
            if s > 1.0:  # percent → fraction
                s = s / 100.0
            share_by_genus[g] = max(0.0, min(1.0, s))

    # 2) Odds /1000
    for it in probs_1000_list:
        if not isinstance(it, dict):
            continue
        g = str(it.get("genus") or "").strip()
        if not g:
            continue
        o = it.get("odds_1000") or it.get("prob_1000")
        if isinstance(o, (int, float)):
            odds_by_genus[g] = int(round(o))

    # 3) HARD FALLBACK — derive from unified_ranking if needed
    if not share_by_genus:
        total = sum(float(item.get("combined_score", 0.0) or 0.0) for item in ranking[:5]) or 1.0
        for item in ranking[:5]:
            genus = str(item.get("genus") or "").strip()
            if genus:
                share_by_genus[genus] = float(item.get("combined_score", 0.0) or 0.0) / total

    # 4) Build table rows IN RANK ORDER
    top1_band = ranking[0].get("decision_band") if ranking else "Low Discrimination"

    for idx, item in enumerate(ranking[:5], start=1):
        genus = str(item.get("genus") or "").strip()

        share = share_by_genus.get(genus, 0.0)
        # If overall_ranker doesn't provide odds, approximate odds_1000 from share.
        odds_1000 = odds_by_genus.get(genus, int(round(share * 1000)))

        prob_pct = f"{share * 100.0:.2f}%"
        odds_text = _format_odds_human_friendly(odds_1000)

        # Only rank 1 may carry a band other than Low Discrimination.
        if top1_band == "Low Discrimination":
            confidence = "Low Discrimination"
        else:
            confidence = top1_band if idx == 1 else "Low Discrimination"

        top5_rows.append([
            genus,
            prob_pct,
            odds_text,
            confidence,
        ])

    # RAG explanations for top genera (rank 1)
    rag_summaries: Dict[str, str] = {}
    if ranking:
        top_item = ranking[0]
        genus = top_item["genus"]

        try:
            ctx = retrieve_rag_context(
                phenotype_text=text,
                target_genus=genus,
                top_k=5,
                parsed_fields=result["fused_fields"],  # 🔑 enables species scoring
            )

            # 🔍 HF SPACES DEBUG LOGGING
            print("\n" + "=" * 80)
            print("RAG DEBUG — GENERATOR INPUT")
            print("=" * 80)

            print("\n[PHENOTYPE]")
            print(text)

            print("\n[LLM CONTEXT]")
            print(ctx.get("llm_context_shaped", ""))

            print("\n[DEBUG CONTEXT]")
            print(ctx.get("debug_context", ""))

            print("=" * 80 + "\n")
            # 🔍 END DEBUG

            explanation = generate_genus_rag_explanation(
                phenotype_text=text,
                rag_context=ctx.get("llm_context_shaped", "") or ctx.get("llm_context", ""),
                genus=genus,
            )

            # -------------------------------
            # SPECIES BEST MATCH
            # -------------------------------
            # Best-effort: any failure, an empty ranking or a missing name
            # all yield "Not specified".
            try:
                species_out = score_species_for_genus(
                    target_genus=genus,
                    parsed_fields=result["fused_fields"],
                    top_n=1,
                )
                ranked = species_out.get("ranked", []) if isinstance(species_out, dict) else []
                full_name = ""
                score = None
                if ranked:
                    best = ranked[0]
                    full_name = str(best.get("full_name") or "").strip()
                    score = best.get("score")

                if not full_name:
                    explanation += "\n\n**Species Best Match:** Not specified"
                elif isinstance(score, (int, float)):
                    explanation += f"\n\n**Species Best Match:** {full_name} ({float(score) * 100.0:.1f}%)"
                else:
                    explanation += f"\n\n**Species Best Match:** {full_name}"
            except Exception:
                explanation += "\n\n**Species Best Match:** Not specified"

            rag_summaries[genus] = explanation
        except Exception as e:
            rag_summaries[genus] = f"(RAG error: {type(e).__name__}: {e})"

    # Accordions: five slots, hidden/empty until filled from the ranking.
    accordion_updates = [
        gr.update(visible=False, open=False, label="") for _ in range(5)
    ]
    markdown_updates = [""] * 5

    for idx, item in enumerate(ranking[:5]):
        decision_band = item.get("decision_band") or "Low Discrimination"
        label = f"{item['genus']} — {item['combined_percent']:.1f}% — {decision_band}"
        accordion_updates[idx] = gr.update(
            visible=True,
            open=(idx == 0),
            label=label,
        )
        rag_text = rag_summaries.get(item["genus"])
        markdown_updates[idx] = _genus_card_markdown(
            item,
            rank=idx + 1,
            rag_text=rag_text,
        )

    return debug_payload, top5_rows, *accordion_updates, *markdown_updates
677
+
678
+
679
+ # ============================================================
680
+ # PARSER DEBUG CALLBACKS
681
+ # ============================================================
682
+
683
def run_rule_parser(text: str):
    """Return a visible/open Gradio update plus ``parse_text_rules`` output for ``text``."""
    panel_update = gr.update(visible=True, open=True)
    parsed = parse_text_rules(text or "")
    return panel_update, parsed
685
+
686
def run_extended_parser(text: str):
    """Return a visible/open Gradio update plus ``parse_text_extended`` output for ``text``."""
    panel_update = gr.update(visible=True, open=True)
    parsed = parse_text_extended(text or "")
    return panel_update, parsed
688
+
689
def run_trifusion_debug(text: str, use_llm_parser: bool):
    """Run the full pipeline and return its raw result plus the three summaries.

    Returns a 5-tuple: a visible/open Gradio update, the complete result
    dict, and the Tri-Fusion, ML and unified markdown summaries.
    """
    outcome = compute_trifusion_and_ml(text or "", use_llm_parser=use_llm_parser)
    summaries = tuple(
        outcome[key]
        for key in (
            "tri_fusion_summary_markdown",
            "ml_summary_markdown",
            "unified_summary_markdown",
        )
    )
    return (gr.update(visible=True, open=True), outcome, *summaries)
698
+
699
+
700
+ # ============================================================
701
+ # TRAINING CALLBACKS
702
+ # ============================================================
703
+
704
def run_parser_evaluation():
    """Run ``run_parser_eval`` in "rules+extended" mode, or report it as unavailable.

    Returns a visible/open Gradio update and either the evaluation output or
    an ``{"ok": False, "message": ...}`` dict.
    """
    panel_update = gr.update(visible=True, open=True)
    if HAS_PARSER_EVAL:
        return panel_update, run_parser_eval(mode="rules+extended")
    return panel_update, {
        "ok": False,
        "message": "parser_eval not available.",
    }
711
+
712
def run_gold_training():
    """Run ``train_from_gold``, or report the trainer as unavailable.

    Returns a visible/open Gradio update and either the trainer output or an
    ``{"ok": False, "message": ...}`` dict.
    """
    panel_update = gr.update(visible=True, open=True)
    if HAS_GOLD_TRAINER:
        return panel_update, train_from_gold()
    return panel_update, {
        "ok": False,
        "message": "gold_trainer not available.",
    }
719
+
720
def run_field_weight_training():
    """Run ``train_field_weights(include_llm=False)``, or report it as unavailable.

    Returns a visible/open Gradio update and either the trainer output or an
    ``{"ok": False, "message": ...}`` dict.
    """
    if HAS_FIELD_WEIGHT_TRAINER:
        payload = train_field_weights(include_llm=False)
    else:
        payload = {
            "ok": False,
            "message": "field_weight_trainer not available.",
        }
    return gr.update(visible=True, open=True), payload
728
+
729
def run_genus_training():
    """Run ``train_genus_model``, or report the trainer as unavailable.

    Returns a visible/open Gradio update and either the trainer output or an
    ``{"ok": False, "message": ...}`` dict.
    """
    if HAS_GENUS_TRAINER:
        payload = train_genus_model()
    else:
        payload = {
            "ok": False,
            "message": "genus trainer not available.",
        }
    return gr.update(visible=True, open=True), payload
737
+
738
def run_rag_index_builder():
    """Run ``build_rag_index``, or report the builder as unavailable.

    Returns a visible/open Gradio update and either the builder output or an
    ``{"ok": False, "message": ...}`` dict.
    """
    if HAS_RAG_INDEX_BUILDER:
        payload = build_rag_index()
    else:
        payload = {
            "ok": False,
            "message": "rag_index_builder not available.",
        }
    return gr.update(visible=True, open=True), payload
746
+
747
def commit_to_hf():
    """Hand the fixed list of artefact paths to ``push_to_hf`` and return its result.

    ``push_to_hf`` is imported lazily, so ``training.hf_sync`` is only
    required when this callback is invoked.
    """
    from training.hf_sync import push_to_hf

    artefacts = [
        # schemas, proposals, signals and weights
        "data/extended_schema.json",
        "data/extended_proposals.jsonl",
        "data/signals_catalog.json",
        "data/field_weights.json",
        "data/feature_schema.json",
        # genus model and its metadata
        "models/genus_xgb.json",
        "models/genus_xgb_meta.json",
        # LLM gold examples and the RAG index
        "data/llm_gold_examples.json",
        "data/rag/index/kb_index.json",
    ]
    return push_to_hf(artefacts)
762
+
763
+
764
+ # ============================================================
765
+ # UI + BACKGROUND
766
+ # ============================================================
767
+
768
# Custom stylesheet passed to gr.Blocks(css=CSS) in create_app(): a fixed
# full-page background image, a translucent blurred container, and
# translucent styling for text inputs, buttons and accordions.
CSS = """
html, body {
height: 100%;
}
body {
background-image: url('static/eph.jpeg');
background-size: cover;
background-position: center center;
background-attachment: fixed;
font-family: 'Inter', sans-serif !important;
}
.gradio-container {
background: rgba(0, 0, 0, 0.55) !important;
backdrop-filter: blur(14px);
border-radius: 16px !important;
}
textarea, input[type="text"] {
background: rgba(255,255,255,0.05) !important;
border: 1px solid rgba(255,255,255,0.18) !important;
color: #e5e7eb !important;
border-radius: 10px !important;
}
button {
background: rgba(255,255,255,0.08) !important;
border: 1px solid rgba(255,255,255,0.20) !important;
color: #ffffff !important;
border-radius: 10px !important;
transition: 0.2s ease;
}
button:hover {
background: rgba(255,255,255,0.16) !important;
border-color: #90caf9 !important;
}
.gr-accordion {
background: rgba(255,255,255,0.06) !important;
border-radius: 12px !important;
border: 1px solid rgba(255,255,255,0.16) !important;
}
.gr-accordion:hover {
border-color: rgba(255,255,255,0.32) !important;
}
/* Ensure expanded accordion content is not clipped */
.gr-accordion .wrap,
.gr-accordion .gr-markdown {
max-height: none !important;
overflow: visible !important;
}

/* Improve readability of long RAG text */
.gr-accordion .gr-markdown {
line-height: 1.6;
padding-bottom: 12px;
}
"""
822
+
823
+ # ============================================================
824
+ # BUILD UI
825
+ # ============================================================
826
+
827
def create_app():
    """Assemble the BactAI-D Gradio interface and wire up its callbacks.

    The layout is a Markdown header, a global LLM-parser checkbox and three
    tabs (identification, parser debugging, training & sync), followed by a
    footer. Components are created in the order they should appear.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """

    def hidden_json_panel(title):
        # Collapsed, initially invisible accordion wrapping one JSON viewer.
        with gr.Accordion(title, open=False, visible=False) as panel:
            viewer = gr.JSON()
        return panel, viewer

    def rank_panel(rank):
        # Hidden accordion for one ranked genus, holding a Markdown body.
        with gr.Accordion(f"Rank {rank}", visible=False, open=False) as panel:
            body = gr.Markdown("")
        return panel, body

    with gr.Blocks(
        css=CSS,
        title="BactAI-D — Microbiology Identification",
    ) as app:

        gr.Markdown(
            f"# 🧫 BactAI-D — Microbiology Phenotype Identification\n"
            f"**Database updated:** {DB_LAST_UPDATED}\n\n"
            "Rule-based parsing, extended schema, ML genus prediction, and "
            "RAG (knowledge base + Mistral-7B-Instruct) are combined into a "
            "unified hybrid identification engine."
        )

        # Shared by the identification tab and the tri-fusion debug button.
        llm_toggle = gr.Checkbox(
            label="Enable LLM Parser (Phi-3 Mini — Only Applicable Locally)",
            value=False,
        )

        with gr.Tabs():

            # ----------------------------------------------------
            # TAB 1 — IDENTIFICATION
            # ----------------------------------------------------
            with gr.Tab("🧬 Identification"):

                phenotype_box = gr.Textbox(
                    label="Phenotype Description",
                    lines=8,
                    placeholder="Paste your microbiology description here…",
                )

                identify_btn = gr.Button("🔍 Analyse & Identify")

                fused_debug = gr.JSON(
                    label="Debug: fused fields + ML + unified ranking + overall"
                )

                # Decision table with one row per ranked genus.
                decision_table = gr.Dataframe(
                    headers=[
                        "Genus",
                        "Probability % (Top 5)",
                        "Probability (Odds)",
                        "Confidence",
                    ],
                    row_count=5,
                    col_count=4,
                    interactive=False,
                    label="Top 5 Genus Predictions (Decision Table)",
                )

                # Five accordion/markdown pairs, created in rank order.
                ranked_panels = [rank_panel(rank) for rank in range(1, 6)]
                genus_accordions = [panel for panel, _ in ranked_panels]
                genus_markdowns = [body for _, body in ranked_panels]

                identify_btn.click(
                    fn=run_identification,
                    inputs=[phenotype_box, llm_toggle],
                    outputs=[
                        fused_debug,
                        decision_table,
                        *genus_accordions,
                        *genus_markdowns,
                    ],
                )

            # ----------------------------------------------------
            # TAB 2 — PARSERS DEBUG
            # ----------------------------------------------------
            with gr.Tab("🧪 Parsers (Debug)"):

                parser_text = gr.Textbox(
                    label="Microbiology description",
                    lines=6,
                    placeholder="Paste description…",
                )

                rule_btn = gr.Button("Parse (Rule Parser)")
                ext_btn = gr.Button("Parse (Extended Tests)")
                tri_btn = gr.Button("Parse & Identify (Tri-Fusion + ML)")

                rule_panel, rule_json = hidden_json_panel("Rule Parser Output")
                ext_panel, ext_json = hidden_json_panel("Extended Parser Output")

                with gr.Accordion(
                    "Tri-Fusion Debug Output", open=False, visible=False
                ) as tri_panel:
                    tri_json = gr.JSON()
                    tri_summary = gr.Markdown()
                    tri_ml_summary = gr.Markdown()
                    tri_unified_summary = gr.Markdown()

                rule_btn.click(run_rule_parser, [parser_text], [rule_panel, rule_json])
                ext_btn.click(run_extended_parser, [parser_text], [ext_panel, ext_json])
                tri_btn.click(
                    run_trifusion_debug,
                    [parser_text, llm_toggle],
                    [
                        tri_panel,
                        tri_json,
                        tri_summary,
                        tri_ml_summary,
                        tri_unified_summary,
                    ],
                )

            # ----------------------------------------------------
            # TAB 3 — TRAINING
            # ----------------------------------------------------
            with gr.Tab("📚 Training & Sync"):

                gr.Markdown(
                    "Evaluate parsers, train from gold tests, tune parser weights, "
                    "train the genus-level model, build the RAG index, and commit "
                    "artefacts back to the HF Space repository."
                )

                eval_btn = gr.Button("📊 Evaluate Parsers")
                train_btn = gr.Button("🧬 Train from Gold Tests")
                weight_btn = gr.Button("⚖️ Train Parser Weights")
                genus_btn = gr.Button("🧬 Train Genus Model")
                rag_btn = gr.Button("🧱 Build RAG Index")
                commit_btn = gr.Button("⬆️ Commit to HF")

                eval_panel, eval_json = hidden_json_panel("Parser Evaluation Summary")
                train_panel, train_json = hidden_json_panel("Gold Training Summary")
                weight_panel, weight_json = hidden_json_panel(
                    "Field Weight Training Summary"
                )
                genus_panel, genus_json = hidden_json_panel(
                    "Genus Model Training Summary"
                )
                rag_panel, rag_json = hidden_json_panel("RAG Index Build Summary")

                commit_output = gr.JSON(label="Commit Output")

                # Each training button takes no inputs and fills its own
                # (panel, JSON) pair; registered in the original order.
                training_actions = (
                    (eval_btn, run_parser_evaluation, [eval_panel, eval_json]),
                    (train_btn, run_gold_training, [train_panel, train_json]),
                    (weight_btn, run_field_weight_training, [weight_panel, weight_json]),
                    (genus_btn, run_genus_training, [genus_panel, genus_json]),
                    (rag_btn, run_rag_index_builder, [rag_panel, rag_json]),
                )
                for button, callback, targets in training_actions:
                    button.click(callback, [], targets)
                commit_btn.click(commit_to_hf, None, commit_output)

        gr.Markdown("<br><center>Built by <b>Zain Asad</b></center><br>")

    return app
973
+
974
+
975
# Build the interface at import time so `demo` exists as a module attribute.
demo = create_app()

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Allow prebuilt CPU wheels for llama-cpp-python
2
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
3
+ --prefer-binary
4
+
5
+ pandas
6
+ numpy<2
7
+ openpyxl
8
+ fpdf
9
+ requests
10
+ huggingface_hub>=0.23.0,<1.0
11
+ transformers==4.41.0
12
+ accelerate
13
+ safetensors
14
+ gradio==5.49.1
15
+ sentencepiece
16
+ altair
17
+ torch>=2.1
18
+ einops
19
+ xgboost
20
+ scikit-learn
21
+ tokenizers
22
+ sentence-transformers>=2.6.0,<3.0
23
+ bitsandbytes
24
+ llama-cpp-python==0.2.68
25
+ #wow