TEZv committed on
Commit
31d8228
·
verified ·
1 Parent(s): b47b34b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -223
app.py CHANGED
@@ -27,6 +27,10 @@ import plotly.graph_objects as go
27
  import plotly.express as px
28
  import tabulate
29
 
 
 
 
 
30
  # ========== Діагностичний друк ==========
31
  print("Gradio version:", gr.__version__)
32
  print("Starting app...")
@@ -88,84 +92,6 @@ def retry(max_attempts=3, delay=1):
88
  return wrapper
89
  return decorator
90
 
91
- # ─────────────────────────────────────────────
92
- # LAB JOURNAL (уніфікована система кодування S1)
93
- # ─────────────────────────────────────────────
94
- JOURNAL_FILE = "./lab_journal.csv"
95
- JOURNAL_CATEGORIES = [
96
- # S1-A Genomics
97
- "S1-A·R1a", # OpenVariant
98
- "S1-A·R1b", # Somatic Classifier (future)
99
- "S1-A·R2e", # Research Assistant (RAG Chatbot) <-- додано
100
- # S1-B RNA
101
- "S1-B·R1a", # BRCA2 miRNA
102
- "S1-B·R2a", # TP53 siRNA
103
- "S1-B·R3a", # lncRNA-TREM2
104
- "S1-B·R3b", # ASO Designer
105
- # S1-C Drug
106
- "S1-C·R1a", # FGFR3 RNA Drug
107
- "S1-C·R1b", # SL Drug Mapping (future)
108
- "S1-C·R2a", # Frontier (future)
109
- # S1-D LNP
110
- "S1-D·R1a", # LNP Corona
111
- "S1-D·R2a", # Flow Corona
112
- "S1-D·R3a", # LNP Brain
113
- "S1-D·R4a", # AutoCorona NLP
114
- "S1-D·R5a", # CSF/Vitreous/BM (future)
115
- "S1-D·R6a", # Corona Database
116
- # S1-E Biomarkers
117
- "S1-E·R1a", # Liquid Biopsy
118
- "S1-E·R1b", # Protein Validator (future)
119
- "S1-E·R2a", # Multi-protein Biomarkers
120
- # S1-F Rare
121
- "S1-F·R1a", # DIPG Toolkit
122
- "S1-F·R2a", # UVM Toolkit
123
- "S1-F·R3a", # pAML Toolkit
124
- # S1-G 3D
125
- "S1-G·General", # 3D Models
126
- "Manual"
127
- ]
128
-
129
def journal_log(category: str, action: str, result: str, note: str = ""):
    """Append one entry to the lab journal CSV and return its timestamp.

    Args:
        category: Category code for the entry (e.g. one of JOURNAL_CATEGORIES).
        action: Short description of what was done.
        result: Result text; only the first 200 characters are stored.
        note: Optional free-form observation.

    Returns:
        The ISO-8601 timestamp string written as the first column of the row.
    """
    ts = datetime.now().isoformat()
    # str() keeps a non-string result (None, a number) from breaking the slice.
    row = [ts, category, action, str(result)[:200], note]

    # A header is needed both for a missing file and for an existing but
    # zero-byte one; without it the first data row would be parsed as the
    # header by any CSV reader that expects column names.
    try:
        write_header = os.path.getsize(JOURNAL_FILE) == 0
    except OSError:
        write_header = True

    with open(JOURNAL_FILE, "a", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        if write_header:
            w.writerow(["timestamp", "category", "action", "result_summary", "note"])
        w.writerow(row)
    return ts
140
-
141
def journal_read(category: str = "All") -> str:
    """Return journal entries as a markdown table, optionally filtered.

    Args:
        category: Category code to keep, or "All" to show every entry.

    Returns:
        A markdown table built from the last 50 matching rows of the journal
        file, or a short status message when there is nothing to show or the
        file cannot be read.
    """
    if not os.path.exists(JOURNAL_FILE):
        return "No entries yet."
    try:
        df = pd.read_csv(JOURNAL_FILE)
        if df.empty:
            return "No entries yet."
        if category != "All":
            df = df[df["category"] == category]
            if df.empty:
                return f"No entries for category: {category}"
        df_display = df[["timestamp", "category", "action", "result_summary", "note"]].tail(50)
        # rename() returns a new frame, so the slice of df is never mutated.
        df_display = df_display.rename(
            columns={
                "timestamp": "Timestamp",
                "category": "Category",
                "action": "Action",
                "result_summary": "Result",
                "note": "Observation",
            }
        )
        return df_display.to_markdown(index=False)
    except pd.errors.EmptyDataError:
        # Zero-byte file: there are simply no entries; this is not a failure.
        return "No entries yet."
    except Exception as e:
        # UI boundary: report the problem on stdout and show a generic message.
        print(f"Journal read error: {e}")
        return "Error reading journal."
159
-
160
def clear_journal():
    """Delete the journal file if it exists and return a status message.

    Returns:
        "Journal cleared." when the file is gone afterwards (including the
        case where it was already absent), otherwise "Error clearing journal.".
    """
    try:
        # EAFP: removing directly avoids the window between an existence
        # check and the removal in which the file could disappear.
        os.remove(JOURNAL_FILE)
    except FileNotFoundError:
        # Already absent: same end state as a successful removal.
        pass
    except OSError as e:
        print(f"Clear journal error: {e}")
        return "Error clearing journal."
    return "Journal cleared."
168
-
169
  # ========== БАЗИ ДАНИХ ==========
170
  MIRNA_DB = {
171
  "BRCA2": [
@@ -813,151 +739,6 @@ def plot_corona():
813
  )
814
  return fig
815
 
816
- # ========== RAG CHATBOT (S1-A·R2e) ==========
817
- # --- Paper corpus for RAG ---
818
- PAPER_PMIDS = [
819
- "34394960", "32251383", "29653760", "22782619", "33208369",
820
- "18809927", "22086677", "31565943", "33754708", "20461061",
821
- "30096302", "30311387", "32461654", "27328919", "31820981",
822
- "28678784", "31348638", "33016924", "31142840", "33883548",
823
- ]
824
- PAPER_CORPUS = [
825
- {"pmid": "34394960", "title": "Lipid nanoparticles for mRNA delivery.", "abstract": "Messenger RNA (mRNA) has emerged as a new category of therapeutic agent to prevent and treat various diseases. To function in vivo, mRNA requires safe, effective and stable delivery systems that protect the nucleic acid from degradation and that allow cellular uptake and mRNA release. Lipid nanoparticles have successfully entered the clinic for the delivery of mRNA; in particular, lipid nanoparticle-mRNA vaccines are now in clinical use against coronavirus disease 2019 (COVID-19), which marks a milestone for mRNA therapeutics. In this Review, we discuss the design of lipid nanoparticles for mRNA delivery and examine physiological barriers and possible administration routes for lipid nanoparticle-mRNA systems. We then consider key points for the clinical translation of lipid nanoparticle-mRNA formulations, including good manufacturing practice, stability, storage and safety, and highlight preclinical and clinical studies of lipid nanoparticle-mRNA therapeutics for infectious diseases, cancer and genetic disorders. Finally, we give an outlook to future possibilities and remaining challenges for this promising technology.", "journal": "Nat Rev Mater", "year": 2021, "topic": "LNP mRNA delivery"},
826
- {"pmid": "32251383", "title": "Selective organ targeting (SORT) nanoparticles for tissue-specific mRNA delivery and CRISPR-Cas gene editing.", "abstract": "CRISPR-Cas gene editing and messenger RNA-based protein replacement therapy hold tremendous potential to effectively treat disease-causing mutations with diverse cellular origin. However, it is currently impossible to rationally design nanoparticles that selectively target specific tissues. Here, we report a strategy termed selective organ targeting (SORT) wherein multiple classes of lipid nanoparticles are systematically engineered to exclusively edit extrahepatic tissues via addition of a supplemental SORT molecule. Lung-, spleen- and liver-targeted SORT lipid nanoparticles were designed to selectively edit therapeutically relevant cell types including epithelial cells, endothelial cells, B cells, T cells and hepatocytes. SORT is compatible with multiple gene editing techniques, including mRNA, Cas9 mRNA/single guide RNA and Cas9 ribonucleoprotein complexes, and is envisioned to aid the development of protein replacement and gene correction therapeutics in targeted tissues.", "journal": "Nat Nanotechnol", "year": 2020, "topic": "LNP organ selectivity"},
827
- {"pmid": "29653760", "title": "A Novel Amino Lipid Series for mRNA Delivery: Improved Endosomal Escape and Sustained Pharmacology and Safety in Non-human Primates.", "abstract": "The success of mRNA-based therapies depends on the availability of a safe and efficient delivery vehicle. Lipid nanoparticles have been identified as a viable option. However, there are concerns whether an acceptable tolerability profile for chronic dosing can be achieved. The efficiency and tolerability of lipid nanoparticles has been attributed to the amino lipid. Therefore, we developed a new series of amino lipids that address this concern. Clear structure-activity relationships were developed that resulted in a new amino lipid that affords efficient mRNA delivery in rodent and primate models with optimal pharmacokinetics. A 1-month toxicology evaluation in rat and non-human primate demonstrated no adverse events with the new lipid nanoparticle system. Mechanistic studies demonstrate that the improved efficiency can be attributed to increased endosomal escape. This effort has resulted in the first example of the ability to safely repeat dose mRNA-containing lipid nanoparticles in non-human primate at therapeutically relevant levels.", "journal": "Mol Ther", "year": 2018, "topic": "LNP ionizable lipid"},
828
- {"pmid": "22782619", "title": "Maximizing the potency of siRNA lipid nanoparticles for hepatic gene silencing in vivo.", "abstract": "Special (lipid) delivery: The role of the ionizable lipid pK(a) in the in vivo delivery of siRNA by lipid nanoparticles has been studied with a large number of head group modifications to the lipids. A tight correlation between the lipid pK(a) value and silencing of the mouse FVII gene (FVII ED(50) ) was found, with an optimal pK(a) range of 6.2-6.5. The most potent cationic lipid from this study has ED(50) levels around 0.005 mg kg(-1) in mice and less than 0.03 mg kg(-1) in non-human primates.", "journal": "Angew Chem Int Ed Engl", "year": 2012, "topic": "LNP ionizable lipid siRNA"},
829
- {"pmid": "33208369", "title": "CRISPR-Cas9 genome editing using targeted lipid nanoparticles for cancer therapy.", "abstract": "Harnessing CRISPR-Cas9 technology for cancer therapeutics has been hampered by low editing efficiency in tumors and potential toxicity of existing delivery systems. Here, we describe a safe and efficient lipid nanoparticle (LNP) for the delivery of Cas9 mRNA and sgRNAs that use a novel amino-ionizable lipid. A single intracerebral injection of CRISPR-LNPs against glioblastoma multif...", "journal": "Sci Adv", "year": 2020, "topic": "LNP cancer CRISPR"},
830
- # ... (abridged for brevity; the full list of 20 papers can be taken from your file)
831
- ]
832
- # (For the complete code, add all 20 PAPER_CORPUS entries that you provided earlier)
833
-
834
- _rag_index = None
835
- _rag_embeddings = None
836
- _rag_model = None
837
- EMBED_MODEL = "all-MiniLM-L6-v2"
838
-
839
def _build_index():
    """Embed PAPER_CORPUS and build the inner-product FAISS index.

    On success the module-level ``_rag_model``, ``_rag_embeddings`` and
    ``_rag_index`` are populated. Embeddings are L2-normalised before being
    added, so inner-product scores from the index are cosine similarities.

    Returns:
        A ``(ok, message)`` tuple: ``ok`` is False when the optional
        dependencies are missing or the corpus is empty, True otherwise.
    """
    global _rag_index, _rag_embeddings, _rag_model
    try:
        from sentence_transformers import SentenceTransformer
        import faiss
    except ImportError:
        return False, "sentence-transformers or faiss-cpu not installed. Run: pip install sentence-transformers faiss-cpu"

    if not PAPER_CORPUS:
        # Encoding an empty list yields no usable embedding dimension.
        return False, "Paper corpus is empty; nothing to index."

    # Work on locals and publish to the globals only at the end, so a failure
    # part-way through cannot leave a model without a matching index.
    model = SentenceTransformer(EMBED_MODEL)
    texts = [
        f"Title: {p['title']}\nAbstract: {p['abstract']}\nJournal: {p['journal']} ({p['year']})"
        for p in PAPER_CORPUS
    ]
    embeddings = model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Floor the norm so an all-zero embedding cannot put NaNs into the index.
    embeddings = embeddings / np.maximum(norms, 1e-12)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(embeddings.astype(np.float32))

    _rag_model, _rag_embeddings, _rag_index = model, embeddings, index
    return True, f"Index built: {len(PAPER_CORPUS)} papers, {dim}-dim embeddings"
854
-
855
def _confidence_flag(score: float, n_results: int) -> str:
    """Translate a retrieval score into a traffic-light confidence label.

    Args:
        score: Retrieval score of the best hit.
        n_results: Number of hits that passed the relevance cut-off.

    Returns:
        "🟢 HIGH" for a score of at least 0.55 backed by two or more hits,
        "🟡 MEDIUM" for any other score of at least 0.35, and
        "🔴 SPECULATIVE" for everything else.
    """
    # Phrased as "not >=" so a NaN score also falls through to SPECULATIVE.
    if not score >= 0.35:
        return "🔴 SPECULATIVE"
    is_strong = score >= 0.55 and n_results >= 2
    return "🟢 HIGH" if is_strong else "🟡 MEDIUM"
862
-
863
def rag_query(question: str, top_k: int = 3) -> str:
    """Answer a question by retrieving the closest papers from the index.

    The index is built lazily on the first call. The question is embedded,
    L2-normalised, and searched against the index; hits scoring below 0.20
    are discarded.

    Args:
        question: Free-text question from the user.
        top_k: Maximum number of papers to retrieve; values below 1 are
            treated as 1.

    Returns:
        Markdown text: a confidence line followed by one section per retrieved
        paper and a disclaimer, or a warning / "no relevant information"
        message when nothing usable is available.
    """
    if _rag_index is None:
        ok, msg = _build_index()
        if not ok:
            return f"⚠️ RAG system unavailable: {msg}"
    # No second import check here: an existing index already proves the
    # optional packages were importable, and this function uses neither
    # module directly.

    # Normalise a zero or negative k to 1 before it reaches the index search.
    top_k = max(1, int(top_k))

    q_emb = _rag_model.encode([question], convert_to_numpy=True, show_progress_bar=False)
    q_norm = np.linalg.norm(q_emb, axis=1, keepdims=True)
    # Floor the norm so a degenerate all-zero embedding cannot produce NaNs.
    q_emb = q_emb / np.maximum(q_norm, 1e-12)
    scores, indices = _rag_index.search(q_emb.astype(np.float32), top_k)
    scores = scores[0]
    indices = indices[0]

    MIN_SCORE = 0.20
    # i >= 0 drops negative placeholder indices (presumably returned when
    # fewer than top_k vectors are indexed — confirm against FAISS docs).
    valid = [(s, i) for s, i in zip(scores, indices) if s >= MIN_SCORE and i >= 0]
    if not valid:
        return (
            "❌ **No relevant information found in the indexed papers.**\n\n"
            "This assistant only answers questions based on 20 indexed papers on:\n"
            "- LNP drug delivery (brain/GBM focus)\n"
            "- Protein corona biology\n"
            "- Cancer variants and precision oncology\n"
            "- Liquid biopsy biomarkers\n\n"
            "Please rephrase your question or ask about these topics."
        )

    # The first valid hit is used as the headline score.
    top_score = valid[0][0]
    confidence = _confidence_flag(top_score, len(valid))
    answer_parts = [f"**Confidence: {confidence}** (retrieval score: {top_score:.3f})\n"]
    for rank, (score, idx) in enumerate(valid, 1):
        paper = PAPER_CORPUS[idx]
        answer_parts.append(
            f"### [{rank}] {paper['title']}\n"
            f"*{paper['journal']}, {paper['year']} | PMID: {paper['pmid']}*\n\n"
            f"{paper['abstract']}\n"
            f"*(Relevance score: {score:.3f})*"
        )
    answer_parts.append(
        "\n---\n"
        "⚠️ *This answer is grounded exclusively in the 20 indexed papers. "
        "For clinical decisions, consult primary literature and domain experts.*"
    )
    return "\n\n".join(answer_parts)
908
-
909
def build_chatbot_tab():
    """Lay out the Research Assistant chat tab and wire up its events.

    Creates an intro note, a two-column row (chat area with input and buttons
    on the left, topic list and confidence legend on the right), and connects
    the send button, textbox submit and clear button to their handlers.
    Presumably meant to be called inside an active Gradio Blocks/Tab context;
    confirm against the caller.
    """
    intro_text = (
        "**Status:** Model loads on first query (~30s)...\n\n"
        "Ask questions about LNP delivery, protein corona, cancer variants, or liquid biopsy. "
        "Answers are grounded in 20 indexed papers — never fabricated."
    )
    topics_text = (
        "**LNP Delivery**\n"
        "- mRNA-LNP formulation\n"
        "- Ionizable lipids & pKa\n"
        "- Brain/GBM delivery\n"
        "- Organ selectivity (SORT)\n"
        "- PEG & anti-PEG immunity\n\n"
        "**Protein Corona**\n"
        "- Hard vs soft corona\n"
        "- Vroman effect kinetics\n"
        "- ApoE/LDLR targeting\n\n"
        "**Cancer Variants**\n"
        "- TP53 mutation spectrum\n"
        "- KRAS G12C resistance\n"
        "- ClinVar classification\n\n"
        "**Liquid Biopsy**\n"
        "- ctDNA methylation\n"
        "- cfRNA biomarkers"
    )
    legend_text = (
        "### 🔑 Confidence Flags\n"
        "🟢 **HIGH** — strong match (≥0.55)\n"
        "🟡 **MEDIUM** — moderate match (0.35–0.55)\n"
        "🔴 **SPECULATIVE** — weak match (<0.35)\n\n"
        "*Only answers from indexed papers are shown.*"
    )

    gr.Markdown(intro_text)
    with gr.Row():
        # Left: conversation, question box and buttons.
        with gr.Column(scale=3):
            chat_view = gr.Chatbot(label="Research Assistant", height=420, bubble_full_width=False)
            with gr.Row():
                question_box = gr.Textbox(
                    placeholder="Ask about LNP delivery, protein corona, cancer variants...",
                    label="Your question",
                    lines=2,
                    scale=4,
                )
                ask_button = gr.Button("Send", variant="primary", scale=1)
            reset_button = gr.Button("🗑️ Clear conversation", size="sm")
        # Right: static reference panels.
        with gr.Column(scale=1):
            gr.Markdown("### 📚 Indexed Topics")
            gr.Markdown(topics_text)
            gr.Markdown(legend_text)

    def respond(message, history):
        # Whitespace-only input: leave the chat as it is and empty the box.
        if not message.strip():
            return history, ""
        answer = rag_query(message.strip())
        turns = history or []
        turns.append((message, answer))
        return turns, ""

    # The button click and Enter in the textbox share one handler.
    chat_io = [question_box, chat_view]
    chat_result = [chat_view, question_box]
    ask_button.click(respond, inputs=chat_io, outputs=chat_result)
    question_box.submit(respond, inputs=chat_io, outputs=chat_result)
    reset_button.click(lambda: ([], ""), outputs=[chat_view, question_box])
960
-
961
  # ========== ДОПОМІЖНІ ФУНКЦІЇ ДЛЯ UI ==========
962
  def section_header(code, name, tagline, projects_html):
963
  return (
 
27
  import plotly.express as px
28
  import tabulate
29
 
30
+ # Імпорт з окремих модулів
31
+ from journal import journal_log, journal_read, clear_journal, JOURNAL_CATEGORIES
32
+ from chatbot import build_chatbot_tab
33
+
34
  # ========== Діагностичний друк ==========
35
  print("Gradio version:", gr.__version__)
36
  print("Starting app...")
 
92
  return wrapper
93
  return decorator
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # ========== БАЗИ ДАНИХ ==========
96
  MIRNA_DB = {
97
  "BRCA2": [
 
739
  )
740
  return fig
741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
  # ========== ДОПОМІЖНІ ФУНКЦІЇ ДЛЯ UI ==========
743
  def section_header(code, name, tagline, projects_html):
744
  return (