Update app.py
Browse files
app.py
CHANGED
|
@@ -27,6 +27,10 @@ import plotly.graph_objects as go
|
|
| 27 |
import plotly.express as px
|
| 28 |
import tabulate
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# ========== Діагностичний друк ==========
|
| 31 |
print("Gradio version:", gr.__version__)
|
| 32 |
print("Starting app...")
|
|
@@ -88,84 +92,6 @@ def retry(max_attempts=3, delay=1):
|
|
| 88 |
return wrapper
|
| 89 |
return decorator
|
| 90 |
|
| 91 |
-
# ─────────────────────────────────────────────
|
| 92 |
-
# LAB JOURNAL (уніфікована система кодування S1)
|
| 93 |
-
# ─────────────────────────────────────────────
|
| 94 |
-
JOURNAL_FILE = "./lab_journal.csv"
|
| 95 |
-
JOURNAL_CATEGORIES = [
|
| 96 |
-
# S1-A Genomics
|
| 97 |
-
"S1-A·R1a", # OpenVariant
|
| 98 |
-
"S1-A·R1b", # Somatic Classifier (future)
|
| 99 |
-
"S1-A·R2e", # Research Assistant (RAG Chatbot) <-- додано
|
| 100 |
-
# S1-B RNA
|
| 101 |
-
"S1-B·R1a", # BRCA2 miRNA
|
| 102 |
-
"S1-B·R2a", # TP53 siRNA
|
| 103 |
-
"S1-B·R3a", # lncRNA-TREM2
|
| 104 |
-
"S1-B·R3b", # ASO Designer
|
| 105 |
-
# S1-C Drug
|
| 106 |
-
"S1-C·R1a", # FGFR3 RNA Drug
|
| 107 |
-
"S1-C·R1b", # SL Drug Mapping (future)
|
| 108 |
-
"S1-C·R2a", # Frontier (future)
|
| 109 |
-
# S1-D LNP
|
| 110 |
-
"S1-D·R1a", # LNP Corona
|
| 111 |
-
"S1-D·R2a", # Flow Corona
|
| 112 |
-
"S1-D·R3a", # LNP Brain
|
| 113 |
-
"S1-D·R4a", # AutoCorona NLP
|
| 114 |
-
"S1-D·R5a", # CSF/Vitreous/BM (future)
|
| 115 |
-
"S1-D·R6a", # Corona Database
|
| 116 |
-
# S1-E Biomarkers
|
| 117 |
-
"S1-E·R1a", # Liquid Biopsy
|
| 118 |
-
"S1-E·R1b", # Protein Validator (future)
|
| 119 |
-
"S1-E·R2a", # Multi-protein Biomarkers
|
| 120 |
-
# S1-F Rare
|
| 121 |
-
"S1-F·R1a", # DIPG Toolkit
|
| 122 |
-
"S1-F·R2a", # UVM Toolkit
|
| 123 |
-
"S1-F·R3a", # pAML Toolkit
|
| 124 |
-
# S1-G 3D
|
| 125 |
-
"S1-G·General", # 3D Models
|
| 126 |
-
"Manual"
|
| 127 |
-
]
|
| 128 |
-
|
| 129 |
-
def journal_log(category: str, action: str, result: str, note: str = ""):
    """Append one entry to the lab journal CSV and return its timestamp.

    Args:
        category: Category code stored in the ``category`` column.
        action: Short description of what was done.
        result: Outcome text; only its first 200 characters are stored.
            ``None`` is stored as an empty string and any other non-string
            value is converted with ``str`` so the truncation cannot fail.
        note: Optional free-form observation.

    Returns:
        The ISO-format timestamp string that was written with the row.
    """
    ts = datetime.now().isoformat()
    summary = "" if result is None else str(result)[:200]
    with open(JOURNAL_FILE, "a", newline="", encoding="utf-8") as f:
        # Decide on the header from the size of the file we actually opened:
        # this covers both a brand-new file and a pre-existing empty one.
        write_header = os.fstat(f.fileno()).st_size == 0
        w = csv.writer(f)
        if write_header:
            w.writerow(["timestamp", "category", "action", "result_summary", "note"])
        w.writerow([ts, category, action, summary, note])
    return ts
|
| 140 |
-
|
| 141 |
-
def journal_read(category: str = "All") -> str:
    """Return the most recent journal entries as a markdown table.

    Args:
        category: Category code to filter on; ``"All"`` disables filtering.

    Returns:
        A markdown table of at most the last 50 matching rows, or a short
        status message when the journal is missing, empty, has no rows for
        the requested category, or cannot be read.
    """
    if not os.path.exists(JOURNAL_FILE):
        return "No entries yet."
    try:
        df = pd.read_csv(JOURNAL_FILE)
        if df.empty:
            return "No entries yet."
        if category != "All":
            df = df[df["category"] == category]
            if df.empty:
                return f"No entries for category: {category}"
        df_display = df[["timestamp", "category", "action", "result_summary", "note"]].tail(50)
        df_display.columns = ["Timestamp", "Category", "Action", "Result", "Observation"]
        return df_display.to_markdown(index=False)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; that is an empty journal,
        # not a read failure.
        return "No entries yet."
    except Exception as e:
        print(f"Journal read error: {e}")
        return "Error reading journal."
|
| 159 |
-
|
| 160 |
-
def clear_journal():
    """Delete the journal file and return a status message.

    Returns:
        ``"Journal cleared."`` when the file was removed or did not exist,
        ``"Error clearing journal."`` when removal failed for another reason.
    """
    try:
        # Remove directly instead of checking existence first, so a file that
        # disappears between the check and the removal cannot cause an error.
        os.remove(JOURNAL_FILE)
    except FileNotFoundError:
        # Nothing to delete counts as already cleared.
        pass
    except Exception as e:
        print(f"Clear journal error: {e}")
        return "Error clearing journal."
    return "Journal cleared."
|
| 168 |
-
|
| 169 |
# ========== БАЗИ ДАНИХ ==========
|
| 170 |
MIRNA_DB = {
|
| 171 |
"BRCA2": [
|
|
@@ -813,151 +739,6 @@ def plot_corona():
|
|
| 813 |
)
|
| 814 |
return fig
|
| 815 |
|
| 816 |
-
# ========== RAG CHATBOT (S1-A·R2e) ==========
|
| 817 |
-
# --- Paper corpus for RAG ---
|
| 818 |
-
PAPER_PMIDS = [
|
| 819 |
-
"34394960", "32251383", "29653760", "22782619", "33208369",
|
| 820 |
-
"18809927", "22086677", "31565943", "33754708", "20461061",
|
| 821 |
-
"30096302", "30311387", "32461654", "27328919", "31820981",
|
| 822 |
-
"28678784", "31348638", "33016924", "31142840", "33883548",
|
| 823 |
-
]
|
| 824 |
-
PAPER_CORPUS = [
|
| 825 |
-
{"pmid": "34394960", "title": "Lipid nanoparticles for mRNA delivery.", "abstract": "Messenger RNA (mRNA) has emerged as a new category of therapeutic agent to prevent and treat various diseases. To function in vivo, mRNA requires safe, effective and stable delivery systems that protect the nucleic acid from degradation and that allow cellular uptake and mRNA release. Lipid nanoparticles have successfully entered the clinic for the delivery of mRNA; in particular, lipid nanoparticle-mRNA vaccines are now in clinical use against coronavirus disease 2019 (COVID-19), which marks a milestone for mRNA therapeutics. In this Review, we discuss the design of lipid nanoparticles for mRNA delivery and examine physiological barriers and possible administration routes for lipid nanoparticle-mRNA systems. We then consider key points for the clinical translation of lipid nanoparticle-mRNA formulations, including good manufacturing practice, stability, storage and safety, and highlight preclinical and clinical studies of lipid nanoparticle-mRNA therapeutics for infectious diseases, cancer and genetic disorders. Finally, we give an outlook to future possibilities and remaining challenges for this promising technology.", "journal": "Nat Rev Mater", "year": 2021, "topic": "LNP mRNA delivery"},
|
| 826 |
-
{"pmid": "32251383", "title": "Selective organ targeting (SORT) nanoparticles for tissue-specific mRNA delivery and CRISPR-Cas gene editing.", "abstract": "CRISPR-Cas gene editing and messenger RNA-based protein replacement therapy hold tremendous potential to effectively treat disease-causing mutations with diverse cellular origin. However, it is currently impossible to rationally design nanoparticles that selectively target specific tissues. Here, we report a strategy termed selective organ targeting (SORT) wherein multiple classes of lipid nanoparticles are systematically engineered to exclusively edit extrahepatic tissues via addition of a supplemental SORT molecule. Lung-, spleen- and liver-targeted SORT lipid nanoparticles were designed to selectively edit therapeutically relevant cell types including epithelial cells, endothelial cells, B cells, T cells and hepatocytes. SORT is compatible with multiple gene editing techniques, including mRNA, Cas9 mRNA/single guide RNA and Cas9 ribonucleoprotein complexes, and is envisioned to aid the development of protein replacement and gene correction therapeutics in targeted tissues.", "journal": "Nat Nanotechnol", "year": 2020, "topic": "LNP organ selectivity"},
|
| 827 |
-
{"pmid": "29653760", "title": "A Novel Amino Lipid Series for mRNA Delivery: Improved Endosomal Escape and Sustained Pharmacology and Safety in Non-human Primates.", "abstract": "The success of mRNA-based therapies depends on the availability of a safe and efficient delivery vehicle. Lipid nanoparticles have been identified as a viable option. However, there are concerns whether an acceptable tolerability profile for chronic dosing can be achieved. The efficiency and tolerability of lipid nanoparticles has been attributed to the amino lipid. Therefore, we developed a new series of amino lipids that address this concern. Clear structure-activity relationships were developed that resulted in a new amino lipid that affords efficient mRNA delivery in rodent and primate models with optimal pharmacokinetics. A 1-month toxicology evaluation in rat and non-human primate demonstrated no adverse events with the new lipid nanoparticle system. Mechanistic studies demonstrate that the improved efficiency can be attributed to increased endosomal escape. This effort has resulted in the first example of the ability to safely repeat dose mRNA-containing lipid nanoparticles in non-human primate at therapeutically relevant levels.", "journal": "Mol Ther", "year": 2018, "topic": "LNP ionizable lipid"},
|
| 828 |
-
{"pmid": "22782619", "title": "Maximizing the potency of siRNA lipid nanoparticles for hepatic gene silencing in vivo.", "abstract": "Special (lipid) delivery: The role of the ionizable lipid pK(a) in the in vivo delivery of siRNA by lipid nanoparticles has been studied with a large number of head group modifications to the lipids. A tight correlation between the lipid pK(a) value and silencing of the mouse FVII gene (FVII ED(50) ) was found, with an optimal pK(a) range of 6.2-6.5. The most potent cationic lipid from this study has ED(50) levels around 0.005 mg kg(-1) in mice and less than 0.03 mg kg(-1) in non-human primates.", "journal": "Angew Chem Int Ed Engl", "year": 2012, "topic": "LNP ionizable lipid siRNA"},
|
| 829 |
-
{"pmid": "33208369", "title": "CRISPR-Cas9 genome editing using targeted lipid nanoparticles for cancer therapy.", "abstract": "Harnessing CRISPR-Cas9 technology for cancer therapeutics has been hampered by low editing efficiency in tumors and potential toxicity of existing delivery systems. Here, we describe a safe and efficient lipid nanoparticle (LNP) for the delivery of Cas9 mRNA and sgRNAs that use a novel amino-ionizable lipid. A single intracerebral injection of CRISPR-LNPs against glioblastoma multif...", "journal": "Sci Adv", "year": 2020, "topic": "LNP cancer CRISPR"},
|
| 830 |
-
# TODO: corpus abbreviated here for brevity — only 5 of the 20 papers listed in PAPER_PMIDS are present; the full list of 20 entries can be taken from your file.
|
| 831 |
-
]
|
| 832 |
-
# (For the complete code, add all 20 PAPER_CORPUS entries that you provided earlier.)
|
| 833 |
-
|
| 834 |
-
_rag_index = None
|
| 835 |
-
_rag_embeddings = None
|
| 836 |
-
_rag_model = None
|
| 837 |
-
EMBED_MODEL = "all-MiniLM-L6-v2"
|
| 838 |
-
|
| 839 |
-
def _build_index():
    """Embed every paper in PAPER_CORPUS and build the FAISS search index.

    The model, the normalized embeddings and the index are stored in the
    module-level ``_rag_model``, ``_rag_embeddings`` and ``_rag_index`` only
    after the whole build has succeeded, so a failed build leaves the
    previous state untouched.

    Returns:
        A ``(ok, message)`` tuple: ``(True, summary)`` on success, or
        ``(False, reason)`` when the optional packages are missing, the
        corpus is empty, or loading/encoding/indexing fails.
    """
    global _rag_index, _rag_embeddings, _rag_model
    try:
        from sentence_transformers import SentenceTransformer
        import faiss
    except ImportError:
        return False, "sentence-transformers or faiss-cpu not installed. Run: pip install sentence-transformers faiss-cpu"
    if not PAPER_CORPUS:
        # Encoding an empty list yields no embedding dimension to index.
        return False, "paper corpus is empty; nothing to index"
    try:
        texts = [
            f"Title: {p['title']}\nAbstract: {p['abstract']}\nJournal: {p['journal']} ({p['year']})"
            for p in PAPER_CORPUS
        ]
        model = SentenceTransformer(EMBED_MODEL)
        embeddings = model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
        # Unit-length rows make the inner-product index return cosine similarity.
        embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
        embeddings = embeddings.astype(np.float32)
        dim = embeddings.shape[1]
        index = faiss.IndexFlatIP(dim)
        index.add(embeddings)
    except Exception as e:
        # Report the failure through the (ok, message) contract, matching how
        # the missing-package case is reported, instead of raising.
        print(f"RAG index build error: {e}")
        return False, f"could not build index: {e}"
    _rag_model, _rag_embeddings, _rag_index = model, embeddings, index
    return True, f"Index built: {len(PAPER_CORPUS)} papers, {dim}-dim embeddings"
|
| 854 |
-
|
| 855 |
-
def _confidence_flag(score: float, n_results: int) -> str:
|
| 856 |
-
if score >= 0.55 and n_results >= 2:
|
| 857 |
-
return "🟢 HIGH"
|
| 858 |
-
elif score >= 0.35:
|
| 859 |
-
return "🟡 MEDIUM"
|
| 860 |
-
else:
|
| 861 |
-
return "🔴 SPECULATIVE"
|
| 862 |
-
|
| 863 |
-
def rag_query(question: str, top_k: int = 3) -> str:
    """Answer a question by retrieving the most similar indexed papers.

    The index is built on first use. Hits scoring below 0.20 are discarded.

    Args:
        question: Free-text question to embed and search for.
        top_k: Maximum number of papers to return; clamped to the range
            1..len(PAPER_CORPUS) before searching.

    Returns:
        A markdown string: either an unavailability or no-match message, or
        a confidence line followed by one section per retrieved paper and a
        closing disclaimer.
    """
    if _rag_index is None:
        ok, msg = _build_index()
        if not ok:
            return f"⚠️ RAG system unavailable: {msg}"
    # Past this point the index exists, so the optional packages were already
    # imported successfully by _build_index; no second import check is needed.
    n_papers = len(PAPER_CORPUS)
    k = max(1, min(int(top_k), n_papers))
    q_emb = _rag_model.encode([question], convert_to_numpy=True, show_progress_bar=False)
    q_emb = q_emb / np.linalg.norm(q_emb, axis=1, keepdims=True)
    scores, indices = _rag_index.search(q_emb.astype(np.float32), k)
    MIN_SCORE = 0.20
    # Negative indices are skipped as "no result" placeholders.
    valid = [(s, int(i)) for s, i in zip(scores[0], indices[0]) if s >= MIN_SCORE and i >= 0]
    if not valid:
        return (
            "❌ **No relevant information found in the indexed papers.**\n\n"
            f"This assistant only answers questions based on {n_papers} indexed papers on:\n"
            "- LNP drug delivery (brain/GBM focus)\n"
            "- Protein corona biology\n"
            "- Cancer variants and precision oncology\n"
            "- Liquid biopsy biomarkers\n\n"
            "Please rephrase your question or ask about these topics."
        )
    top_score = valid[0][0]
    confidence = _confidence_flag(top_score, len(valid))
    answer_parts = [f"**Confidence: {confidence}** (retrieval score: {top_score:.3f})\n"]
    for rank, (score, idx) in enumerate(valid, 1):
        paper = PAPER_CORPUS[idx]
        answer_parts.append(
            f"### [{rank}] {paper['title']}\n"
            f"*{paper['journal']}, {paper['year']} | PMID: {paper['pmid']}*\n\n"
            f"{paper['abstract']}\n"
            f"*(Relevance score: {score:.3f})*"
        )
    answer_parts.append(
        "\n---\n"
        f"⚠️ *This answer is grounded exclusively in the {n_papers} indexed papers. "
        "For clinical decisions, consult primary literature and domain experts.*"
    )
    return "\n\n".join(answer_parts)
|
| 908 |
-
|
| 909 |
-
def build_chatbot_tab():
    """Create the Research Assistant chat UI inside the current Gradio context.

    Lays out an intro note, a chat panel (chatbot, question box, send and
    clear buttons) and a side panel listing indexed topics and the meaning of
    the confidence flags, then wires the buttons to the retrieval function.
    """
    intro_md = (
        "**Status:** Model loads on first query (~30s)...\n\n"
        "Ask questions about LNP delivery, protein corona, cancer variants, or liquid biopsy. "
        "Answers are grounded in 20 indexed papers — never fabricated."
    )
    topics_md = (
        "**LNP Delivery**\n"
        "- mRNA-LNP formulation\n"
        "- Ionizable lipids & pKa\n"
        "- Brain/GBM delivery\n"
        "- Organ selectivity (SORT)\n"
        "- PEG & anti-PEG immunity\n\n"
        "**Protein Corona**\n"
        "- Hard vs soft corona\n"
        "- Vroman effect kinetics\n"
        "- ApoE/LDLR targeting\n\n"
        "**Cancer Variants**\n"
        "- TP53 mutation spectrum\n"
        "- KRAS G12C resistance\n"
        "- ClinVar classification\n\n"
        "**Liquid Biopsy**\n"
        "- ctDNA methylation\n"
        "- cfRNA biomarkers"
    )
    flags_md = (
        "### 🔑 Confidence Flags\n"
        "🟢 **HIGH** — strong match (≥0.55)\n"
        "🟡 **MEDIUM** — moderate match (0.35–0.55)\n"
        "🔴 **SPECULATIVE** — weak match (<0.35)\n\n"
        "*Only answers from indexed papers are shown.*"
    )

    def respond(message, history):
        # Blank input: leave the conversation as it is and just clear the box.
        query = message.strip()
        if not query:
            return history, ""
        answer = rag_query(query)
        history = history or []
        history.append((message, answer))
        return history, ""

    def reset_conversation():
        # Empty chat history plus an empty question box.
        return [], ""

    gr.Markdown(intro_md)
    with gr.Row():
        with gr.Column(scale=3):
            chatbox = gr.Chatbot(label="Research Assistant", height=420, bubble_full_width=False)
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Ask about LNP delivery, protein corona, cancer variants...",
                    label="Your question",
                    lines=2,
                    scale=4,
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)
            clear_btn = gr.Button("🗑️ Clear conversation", size="sm")
        with gr.Column(scale=1):
            gr.Markdown("### 📚 Indexed Topics")
            gr.Markdown(topics_md)
            gr.Markdown(flags_md)

    # The Send button and pressing Enter in the textbox trigger the same handler.
    chat_io = dict(inputs=[user_input, chatbox], outputs=[chatbox, user_input])
    send_btn.click(respond, **chat_io)
    user_input.submit(respond, **chat_io)
    clear_btn.click(reset_conversation, outputs=[chatbox, user_input])
|
| 960 |
-
|
| 961 |
# ========== ДОПОМІЖНІ ФУНКЦІЇ ДЛЯ UI ==========
|
| 962 |
def section_header(code, name, tagline, projects_html):
|
| 963 |
return (
|
|
|
|
| 27 |
import plotly.express as px
|
| 28 |
import tabulate
|
| 29 |
|
| 30 |
+
# Імпорт з окремих модулів
|
| 31 |
+
from journal import journal_log, journal_read, clear_journal, JOURNAL_CATEGORIES
|
| 32 |
+
from chatbot import build_chatbot_tab
|
| 33 |
+
|
| 34 |
# ========== Діагностичний друк ==========
|
| 35 |
print("Gradio version:", gr.__version__)
|
| 36 |
print("Starting app...")
|
|
|
|
| 92 |
return wrapper
|
| 93 |
return decorator
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
# ========== БАЗИ ДАНИХ ==========
|
| 96 |
MIRNA_DB = {
|
| 97 |
"BRCA2": [
|
|
|
|
| 739 |
)
|
| 740 |
return fig
|
| 741 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 742 |
# ========== ДОПОМІЖНІ ФУНКЦІЇ ДЛЯ UI ==========
|
| 743 |
def section_header(code, name, tagline, projects_html):
|
| 744 |
return (
|