Spaces:

K-RnD-Lab
/

Cancer-Research-Suite_03-2026

Sleeping

App Files Files Community

TEZv committed on Mar 10

Commit

1e6ff6c

verified ·

1 Parent(s): d70b294

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -64

app.py CHANGED Viewed

@@ -22,6 +22,7 @@ import matplotlib.colors as mcolors
 from matplotlib import cm
 import io
 from PIL import Image
 # ─────────────────────────────────────────────
 # CACHE SYSTEM  (TTL = 24 h)
@@ -50,29 +51,66 @@ def cache_set(endpoint: str, query: str, data):
     with open(path, "w") as f:
         json.dump(data, f)
 # ─────────────────────────────────────────────
 # LAB JOURNAL
 # ─────────────────────────────────────────────
 JOURNAL_FILE = "./lab_journal.csv"
-def journal_log(tab: str, action: str, result: str, note: str = ""):
     ts = datetime.datetime.utcnow().isoformat()
-    row = [ts, tab, action, result[:200], note]
     write_header = not os.path.exists(JOURNAL_FILE)
     with open(JOURNAL_FILE, "a", newline="") as f:
         w = csv.writer(f)
         if write_header:
-            w.writerow(["timestamp", "tab", "action", "result_summary", "note"])
         w.writerow(row)
     return ts
-def journal_read() -> str:
     if not os.path.exists(JOURNAL_FILE):
         return "No entries yet."
-    df = pd.read_csv(JOURNAL_FILE)
-    if df.empty:
-        return "No entries yet."
-    return df.tail(20).to_markdown(index=False)
 # ─────────────────────────────────────────────
 # CONSTANTS
@@ -111,29 +149,38 @@ GNOMAD_GQL   = "https://gnomad.broadinstitute.org/api"
 CT_BASE      = "https://clinicaltrials.gov/api/v2"
 # ─────────────────────────────────────────────
-# SHARED API HELPERS
 # ─────────────────────────────────────────────
 def pubmed_count(query: str) -> int:
     """Return paper count for a PubMed query (cached)."""
     cached = cache_get("pubmed_count", query)
     if cached is not None:
         return cached
     try:
-        time.sleep(0.34)
         r = requests.get(
             f"{PUBMED_BASE}/esearch.fcgi",
             params={"db": "pubmed", "term": query, "rettype": "count", "retmode": "json"},
-            timeout=10
         )
         r.raise_for_status()
-        count = int(r.json()["esearchresult"]["count"])
         cache_set("pubmed_count", query, count)
         return count
-    except Exception:
         return -1
 def pubmed_search(query: str, retmax: int = 10) -> list:
     """Return list of PMIDs (cached)."""
     cached = cache_get("pubmed_search", f"{query}_{retmax}")
@@ -144,16 +191,17 @@ def pubmed_search(query: str, retmax: int = 10) -> list:
         r = requests.get(
             f"{PUBMED_BASE}/esearch.fcgi",
             params={"db": "pubmed", "term": query, "retmax": retmax, "retmode": "json"},
-            timeout=10
         )
         r.raise_for_status()
-        ids = r.json()["esearchresult"]["idlist"]
         cache_set("pubmed_search", f"{query}_{retmax}", ids)
         return ids
     except Exception:
         return []
 def pubmed_summary(pmids: list) -> list:
     """Fetch summaries for a list of PMIDs."""
     if not pmids:
@@ -176,7 +224,7 @@ def pubmed_summary(pmids: list) -> list:
     except Exception:
         return []
 def ot_query(gql: str, variables: dict = None) -> dict:
     """Run an OpenTargets GraphQL query (cached)."""
     key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
@@ -191,12 +239,14 @@ def ot_query(gql: str, variables: dict = None) -> dict:
         )
         r.raise_for_status()
         data = r.json()
         cache_set("ot_gql", key, data)
         return data
     except Exception as e:
         return {"error": str(e)}
 # ─────────────────────────────────────────────
 # TAB A1 — GRAY ZONES EXPLORER
 # ─────────────────────────────────────────────
@@ -219,7 +269,8 @@ def a1_run(cancer_type: str):
     fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
     valid = df[cancer_type].fillna(0).values.reshape(-1, 1)
-    cmap = plt.cm.get_cmap("YlOrRd")
     cmap.set_bad("white")
     masked = np.ma.masked_where(df[cancer_type].isna().values.reshape(-1, 1), valid)
     im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
@@ -270,7 +321,6 @@ def _load_depmap_sample() -> pd.DataFrame:
     if "df" in _depmap_cache:
         return _depmap_cache["df"]
     # Use a curated list of known essential/cancer genes as fallback
-    # (full DepMap CSV is ~500 MB; we use the public summary endpoint instead)
     genes = [
         "MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
         "PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
@@ -309,6 +359,9 @@ def a2_run(cancer_type: str):
     }
     """
     ot_data = ot_query(gql, {"efoId": efo, "size": 40})
     rows_ot = []
     try:
         rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
@@ -351,8 +404,6 @@ def a2_run(cancer_type: str):
     depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))
     # 5. Build result table
-    # Research gap index = |essentiality| / log(papers + 1)
-    # Per know-how: DepMap scores are negative for essential genes
     records = []
     for gene in genes_ot[:20]:
         raw_ess = depmap_dict.get(gene, None)
@@ -362,12 +413,10 @@ def a2_run(cancer_type: str):
             ess_display = "N/A"
             gap_idx = 0.0
         else:
-            # Invert: positive = more essential (per DepMap know-how: negative raw = essential)
             ess_inverted = -raw_ess
             ess_display = f"{ess_inverted:.3f}"
             papers_safe = max(papers, 0)
-            # Use log(papers + 2) to guarantee denominator >= log(2) ≈ 0.693
-            # preventing division-by-near-zero for genes with 0 publications
             gap_idx = ess_inverted / math.log(papers_safe + 2) if ess_inverted > 0 else 0.0
         records.append({
             "Gene": gene,
@@ -386,7 +435,10 @@ def a2_run(cancer_type: str):
         f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
         f"and replace `_load_depmap_sample()` in `app.py`."
     )
-    journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={result_df.iloc[0]['Gene'] if len(result_df) else 'none'}")
     return result_df, note
@@ -409,7 +461,7 @@ def a3_run(hgvs: str):
         try:
             time.sleep(0.34)
             r = requests.get(
-                f"{PUBMED_BASE.replace('entrez/eutils','entrez/eutils')}/esearch.fcgi",
                 params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
                 timeout=10
             )
@@ -425,7 +477,7 @@ def a3_run(hgvs: str):
         try:
             time.sleep(0.34)
             r2 = requests.get(
-                f"{PUBMED_BASE.replace('entrez/eutils','entrez/eutils')}/esummary.fcgi",
                 params={"db": "clinvar", "id": ",".join(clinvar_cached[:3]), "retmode": "json"},
                 timeout=10
             )
@@ -458,8 +510,6 @@ def a3_run(hgvs: str):
         )
     # ── gnomAD ──
-    # gnomAD GraphQL expects rsID or gene-level; HGVS lookup is limited
-    # We attempt a search via the variant endpoint
     gnomad_cached = cache_get("gnomad", hgvs)
     if gnomad_cached is None:
         try:
@@ -514,7 +564,7 @@ def a3_run(hgvs: str):
         )
     result_parts.append(f"\n*Source: ClinVar E-utilities + gnomAD GraphQL | Date: {today}*")
-    journal_log("A3-VariantLookup", f"hgvs={hgvs}", result_parts[0][:100])
     return "\n\n".join(result_parts)
@@ -618,6 +668,9 @@ def a5_run(cancer_type: str):
     }
     """
     ot_data = ot_query(gql, {"efoId": efo, "size": 50})
     rows_ot = []
     try:
         rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
@@ -676,8 +729,6 @@ def a5_run(cancer_type: str):
     )
     journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
     return df, note
 # ─────────────────────────────────────────────
 # GROUP B — LEARNING SANDBOX
 # ─────────────────────────────────────────────
@@ -996,23 +1047,6 @@ body { font-family: 'Inter', sans-serif; }
 footer { display: none !important; }
 """
-def build_journal_sidebar():
-    with gr.Column(scale=1, min_width=260):
-        gr.Markdown("## 📓 Lab Journal")
-        note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
-        save_btn = gr.Button("💾 Save Note", size="sm")
-        refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
-        journal_display = gr.Markdown(value="*Click Refresh to load entries.*")
-        def save_note(note):
-            if note.strip():
-                journal_log("Manual", "note", note.strip(), note.strip())
-            return journal_read()
-        save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
-        refresh_btn.click(lambda: journal_read(), outputs=[journal_display])
-    return note_input, journal_display
 def build_app():
     with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
@@ -1306,29 +1340,60 @@ def build_app():
                                     )
                                 b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])
-            # ── SIDEBAR ──
             with gr.Column(scale=1, min_width=260):
                 gr.Markdown("## 📓 Lab Journal")
-                note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
-                save_btn = gr.Button("💾 Save Note", size="sm")
-                refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
-                journal_display = gr.Markdown(value="*Click Refresh to load entries.*")
-                def save_note(note):
                     if note.strip():
-                        journal_log("Manual", "note", note.strip(), note.strip())
-                    return journal_read()
-                save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
-                refresh_btn.click(lambda: journal_read(), outputs=[journal_display])
         # === Integration with Learning Playground ===
         with gr.Row():
             gr.Markdown("""
             ### 🧪 New to the concepts?
             Explore our **[Learning Playground](https://huggingface.co/spaces/K-RnD-Lab/Learning-Playground_03-2026)** with simulated environments.
             """)
         gr.Markdown(
             "---\n"
             "*K R&D Lab Cancer Research Suite · "
@@ -1342,4 +1407,4 @@ def build_app():
 if __name__ == "__main__":
     app = build_app()
-    app.launch(share=False)

 from matplotlib import cm
 import io
 from PIL import Image
+from functools import wraps
 # ─────────────────────────────────────────────
 # CACHE SYSTEM  (TTL = 24 h)
     with open(path, "w") as f:
         json.dump(data, f)
+# ─────────────────────────────────────────────
+# RETRY DECORATOR
+# ─────────────────────────────────────────────
def retry(max_attempts=3, delay=1):
    """Build a decorator that calls a function again when it raises.

    Args:
        max_attempts: Total number of calls to make before giving up.
            Values below 1 are treated as 1, so the wrapped function is
            always invoked at least once instead of silently returning None.
        delay: Base pause in seconds. After the k-th failed attempt the
            wrapper sleeps ``delay * k`` seconds (linear back-off).

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns; once every attempt has failed, the exception from
        the final attempt is re-raised unchanged.
    """
    attempts = max(1, max_attempts)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    print(f"Attempt {attempt} failed for {func.__name__}: {e}")
                    if attempt == attempts:
                        # Out of attempts: surface the real error to the caller.
                        raise
                    time.sleep(delay * attempt)
        return wrapper
    return decorator
 # ─────────────────────────────────────────────
 # LAB JOURNAL
 # ─────────────────────────────────────────────
 JOURNAL_FILE = "./lab_journal.csv"
# Category labels offered by the journal sidebar's "Category" and
# "Filter by category" dropdowns. journal_read() filters with an exact string
# comparison, so each label has to equal the category string the matching tab
# passes to journal_log().
# NOTE(review): only the A2, A3 and A5 journal_log() calls were checked against
# this list; confirm the remaining labels against their tabs.
JOURNAL_CATEGORIES = [
    "A1-GrayZones",
    "A2-TargetFinder",
    "A3-VariantLookup",
    "A4-LitGap",
    "A5-DruggableOrphans",  # a5_run logs under this exact label
    "A6-Chatbot",
    "B1-miRNA",
    "B2-siRNA",
    "B3-LNPCorona",
    "B4-FlowCorona",
    "B5-VariantConcepts",
    "Manual",
]
def journal_log(category: str, action: str, result: str, note: str = ""):
    """Append one entry to the lab-journal CSV and return its timestamp.

    Args:
        category: Label stored in the ``category`` column.
        action: Short description of what was run.
        result: Result text; only the first 200 characters are stored.
        note: Optional free-text note stored alongside the entry.

    Returns:
        The ISO-8601 UTC timestamp string written as the row's first field.
    """
    # Produces the same naive-UTC ISO string as datetime.utcnow(), without
    # calling the function that is deprecated since Python 3.12.
    ts = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
    row = [ts, category, action, result[:200], note]
    # A missing file and a zero-length file both still need the header row.
    write_header = (
        not os.path.exists(JOURNAL_FILE) or os.path.getsize(JOURNAL_FILE) == 0
    )
    # Explicit UTF-8 keeps non-ASCII notes readable by pandas.read_csv, which
    # decodes as UTF-8 by default, regardless of the platform locale.
    with open(JOURNAL_FILE, "a", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        if write_header:
            w.writerow(["timestamp", "category", "action", "result_summary", "note"])
        w.writerow(row)
    return ts
def journal_read(category: str = "All") -> str:
    """Render the most recent journal entries as a Markdown table.

    Args:
        category: Exact category label to keep, or ``"All"`` for no filter.

    Returns:
        A Markdown table of up to the last 20 matching rows (timestamp,
        category, action, result_summary), or a short status message when
        the journal is missing, empty, has no matching rows, or cannot be
        read.
    """
    if not os.path.exists(JOURNAL_FILE):
        return "No entries yet."
    try:
        df = pd.read_csv(JOURNAL_FILE)
        # Journals whose header still uses the older "tab" column name hold
        # the same data; rename it so those files stay readable.
        if "category" not in df.columns and "tab" in df.columns:
            df = df.rename(columns={"tab": "category"})
        if df.empty:
            return "No entries yet."
        if category != "All":
            df = df[df["category"] == category]
        if df.empty:
            return f"No entries for category: {category}"
        # Show only the summary columns; the free-text note is left out.
        df_display = df[["timestamp", "category", "action", "result_summary"]].tail(20)
        return df_display.to_markdown(index=False)
    except pd.errors.EmptyDataError:
        # The file exists but has no content at all: nothing logged yet.
        return "No entries yet."
    except Exception as e:
        print(f"Journal read error: {e}")
        return "Error reading journal."
 # ─────────────────────────────────────────────
 # CONSTANTS
 CT_BASE      = "https://clinicaltrials.gov/api/v2"
 # ─────────────────────────────────────────────
+# SHARED API HELPERS (with retry)
 # ─────────────────────────────────────────────
@retry(max_attempts=3, delay=1)
def _pubmed_count_fetch(query: str) -> int:
    """Fetch the PubMed hit count for *query*, raising on any failure.

    Errors are deliberately not caught here so that the retry decorator
    actually sees them and can try again.
    """
    time.sleep(0.34)  # Rate limit compliance
    r = requests.get(
        f"{PUBMED_BASE}/esearch.fcgi",
        params={"db": "pubmed", "term": query, "rettype": "count", "retmode": "json"},
        timeout=15,
    )
    r.raise_for_status()
    # Index strictly: a payload without esearchresult/count is treated as a
    # failed lookup, never as "zero papers".
    return int(r.json()["esearchresult"]["count"])


def pubmed_count(query: str) -> int:
    """Return paper count for a PubMed query (cached).

    Args:
        query: PubMed search term.

    Returns:
        The number of matching papers, or -1 when the lookup failed after
        all retry attempts. Failed lookups are not written to the cache.
    """
    cached = cache_get("pubmed_count", query)
    if cached is not None:
        return cached
    try:
        count = _pubmed_count_fetch(query)
    except requests.exceptions.Timeout:
        print(f"Timeout for query: {query}")
        return -1
    except requests.exceptions.RequestException as e:
        print(f"Request error for {query}: {e}")
        return -1
    except (KeyError, TypeError, ValueError) as e:
        print(f"Parsing error for {query}: {e}")
        return -1
    cache_set("pubmed_count", query, count)
    return count
+@retry(max_attempts=3, delay=1)
 def pubmed_search(query: str, retmax: int = 10) -> list:
     """Return list of PMIDs (cached)."""
     cached = cache_get("pubmed_search", f"{query}_{retmax}")
         r = requests.get(
             f"{PUBMED_BASE}/esearch.fcgi",
             params={"db": "pubmed", "term": query, "retmax": retmax, "retmode": "json"},
+            timeout=15
         )
         r.raise_for_status()
+        data = r.json()
+        ids = data.get("esearchresult", {}).get("idlist", [])
         cache_set("pubmed_search", f"{query}_{retmax}", ids)
         return ids
     except Exception:
         return []
+@retry(max_attempts=3, delay=1)
 def pubmed_summary(pmids: list) -> list:
     """Fetch summaries for a list of PMIDs."""
     if not pmids:
     except Exception:
         return []
+@retry(max_attempts=3, delay=1)
 def ot_query(gql: str, variables: dict = None) -> dict:
     """Run an OpenTargets GraphQL query (cached)."""
     key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
         )
         r.raise_for_status()
         data = r.json()
+        if "errors" in data:
+            print(f"GraphQL errors: {data['errors']}")
+            return {"error": data["errors"]}
         cache_set("ot_gql", key, data)
         return data
     except Exception as e:
+        print(f"OT query error: {e}")
         return {"error": str(e)}
 # ─────────────────────────────────────────────
 # TAB A1 — GRAY ZONES EXPLORER
 # ─────────────────────────────────────────────
     fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
     valid = df[cancer_type].fillna(0).values.reshape(-1, 1)
+    # Fixed deprecated get_cmap
+    cmap = plt.colormaps.get_cmap("YlOrRd")
     cmap.set_bad("white")
     masked = np.ma.masked_where(df[cancer_type].isna().values.reshape(-1, 1), valid)
     im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
     if "df" in _depmap_cache:
         return _depmap_cache["df"]
     # Use a curated list of known essential/cancer genes as fallback
     genes = [
         "MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
         "PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
     }
     """
     ot_data = ot_query(gql, {"efoId": efo, "size": 40})
+    if "error" in ot_data:
+        return None, f"⚠️ OpenTargets API error: {ot_data['error']}\n\n*Source: OpenTargets | Date: {today}*"
     rows_ot = []
     try:
         rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
     depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))
     # 5. Build result table
     records = []
     for gene in genes_ot[:20]:
         raw_ess = depmap_dict.get(gene, None)
             ess_display = "N/A"
             gap_idx = 0.0
         else:
+            # Invert: positive = more essential
             ess_inverted = -raw_ess
             ess_display = f"{ess_inverted:.3f}"
             papers_safe = max(papers, 0)
             gap_idx = ess_inverted / math.log(papers_safe + 2) if ess_inverted > 0 else 0.0
         records.append({
             "Gene": gene,
         f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
         f"and replace `_load_depmap_sample()` in `app.py`."
     )
+    if not result_df.empty:
+        journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={result_df.iloc[0]['Gene']}")
+    else:
+        journal_log("A2-TargetFinder", f"cancer={cancer_type}", "no targets found")
     return result_df, note
         try:
             time.sleep(0.34)
             r = requests.get(
+                f"{PUBMED_BASE}/esearch.fcgi",
                 params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
                 timeout=10
             )
         try:
             time.sleep(0.34)
             r2 = requests.get(
+                f"{PUBMED_BASE}/esummary.fcgi",
                 params={"db": "clinvar", "id": ",".join(clinvar_cached[:3]), "retmode": "json"},
                 timeout=10
             )
         )
     # ── gnomAD ──
     gnomad_cached = cache_get("gnomad", hgvs)
     if gnomad_cached is None:
         try:
         )
     result_parts.append(f"\n*Source: ClinVar E-utilities + gnomAD GraphQL | Date: {today}*")
+    journal_log("A3-VariantLookup", f"hgvs={hgvs}", result_parts[0][:100] if result_parts else "no results")
     return "\n\n".join(result_parts)
     }
     """
     ot_data = ot_query(gql, {"efoId": efo, "size": 50})
+    if "error" in ot_data:
+        return None, f"⚠️ OpenTargets API error: {ot_data['error']}\n\n*Source: OpenTargets | Date: {today}*"
     rows_ot = []
     try:
         rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
     )
     journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
     return df, note
 # ─────────────────────────────────────────────
 # GROUP B — LEARNING SANDBOX
 # ─────────────────────────────────────────────
 footer { display: none !important; }
 """
 def build_app():
     with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
                                     )
                                 b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])
+            # ── SIDEBAR (JOURNAL) ──
             with gr.Column(scale=1, min_width=260):
                 gr.Markdown("## 📓 Lab Journal")
+                note_category = gr.Dropdown(
+                    choices=JOURNAL_CATEGORIES,
+                    value="Manual",
+                    label="Category"
+                )
+                note_input = gr.Textbox(
+                    label="Observation",
+                    placeholder="Type your note here...",
+                    lines=3
+                )
+                with gr.Row():
+                    save_btn = gr.Button("💾 Save Note", size="sm", variant="primary")
+                    clear_note_btn = gr.Button("🗑️ Clear", size="sm")
+                save_status = gr.Markdown("")
+                gr.Markdown("---")
+                gr.Markdown("## 🔍 Full Journal")
+                journal_filter = gr.Dropdown(
+                    choices=["All"] + JOURNAL_CATEGORIES,
+                    value="All",
+                    label="Filter by category"
+                )
+                refresh_btn = gr.Button("🔄 Refresh", size="sm")
+                journal_display = gr.Markdown(value=journal_read())
+                def save_note(category, note):
                     if note.strip():
+                        journal_log(category, "manual note", note, note)
+                        return "✅ Note saved.", ""
+                    return "⚠️ Note is empty.", ""
+                def refresh_journal(category):
+                    return journal_read(category)
+                save_btn.click(
+                    save_note,
+                    inputs=[note_category, note_input],
+                    outputs=[save_status, note_input]
+                )
+                clear_note_btn.click(lambda: ("", ""), outputs=[note_input, save_status])
+                refresh_btn.click(refresh_journal, inputs=[journal_filter], outputs=journal_display)
+                journal_filter.change(refresh_journal, inputs=[journal_filter], outputs=journal_display)
         # === Integration with Learning Playground ===
         with gr.Row():
             gr.Markdown("""
+            ---
             ### 🧪 New to the concepts?
             Explore our **[Learning Playground](https://huggingface.co/spaces/K-RnD-Lab/Learning-Playground_03-2026)** with simulated environments.
             """)
         gr.Markdown(
             "---\n"
             "*K R&D Lab Cancer Research Suite · "
 if __name__ == "__main__":
     app = build_app()
+    app.launch(share=False)