Spaces:

essprasad
/

CT-Chat-V2

Sleeping

App Files Files Community

essprasad committed on Dec 6, 2025

Commit

a400884

verified ·

1 Parent(s): d77b04a

Upload api_clients.py

Browse files

Files changed (1) hide show

utils/api_clients.py +194 -0

utils/api_clients.py ADDED Viewed

	@@ -0,0 +1,194 @@

+"""
+utils/api_clients.py
+------------------------------------------------
+Enhanced API clients for:
+- PubMed (NCBI)
+- ClinicalTrials.gov
+- FDA Open Data
+- WHO ICTRP
+------------------------------------------------
+Optimized for hybrid VAN-based query processing:
+- Automatically truncates long queries (keeps only the first few words after filler phrases are removed)
+- Resilient to API downtime or malformed responses
+- HTML formatted results for Gradio rendering
+"""
+import requests
+import html
+import re
+import traceback
+# ============================================================
+# 🔹 Query Normalization
+# ============================================================
+def _normalize_query(query: str, max_words: int = 5) -> str:
+    """
+    Cleans and shortens user query for API compatibility.
+    Removes filler phrases and limits to key words.
+    """
+    q = query.lower()
+    q = re.sub(
+        r"(what is|define|explain|describe|in clinical trials|the meaning of|tell me about|explanation of|concept of)\b",
+        "",
+        q,
+    )
+    q = re.sub(r"[^a-z0-9\s]", "", q)
+    q = re.sub(r"\s+", " ", q).strip()
+    # limit to first few words (avoid 404s from overlong queries)
+    words = q.split()
+    q = " ".join(words[:max_words])
+    return q or "clinical trial"
+# ============================================================
+# 🔹 PubMed API (NCBI E-Utilities)
+# ============================================================
def fetch_pubmed(query: str, limit: int = 3) -> str:
    """
    Search PubMed through the NCBI E-utilities and return HTML summaries.

    The query is first shortened with ``_normalize_query``. ``esearch`` is
    called to obtain up to ``limit`` PubMed IDs, then a single ``esummary``
    call fetches the title, source and publication date of all of them.

    Args:
        query: Free-text user query.
        limit: Maximum number of articles to return.

    Returns:
        An HTML fragment with one entry per article (separated by
        ``<br><br>``), a "no results" message, or a "fetch failed" message.
        Any exception is printed via ``traceback`` and reported in the
        returned HTML; this function does not raise.
    """
    try:
        q = _normalize_query(query)
        base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        # Let requests build and percent-encode the query string.
        res = requests.get(
            f"{base}esearch.fcgi",
            params={"db": "pubmed", "term": q, "retmax": limit, "retmode": "json"},
            timeout=10,
        )
        res.raise_for_status()
        ids = res.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return f"<i>No PubMed results found for <b>{html.escape(q)}</b>.</i>"

        # One batched esummary request for every ID instead of one request
        # per article: fewer round trips and less pressure on rate limits.
        sres = requests.get(
            f"{base}esummary.fcgi",
            params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"},
            timeout=10,
        )
        sres.raise_for_status()
        docs = sres.json()["result"]

        summaries = []
        for pmid in ids:
            doc = docs.get(pmid) or {}
            title = html.escape(doc.get("title") or "Untitled")
            source = html.escape(doc.get("source") or "")
            pubdate = html.escape(doc.get("pubdate") or "")
            link = f"https://pubmed.ncbi.nlm.nih.gov/{html.escape(str(pmid))}/"
            summaries.append(
                f"<b>{title}</b><br>{source} ({pubdate})<br>"
                f"<a href='{link}' target='_blank'>[PubMed]</a>"
            )
        return "<br><br>".join(summaries)
    except Exception as e:
        traceback.print_exc()
        # Escape both the query and the exception text: the message can carry
        # URLs or response fragments that must not be interpreted as HTML.
        return (
            f"<i>PubMed fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
+# ============================================================
+# 🔹 ClinicalTrials.gov API
+# ============================================================
def fetch_clinicaltrials(query: str, limit: int = 3) -> str:
    """
    Retrieve brief summaries of matching trials from ClinicalTrials.gov.

    Uses the ClinicalTrials.gov REST API v2 (``/api/v2/studies``); the
    classic ``/api/query/study_fields`` endpoint has been retired. The query
    is shortened with ``_normalize_query`` before it is sent.

    Args:
        query: Free-text user query.
        limit: Maximum number of trials to return.

    Returns:
        An HTML fragment with title, conditions, status and a link for each
        trial (separated by ``<br><br>``), a "no trials" message, or a
        "fetch failed" message. Any exception is printed via ``traceback``
        and reported in the returned HTML; this function does not raise.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://clinicaltrials.gov/api/v2/studies",
            params={
                "query.term": q,
                "fields": "NCTId,BriefTitle,Condition,OverallStatus",
                "pageSize": limit,
                "format": "json",
            },
            timeout=10,
        )
        res.raise_for_status()
        studies = res.json().get("studies") or []
        if not studies:
            return f"<i>No trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        for s in studies[:limit]:
            # v2 nests the requested fields under protocolSection.<module>;
            # any module may be absent, so fall back to empty containers
            # instead of indexing blindly.
            protocol = s.get("protocolSection") or {}
            ident = protocol.get("identificationModule") or {}
            status_mod = protocol.get("statusModule") or {}
            cond_mod = protocol.get("conditionsModule") or {}

            nct = ident.get("nctId") or ""
            title = html.escape(ident.get("briefTitle") or "Untitled")
            condition = html.escape(", ".join(cond_mod.get("conditions") or []))
            # Status arrives as an upper-case enum such as NOT_YET_RECRUITING;
            # make it readable for display.
            raw_status = status_mod.get("overallStatus") or "Unknown"
            status = html.escape(raw_status.replace("_", " ").title())
            link = f"https://clinicaltrials.gov/study/{html.escape(nct)}" if nct else "#"
            formatted.append(
                f"<b>{title}</b><br>"
                f"Condition: {condition or 'N/A'}<br>"
                f"Status: {status}<br>"
                f"<a href='{link}' target='_blank'>[ClinicalTrials.gov]</a>"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Escape both the query and the exception text before embedding in HTML.
        return (
            f"<i>ClinicalTrials.gov fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
+# ============================================================
+# 🔹 FDA Open Data API
+# ============================================================
def fetch_fda(query: str, limit: int = 3) -> str:
    """
    Retrieve FDA drug-label data (openFDA) for a given compound/drug name.

    The query is shortened with ``_normalize_query`` and matched against
    ``openfda.brand_name`` on the ``/drug/label.json`` endpoint.

    Args:
        query: Free-text user query, ideally a brand name.
        limit: Maximum number of label records to return.

    Returns:
        An HTML fragment with brand name, generic name, the first "purpose"
        and "warnings" entries and a link for each record (separated by
        ``<br><br>``), a "no data" message, or a "fetch failed" message.
        Any exception is printed via ``traceback`` and reported in the
        returned HTML; this function does not raise.
    """
    try:
        q = _normalize_query(query)
        safe_q = html.escape(q)
        url = f"https://api.fda.gov/drug/label.json?search=openfda.brand_name:{q}&limit={limit}"
        res = requests.get(url, timeout=10)
        # A 404 is reported as "no data" rather than as a failure.
        if res.status_code == 404:
            return f"<i>No FDA data found for <b>{safe_q}</b>.</i>"
        res.raise_for_status()
        data = res.json().get("results", [])
        if not data:
            return f"<i>No FDA label results found for <b>{safe_q}</b>.</i>"
        formatted = []
        for entry in data:
            openfda = entry.get("openfda") or {}
            # Every value taken from the API response is escaped before it is
            # embedded in HTML, including the brand and generic names.
            brand = html.escape(", ".join(openfda.get("brand_name") or []))
            generic = html.escape(", ".join(openfda.get("generic_name") or []))
            # Only the first purpose / warnings entry is shown.
            purpose = html.escape(" ".join((entry.get("purpose") or [])[:1]))
            warnings = html.escape(" ".join((entry.get("warnings") or [])[:1]))
            link = "https://open.fda.gov/drug/label/"
            formatted.append(
                f"<b>{brand or safe_q}</b> ({generic or 'N/A'})<br>"
                f"<u>Purpose:</u> {purpose or 'N/A'}<br>"
                f"<u>Warning:</u> {warnings or 'N/A'}<br>"
                f"<a href='{link}' target='_blank'>[FDA Label]</a>"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Escape both the query and the exception text before embedding in HTML.
        return (
            f"<i>FDA fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
+# ============================================================
+# 🔹 WHO ICTRP (Backup Trial Source)
+# ============================================================
def fetch_who_trials(query: str, limit: int = 2) -> str:
    """
    Optional backup trial search against the WHO ICTRP search endpoint.

    The query is shortened with ``_normalize_query``. Any non-200 response is
    reported as "unavailable or throttled".

    NOTE(review): the availability and response schema of
    ``trialsearch.who.int/api/TrialSearch`` could not be verified from this
    file; the keys read below (``TrialSearchResult``, ``Scientific_title``,
    ``Register``, ``Recruitment_Country``) are kept as originally written.

    Args:
        query: Free-text user query.
        limit: Maximum number of trials to include in the output.

    Returns:
        An HTML fragment with title, registry and recruitment country for
        each trial (separated by ``<br><br>``), a "no trials" / "unavailable"
        message, or a "fetch failed" message. Any exception is printed via
        ``traceback`` and reported in the returned HTML; this function does
        not raise.
    """
    try:
        q = _normalize_query(query)
        url = f"https://trialsearch.who.int/api/TrialSearch?query={q}"
        res = requests.get(url, timeout=10)
        if res.status_code != 200:
            return "<i>WHO ICTRP API unavailable or throttled.</i>"
        payload = res.json()
        # Tolerate a payload that is not a JSON object (e.g. a bare list).
        trials = payload.get("TrialSearchResult") if isinstance(payload, dict) else None
        if not trials:
            return f"<i>No WHO trials found for <b>{html.escape(q)}</b>.</i>"
        formatted = []
        for t in trials[:limit]:
            if not isinstance(t, dict):
                continue
            # ``or`` guards against explicit nulls, which would otherwise make
            # html.escape raise and discard every result.
            title = html.escape(str(t.get("Scientific_title") or "Untitled"))
            registry = html.escape(str(t.get("Register") or ""))
            country = html.escape(str(t.get("Recruitment_Country") or ""))
            formatted.append(
                f"<b>{title}</b><br>{registry or 'Registry Unknown'} — {country or 'N/A'}"
            )
        return "<br><br>".join(formatted)
    except Exception as e:
        traceback.print_exc()
        # Escape both the query and the exception text before embedding in HTML.
        return (
            f"<i>WHO ICTRP fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )