essprasad committed on
Commit
a400884
·
verified ·
1 Parent(s): d77b04a

Upload api_clients.py

Browse files
Files changed (1) hide show
  1. utils/api_clients.py +194 -0
utils/api_clients.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils/api_clients.py
3
+ ------------------------------------------------
4
+ Enhanced API clients for:
5
+ - PubMed (NCBI)
6
+ - ClinicalTrials.gov
7
+ - FDA Open Data
8
+ - WHO ICTRP
9
+ ------------------------------------------------
10
+ Optimized for hybrid VAN-based query processing:
11
+ - Automatically truncates long queries (top keywords only)
12
+ - Resilient to API downtime or malformed responses
13
+ - HTML formatted results for Gradio rendering
14
+ """
15
+
16
+ import requests
17
+ import html
18
+ import re
19
+ import traceback
20
+
21
+ # ============================================================
22
+ # 🔹 Query Normalization
23
+ # ============================================================
24
+ def _normalize_query(query: str, max_words: int = 5) -> str:
25
+ """
26
+ Cleans and shortens user query for API compatibility.
27
+ Removes filler phrases and limits to key words.
28
+ """
29
+ q = query.lower()
30
+ q = re.sub(
31
+ r"(what is|define|explain|describe|in clinical trials|the meaning of|tell me about|explanation of|concept of)\b",
32
+ "",
33
+ q,
34
+ )
35
+ q = re.sub(r"[^a-z0-9\s]", "", q)
36
+ q = re.sub(r"\s+", " ", q).strip()
37
+
38
+ # limit to first few words (avoid 404s from overlong queries)
39
+ words = q.split()
40
+ q = " ".join(words[:max_words])
41
+ return q or "clinical trial"
42
+
43
+ # ============================================================
44
+ # 🔹 PubMed API (NCBI E-Utilities)
45
+ # ============================================================
46
def fetch_pubmed(query: str, limit: int = 3) -> str:
    """Search PubMed via the NCBI E-utilities and format the hits as HTML.

    The query is normalized with ``_normalize_query``, ``esearch`` is used
    to obtain matching PMIDs, and a single ``esummary`` call retrieves the
    title, source and publication date of all of them.

    Args:
        query: Free-text user query.
        limit: Maximum number of articles to return (``retmax``).

    Returns:
        An HTML fragment with one entry per article separated by
        ``<br><br>``, a "no results" message, or an error message if the
        request or response parsing fails. This function never raises.
    """
    try:
        q = _normalize_query(query)
        base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

        # Let requests build and percent-encode the query string.
        search = requests.get(
            f"{base}esearch.fcgi",
            params={"db": "pubmed", "term": q, "retmax": limit, "retmode": "json"},
            timeout=10,
        )
        search.raise_for_status()

        ids = search.json().get("esearchresult", {}).get("idlist", [])
        if not ids:
            return f"<i>No PubMed results found for <b>{html.escape(q)}</b>.</i>"

        # esummary accepts a comma-separated id list: one request for all
        # PMIDs instead of one per PMID (fewer round trips, and less likely
        # to trip NCBI's per-second request limit).
        summary = requests.get(
            f"{base}esummary.fcgi",
            params={"db": "pubmed", "id": ",".join(ids), "retmode": "json"},
            timeout=10,
        )
        summary.raise_for_status()
        docs = summary.json().get("result", {})

        summaries = []
        for pmid in ids:
            doc = docs.get(pmid) or {}
            # "or" fallbacks also cover fields present but null/empty.
            title = html.escape(doc.get("title") or "Untitled")
            source = html.escape(doc.get("source") or "")
            pubdate = html.escape(doc.get("pubdate") or "")
            link = f"https://pubmed.ncbi.nlm.nih.gov/{html.escape(str(pmid))}/"
            summaries.append(
                f"<b>{title}</b><br>{source} ({pubdate})<br>"
                f"<a href='{link}' target='_blank'>[PubMed]</a>"
            )

        return "<br><br>".join(summaries)

    except Exception as e:
        traceback.print_exc()
        # str() keeps the handler itself from raising on non-string input;
        # the exception text is escaped because it is rendered as HTML.
        return (
            f"<i>PubMed fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
78
+
79
+ # ============================================================
80
+ # 🔹 ClinicalTrials.gov API
81
+ # ============================================================
82
def fetch_clinicaltrials(query: str, limit: int = 3) -> str:
    """Retrieve brief summaries of matching trials from ClinicalTrials.gov.

    Uses the ClinicalTrials.gov API v2 (``/api/v2/studies``). The legacy
    ``/api/query/study_fields`` endpoint has been retired, so requests to
    it no longer succeed. The query is truncated via ``_normalize_query``
    to avoid errors on long input.

    Args:
        query: Free-text user query.
        limit: Maximum number of trials to return (sent as ``pageSize``;
            values below 1 are raised to 1).

    Returns:
        An HTML fragment with one entry per trial separated by
        ``<br><br>``, a "no trials" message, or an error message if the
        request or response parsing fails. This function never raises.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://clinicaltrials.gov/api/v2/studies",
            params={
                "query.term": q,
                "fields": "NCTId,BriefTitle,Condition,OverallStatus",
                "pageSize": max(limit, 1),
                "format": "json",
            },
            timeout=10,
        )
        res.raise_for_status()

        studies = res.json().get("studies", [])
        if not studies:
            return f"<i>No trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        for s in studies:
            # v2 nests the requested fields under protocolSection modules;
            # every level is optional, so fall back to empty containers
            # instead of indexing (a missing field must not abort the list).
            protocol = s.get("protocolSection") or {}
            ident = protocol.get("identificationModule") or {}
            nct = ident.get("nctId") or ""
            title = html.escape(ident.get("briefTitle") or "")
            conditions = (protocol.get("conditionsModule") or {}).get("conditions") or []
            condition = html.escape(", ".join(conditions))
            status = html.escape(
                (protocol.get("statusModule") or {}).get("overallStatus") or "Unknown"
            )
            link = f"https://clinicaltrials.gov/study/{html.escape(nct)}" if nct else "#"
            formatted.append(
                f"<b>{title}</b><br>"
                f"Condition: {condition or 'N/A'}<br>"
                f"Status: {status}<br>"
                f"<a href='{link}' target='_blank'>[ClinicalTrials.gov]</a>"
            )

        return "<br><br>".join(formatted)

    except Exception as e:
        traceback.print_exc()
        # str() keeps the handler itself from raising on non-string input;
        # the exception text is escaped because it is rendered as HTML.
        return (
            f"<i>ClinicalTrials.gov fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
120
+
121
+ # ============================================================
122
+ # 🔹 FDA Open Data API
123
+ # ============================================================
124
def fetch_fda(query: str, limit: int = 3) -> str:
    """Retrieve FDA label and safety data for a given compound/drug name.

    Queries the openFDA drug label endpoint, searching
    ``openfda.brand_name`` for the normalized query.

    Args:
        query: Free-text user query (typically a drug name).
        limit: Maximum number of label records to request.

    Returns:
        An HTML fragment with one entry per label separated by
        ``<br><br>``, a "no data" message (the API answers 404 when nothing
        matches), or an error message if the request or response parsing
        fails. This function never raises.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://api.fda.gov/drug/label.json",
            params={"search": f"openfda.brand_name:{q}", "limit": limit},
            timeout=10,
        )

        if res.status_code == 404:
            return f"<i>No FDA data found for <b>{html.escape(q)}</b>.</i>"

        res.raise_for_status()
        data = res.json().get("results", [])
        if not data:
            return f"<i>No FDA label results found for <b>{html.escape(q)}</b>.</i>"

        link = "https://open.fda.gov/drug/label/"  # same for every entry
        formatted = []
        for entry in data:
            openfda = entry.get("openfda") or {}
            # Brand/generic names come from the API and are rendered as
            # HTML, so they are escaped like every other field.
            brand = html.escape(", ".join(openfda.get("brand_name") or []))
            generic = html.escape(", ".join(openfda.get("generic_name") or []))
            # Only the first paragraph of each section keeps the card short.
            purpose = html.escape(" ".join((entry.get("purpose") or [])[:1]))
            warnings = html.escape(" ".join((entry.get("warnings") or [])[:1]))
            formatted.append(
                f"<b>{brand or html.escape(q)}</b> ({generic or 'N/A'})<br>"
                f"<u>Purpose:</u> {purpose or 'N/A'}<br>"
                f"<u>Warning:</u> {warnings or 'N/A'}<br>"
                f"<a href='{link}' target='_blank'>[FDA Label]</a>"
            )

        return "<br><br>".join(formatted)

    except Exception as e:
        traceback.print_exc()
        # str() keeps the handler itself from raising on non-string input;
        # the exception text is escaped because it is rendered as HTML.
        return (
            f"<i>FDA fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )
160
+
161
+ # ============================================================
162
+ # 🔹 WHO ICTRP (Backup Trial Source)
163
+ # ============================================================
164
def fetch_who_trials(query: str, limit: int = 2) -> str:
    """Optional backup trial search against the WHO ICTRP search endpoint.

    Returns simplified summaries (title, registry, recruitment country)
    for readability.

    NOTE(review): the endpoint URL and the ``TrialSearchResult`` response
    key are kept exactly as originally written; they could not be confirmed
    against public WHO documentation - verify before relying on this source.

    Args:
        query: Free-text user query.
        limit: Maximum number of trials to include. Values below zero are
            treated as zero.

    Returns:
        An HTML fragment with one entry per trial separated by
        ``<br><br>``, an "unavailable" message for any non-200 response,
        a "no trials" message, or an error message if the request or
        response parsing fails. This function never raises.
    """
    try:
        q = _normalize_query(query)
        res = requests.get(
            "https://trialsearch.who.int/api/TrialSearch",
            params={"query": q},
            timeout=10,
        )

        if res.status_code != 200:
            return "<i>WHO ICTRP API unavailable or throttled.</i>"

        trials = res.json().get("TrialSearchResult", [])
        if not trials:
            return f"<i>No WHO trials found for <b>{html.escape(q)}</b>.</i>"

        formatted = []
        # Clamp so a negative limit cannot turn into a "drop the tail" slice.
        for t in trials[: max(limit, 0)]:
            # str(... or default) tolerates null / non-string field values
            # instead of letting one bad record abort the whole result.
            title = html.escape(str(t.get("Scientific_title") or "Untitled"))
            registry = html.escape(str(t.get("Register") or ""))
            country = html.escape(str(t.get("Recruitment_Country") or ""))
            formatted.append(
                f"<b>{title}</b><br>{registry or 'Registry Unknown'} — {country or 'N/A'}"
            )

        return "<br><br>".join(formatted)

    except Exception as e:
        traceback.print_exc()
        # str() keeps the handler itself from raising on non-string input;
        # the exception text is escaped because it is rendered as HTML.
        return (
            f"<i>WHO ICTRP fetch failed for <b>{html.escape(str(query))}</b>: "
            f"{html.escape(str(e))}</i>"
        )