SadiaK14 committed on
Commit
ae6549c
·
verified ·
1 Parent(s): 55c2901

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -136
app.py CHANGED
@@ -245,170 +245,111 @@ from tools.final_answer import FinalAnswerTool
245
  from Gradio_UI import GradioUI
246
 
247
 
248
- # @tool
249
- # def my_custom_tool(arg1: str, arg2: int) -> str:
250
- # """
251
- # Search and retrieve publicly available medical datasets from Hugging Face based on any medical-related keyword.
252
-
253
- # Args:
254
- # arg1: A keyword related to medical data (e.g., 'cancer', 'diabetes', 'CT scan', 'radiology', 'dermoscopy').
255
- # arg2: The maximum number of datasets to retrieve.
256
-
257
- # Returns:
258
- # A numbered list (top N) of dataset names matching the search query.
259
- # """
260
- # try:
261
- # keyword = arg1.strip().lower()
262
- # limit = int(arg2)
263
-
264
- # # Define a comprehensive list of medically relevant terms
265
- # medical_terms = [
266
- # # Anatomy / Body Parts
267
- # "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
268
- # "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
269
- # "retina", "tooth", "teeth", "tongue", "jaw", "neck", "wrist", "hand", "leg", "arm",
270
- # "shoulder", "pelvis",
271
-
272
- # # Diseases / Conditions
273
- # "cancer", "tumor", "stroke", "diabetes", "pneumonia", "covid", "asthma", "eczema",
274
- # "melanoma", "hypertension", "alzheimer", "parkinson", "arthritis", "scoliosis",
275
- # "epilepsy", "glaucoma", "ulcer", "hepatitis", "leukemia", "lymphoma", "tuberculosis",
276
- # "anemia", "obesity", "depression", "anxiety", "bipolar", "autism", "adhd", "ptsd",
277
- # "psychosis", "schizophrenia",
278
-
279
- # # Imaging Modalities
280
- # "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography",
281
- # "radiography", "echocardiogram", "spect", "dermoscopy", "colonoscopy", "endoscopy",
282
- # "biopsy", "histopathology",
283
-
284
- # # Medical Specialties
285
- # "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology",
286
- # "dentistry", "ophthalmology", "urology", "orthopedics", "gastroenterology",
287
- # "pulmonology", "nephrology", "psychiatry", "pediatrics", "geriatrics",
288
- # "infectious disease",
289
-
290
- # # Symptoms / Signs
291
- # "lesion", "infection", "fever", "pain", "inflammation", "rash", "headache", "swelling",
292
- # "cough", "seizure", "dizziness", "vomiting", "diarrhea", "nausea", "fatigue", "itching",
293
-
294
- # # Common Specific Diseases
295
- # "breast cancer", "prostate cancer", "lung cancer", "skin cancer", "colon cancer",
296
- # "brain tumor", "liver cancer", "cervical cancer", "bladder cancer", "thyroid cancer",
297
-
298
- # # Procedures / Interventions
299
- # "surgery", "chemotherapy", "radiation", "transplant", "dialysis", "intubation",
300
- # "stenting", "ventilation", "vaccination", "anesthesia", "rehabilitation", "prosthetics",
301
- # "orthotics",
302
-
303
- # # Lab Tests / Biomarkers
304
- # "blood test", "cbc", "glucose", "hemoglobin", "cholesterol", "biomarker", "urinalysis",
305
- # "pcr", "serology", "antibody", "antigen",
306
-
307
- # # Clinical Settings / Roles
308
- # "icu", "hospital", "emergency", "clinical notes", "nursing", "physician", "patient",
309
- # "medical record", "electronic health record", "ehr", "vitals",
310
-
311
- # # Age-based Terms
312
- # "pediatric", "neonatal", "infant", "child", "adolescent", "geriatrics", "elderly",
313
-
314
- # # Epidemiology / Public Health
315
- # "epidemiology", "prevalence", "incidence", "mortality", "public health", "health disparity",
316
- # "risk factor", "social determinant",
317
-
318
- # # Pharmacology / Medications
319
- # "drug", "medication", "pharmacology", "side effect", "adverse event", "dose", "tablet",
320
- # "vaccine", "clinical trial", "placebo"
321
- # ]
322
-
323
-
324
- # if not any(term in keyword for term in medical_terms):
325
- # return f"No medical datasets found for '{arg1}'. Please try another medical term."
326
-
327
- # # Query Hugging Face API
328
- # try:
329
- # response = requests.get(
330
- # f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}",
331
- # timeout=10
332
- # )
333
- # response.raise_for_status()
334
- # datasets = response.json()
335
- # except Exception:
336
- # # Offline fallback
337
- # datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(limit)]
338
-
339
- # if not datasets:
340
- # return f"No datasets found for '{arg1}'."
341
-
342
- # # Format results neatly with numbered bullets
343
- # formatted = "\n".join(
344
- # [f"- Dataset {i+1}: {ds.get('id', 'Unknown')}" for i, ds in enumerate(datasets[:limit])]
345
- # )
346
- # return f"Medical datasets related to '{arg1}':\n{formatted}"
347
-
348
- # except Exception as e:
349
- # return f"Error searching medical datasets for '{arg1}': {str(e)}"
350
-
351
-
352
  @tool
353
- def my_custom_tool(keyword: str, top_n: int) -> str:
354
  """
355
- Search and retrieve publicly available medical datasets from Hugging Face based on a given keyword.
356
 
357
  Args:
358
- keyword: A medical-related keyword (e.g., 'cancer', 'heart', 'xray', 'radiology', 'dermoscopy').
359
- top_n: The maximum number of datasets to retrieve.
360
 
361
  Returns:
362
- A numbered list (top N) of Hugging Face dataset names matching the search query.
363
- If no relevant results are found, a helpful message is returned.
364
  """
365
  try:
366
- keyword = keyword.strip().lower()
367
-
368
- # Guardrail: Prevent using dataset IDs instead of search keywords
369
- if "/" in keyword:
370
- return (
371
- f"'{keyword}' looks like a dataset ID, not a keyword. "
372
- f"Please provide a general medical keyword (e.g., 'heart', 'cancer', 'xray')."
373
- )
374
 
375
- # Define medically relevant terms
376
  medical_terms = [
377
- "skin", "brain", "lung", "chest", "heart", "cancer", "tumor", "radiology",
378
- "xray", "ultrasound", "pathology", "ct", "mri", "covid", "stroke", "diabetes",
379
- "melanoma", "lesion", "infection", "fever", "pain", "inflammation", "rash",
380
- "biopsy", "histopathology", "blood", "icu", "ehr", "patient", "vitals",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  ]
382
 
383
- # Validate the keyword
384
  if not any(term in keyword for term in medical_terms):
385
- return f"No medical datasets found for '{keyword}'. Please try another medical term."
386
 
387
- # Query Hugging Face datasets API
388
  try:
389
  response = requests.get(
390
- f"https://huggingface.co/api/datasets?search={keyword}&limit={top_n}",
391
  timeout=10
392
  )
393
  response.raise_for_status()
394
  datasets = response.json()
395
  except Exception:
396
- # Fallback in case of API issues or no internet
397
- datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(top_n)]
398
 
399
- # Handle case when no datasets are found
400
  if not datasets:
401
- return f"No Hugging Face datasets found for '{keyword}'."
402
 
403
- # Format output as a clean, numbered list
404
  formatted = "\n".join(
405
- [f"- Dataset {i+1}: {ds.get('id', 'Unknown')}" for i, ds in enumerate(datasets[:top_n])]
406
  )
407
-
408
- return f"Top {len(datasets[:top_n])} Hugging Face datasets related to '{keyword}':\n{formatted}"
409
 
410
  except Exception as e:
411
- return f"Error while searching Hugging Face datasets for '{keyword}': {str(e)}"
 
 
 
412
 
413
 
414
  @tool
 
245
  from Gradio_UI import GradioUI
246
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  @tool
249
def my_custom_tool(arg1: str, arg2: int) -> str:
    """
    Search Hugging Face for publicly available medical datasets matching a medical-related keyword.

    Args:
        arg1: A keyword related to medical data (e.g., 'cancer', 'diabetes', 'CT scan', 'radiology', 'dermoscopy').
        arg2: The maximum number of datasets to retrieve; must be a positive whole number.

    Returns:
        A bulleted, numbered list of at most arg2 dataset ids matching the keyword, or a
        plain-text message explaining why no list could be produced (invalid limit,
        non-medical keyword, API failure, or no matches).
    """
    # Imported locally so the tool body does not rely on extra module-level imports.
    import re

    try:
        keyword = arg1.strip().lower()

        # Reject unusable limits up front: a negative value would otherwise make
        # `datasets[:limit]` drop the last item instead of capping the list.
        try:
            limit = int(arg2)
        except (TypeError, ValueError):
            limit = 0
        if limit <= 0:
            return f"Invalid dataset limit '{arg2}': please provide a positive whole number."

        # Define a comprehensive list of medically relevant terms
        medical_terms = [
            # Anatomy / Body Parts
            "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
            "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
            "retina", "tooth", "teeth", "tongue", "jaw", "neck", "wrist", "hand", "leg", "arm",
            "shoulder", "pelvis",

            # Diseases / Conditions
            "cancer", "tumor", "stroke", "diabetes", "pneumonia", "covid", "asthma", "eczema",
            "melanoma", "hypertension", "alzheimer", "parkinson", "arthritis", "scoliosis",
            "epilepsy", "glaucoma", "ulcer", "hepatitis", "leukemia", "lymphoma", "tuberculosis",
            "anemia", "obesity", "depression", "anxiety", "bipolar", "autism", "adhd", "ptsd",
            "psychosis", "schizophrenia",

            # Imaging Modalities
            "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography",
            "radiography", "echocardiogram", "spect", "dermoscopy", "colonoscopy", "endoscopy",
            "biopsy", "histopathology",

            # Medical Specialties
            "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology",
            "dentistry", "ophthalmology", "urology", "orthopedics", "gastroenterology",
            "pulmonology", "nephrology", "psychiatry", "pediatrics", "geriatrics",
            "infectious disease",

            # Symptoms / Signs
            "lesion", "infection", "fever", "pain", "inflammation", "rash", "headache", "swelling",
            "cough", "seizure", "dizziness", "vomiting", "diarrhea", "nausea", "fatigue", "itching",

            # Common Specific Diseases
            "breast cancer", "prostate cancer", "lung cancer", "skin cancer", "colon cancer",
            "brain tumor", "liver cancer", "cervical cancer", "bladder cancer", "thyroid cancer",

            # Procedures / Interventions
            "surgery", "chemotherapy", "radiation", "transplant", "dialysis", "intubation",
            "stenting", "ventilation", "vaccination", "anesthesia", "rehabilitation", "prosthetics",
            "orthotics",

            # Lab Tests / Biomarkers
            "blood test", "cbc", "glucose", "hemoglobin", "cholesterol", "biomarker", "urinalysis",
            "pcr", "serology", "antibody", "antigen",

            # Clinical Settings / Roles
            "icu", "hospital", "emergency", "clinical notes", "nursing", "physician", "patient",
            "medical record", "electronic health record", "ehr", "vitals",

            # Age-based Terms
            "pediatric", "neonatal", "infant", "child", "adolescent", "geriatrics", "elderly",

            # Epidemiology / Public Health
            "epidemiology", "prevalence", "incidence", "mortality", "public health", "health disparity",
            "risk factor", "social determinant",

            # Pharmacology / Medications
            "drug", "medication", "pharmacology", "side effect", "adverse event", "dose", "tablet",
            "vaccine", "clinical trial", "placebo",
        ]

        def mentions(term: str) -> bool:
            # Short terms such as "ct", "ear" or "pet" occur inside unrelated words
            # ("factory", "year", "competition"), so they must begin a word.
            # Longer terms keep plain substring matching so compounds such as
            # "osteoarthritis" are still accepted.
            if len(term) > 4:
                return term in keyword
            return re.search(r"\b" + re.escape(term), keyword) is not None

        if not any(mentions(term) for term in medical_terms):
            return f"No medical datasets found for '{arg1}'. Please try another medical term."

        # Query Hugging Face API. `params` lets requests URL-encode the keyword, so
        # spaces or characters like '&' cannot corrupt the query string.
        try:
            response = requests.get(
                "https://huggingface.co/api/datasets",
                params={"search": keyword, "limit": limit},
                timeout=10,
            )
            response.raise_for_status()
            datasets = response.json()
        except (requests.RequestException, ValueError) as err:
            # Report the failure instead of inventing placeholder dataset ids,
            # which would be indistinguishable from real search results.
            return f"Could not retrieve datasets from Hugging Face for '{arg1}': {err}"

        if not datasets:
            return f"No datasets found for '{arg1}'."

        if not isinstance(datasets, list):
            return f"Unexpected response from Hugging Face while searching for '{arg1}'."

        # Format results neatly with numbered bullets
        formatted = "\n".join(
            f"- Dataset {i}: {ds.get('id', 'Unknown')}"
            for i, ds in enumerate(datasets[:limit], start=1)
        )
        return f"Medical datasets related to '{arg1}':\n{formatted}"

    except Exception as e:
        # Last-resort boundary: the tool always answers with text rather than raising.
        return f"Error searching medical datasets for '{arg1}': {str(e)}"
350
+
351
+
352
+
353
 
354
 
355
  @tool