SadiaK14 committed on
Commit
a703723
·
verified ·
1 Parent(s): 0621bbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -74
app.py CHANGED
@@ -245,108 +245,171 @@ from tools.final_answer import FinalAnswerTool
245
  from Gradio_UI import GradioUI
246
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  @tool
249
- def my_custom_tool(arg1: str, arg2: int) -> str:
250
  """
251
- Search and retrieve publicly available medical datasets from Hugging Face based on any medical-related keyword.
252
 
253
  Args:
254
- arg1: A keyword related to medical data (e.g., 'cancer', 'diabetes', 'CT scan', 'radiology', 'dermoscopy').
255
- arg2: The maximum number of datasets to retrieve.
256
 
257
  Returns:
258
- A numbered list (top N) of dataset names matching the search query.
 
259
  """
260
  try:
261
- keyword = arg1.strip().lower()
262
- limit = int(arg2)
 
 
 
 
 
 
263
 
264
- # Define a comprehensive list of medically relevant terms
265
  medical_terms = [
266
- # Anatomy / Body Parts
267
- "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
268
- "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
269
- "retina", "tooth", "teeth", "tongue", "jaw", "neck", "wrist", "hand", "leg", "arm",
270
- "shoulder", "pelvis",
271
-
272
- # Diseases / Conditions
273
- "cancer", "tumor", "stroke", "diabetes", "pneumonia", "covid", "asthma", "eczema",
274
- "melanoma", "hypertension", "alzheimer", "parkinson", "arthritis", "scoliosis",
275
- "epilepsy", "glaucoma", "ulcer", "hepatitis", "leukemia", "lymphoma", "tuberculosis",
276
- "anemia", "obesity", "depression", "anxiety", "bipolar", "autism", "adhd", "ptsd",
277
- "psychosis", "schizophrenia",
278
-
279
- # Imaging Modalities
280
- "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography",
281
- "radiography", "echocardiogram", "spect", "dermoscopy", "colonoscopy", "endoscopy",
282
- "biopsy", "histopathology",
283
-
284
- # Medical Specialties
285
- "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology",
286
- "dentistry", "ophthalmology", "urology", "orthopedics", "gastroenterology",
287
- "pulmonology", "nephrology", "psychiatry", "pediatrics", "geriatrics",
288
- "infectious disease",
289
-
290
- # Symptoms / Signs
291
- "lesion", "infection", "fever", "pain", "inflammation", "rash", "headache", "swelling",
292
- "cough", "seizure", "dizziness", "vomiting", "diarrhea", "nausea", "fatigue", "itching",
293
-
294
- # Common Specific Diseases
295
- "breast cancer", "prostate cancer", "lung cancer", "skin cancer", "colon cancer",
296
- "brain tumor", "liver cancer", "cervical cancer", "bladder cancer", "thyroid cancer",
297
-
298
- # Procedures / Interventions
299
- "surgery", "chemotherapy", "radiation", "transplant", "dialysis", "intubation",
300
- "stenting", "ventilation", "vaccination", "anesthesia", "rehabilitation", "prosthetics",
301
- "orthotics",
302
-
303
- # Lab Tests / Biomarkers
304
- "blood test", "cbc", "glucose", "hemoglobin", "cholesterol", "biomarker", "urinalysis",
305
- "pcr", "serology", "antibody", "antigen",
306
-
307
- # Clinical Settings / Roles
308
- "icu", "hospital", "emergency", "clinical notes", "nursing", "physician", "patient",
309
- "medical record", "electronic health record", "ehr", "vitals",
310
-
311
- # Age-based Terms
312
- "pediatric", "neonatal", "infant", "child", "adolescent", "geriatrics", "elderly",
313
-
314
- # Epidemiology / Public Health
315
- "epidemiology", "prevalence", "incidence", "mortality", "public health", "health disparity",
316
- "risk factor", "social determinant",
317
-
318
- # Pharmacology / Medications
319
- "drug", "medication", "pharmacology", "side effect", "adverse event", "dose", "tablet",
320
- "vaccine", "clinical trial", "placebo"
321
  ]
322
 
323
-
324
  if not any(term in keyword for term in medical_terms):
325
- return f"No medical datasets found for '{arg1}'. Please try another medical term."
326
 
327
- # Query Hugging Face API
328
  try:
329
  response = requests.get(
330
- f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}",
331
  timeout=10
332
  )
333
  response.raise_for_status()
334
  datasets = response.json()
335
  except Exception:
336
- # Offline fallback
337
- datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(limit)]
338
 
 
339
  if not datasets:
340
- return f"No datasets found for '{arg1}'."
341
 
342
- # Format results neatly with numbered bullets
343
  formatted = "\n".join(
344
- [f"- Dataset {i+1}: {ds.get('id', 'Unknown')}" for i, ds in enumerate(datasets[:limit])]
345
  )
346
- return f"Medical datasets related to '{arg1}':\n{formatted}"
 
347
 
348
  except Exception as e:
349
- return f"Error searching medical datasets for '{arg1}': {str(e)}"
350
 
351
 
352
  @tool
 
245
  from Gradio_UI import GradioUI
246
 
247
 
248
+ # @tool
249
+ # def my_custom_tool(arg1: str, arg2: int) -> str:
250
+ # """
251
+ # Search and retrieve publicly available medical datasets from Hugging Face based on any medical-related keyword.
252
+
253
+ # Args:
254
+ # arg1: A keyword related to medical data (e.g., 'cancer', 'diabetes', 'CT scan', 'radiology', 'dermoscopy').
255
+ # arg2: The maximum number of datasets to retrieve.
256
+
257
+ # Returns:
258
+ # A numbered list (top N) of dataset names matching the search query.
259
+ # """
260
+ # try:
261
+ # keyword = arg1.strip().lower()
262
+ # limit = int(arg2)
263
+
264
+ # # Define a comprehensive list of medically relevant terms
265
+ # medical_terms = [
266
+ # # Anatomy / Body Parts
267
+ # "skin", "brain", "lung", "chest", "abdomen", "spine", "bone", "heart", "liver", "kidney",
268
+ # "bladder", "stomach", "colon", "rectum", "esophagus", "pancreas", "breast", "ear", "eye",
269
+ # "retina", "tooth", "teeth", "tongue", "jaw", "neck", "wrist", "hand", "leg", "arm",
270
+ # "shoulder", "pelvis",
271
+
272
+ # # Diseases / Conditions
273
+ # "cancer", "tumor", "stroke", "diabetes", "pneumonia", "covid", "asthma", "eczema",
274
+ # "melanoma", "hypertension", "alzheimer", "parkinson", "arthritis", "scoliosis",
275
+ # "epilepsy", "glaucoma", "ulcer", "hepatitis", "leukemia", "lymphoma", "tuberculosis",
276
+ # "anemia", "obesity", "depression", "anxiety", "bipolar", "autism", "adhd", "ptsd",
277
+ # "psychosis", "schizophrenia",
278
+
279
+ # # Imaging Modalities
280
+ # "mri", "ct", "xray", "x-ray", "ultrasound", "pet", "fmri", "mammo", "angiography",
281
+ # "radiography", "echocardiogram", "spect", "dermoscopy", "colonoscopy", "endoscopy",
282
+ # "biopsy", "histopathology",
283
+
284
+ # # Medical Specialties
285
+ # "radiology", "pathology", "oncology", "cardiology", "neurology", "dermatology",
286
+ # "dentistry", "ophthalmology", "urology", "orthopedics", "gastroenterology",
287
+ # "pulmonology", "nephrology", "psychiatry", "pediatrics", "geriatrics",
288
+ # "infectious disease",
289
+
290
+ # # Symptoms / Signs
291
+ # "lesion", "infection", "fever", "pain", "inflammation", "rash", "headache", "swelling",
292
+ # "cough", "seizure", "dizziness", "vomiting", "diarrhea", "nausea", "fatigue", "itching",
293
+
294
+ # # Common Specific Diseases
295
+ # "breast cancer", "prostate cancer", "lung cancer", "skin cancer", "colon cancer",
296
+ # "brain tumor", "liver cancer", "cervical cancer", "bladder cancer", "thyroid cancer",
297
+
298
+ # # Procedures / Interventions
299
+ # "surgery", "chemotherapy", "radiation", "transplant", "dialysis", "intubation",
300
+ # "stenting", "ventilation", "vaccination", "anesthesia", "rehabilitation", "prosthetics",
301
+ # "orthotics",
302
+
303
+ # # Lab Tests / Biomarkers
304
+ # "blood test", "cbc", "glucose", "hemoglobin", "cholesterol", "biomarker", "urinalysis",
305
+ # "pcr", "serology", "antibody", "antigen",
306
+
307
+ # # Clinical Settings / Roles
308
+ # "icu", "hospital", "emergency", "clinical notes", "nursing", "physician", "patient",
309
+ # "medical record", "electronic health record", "ehr", "vitals",
310
+
311
+ # # Age-based Terms
312
+ # "pediatric", "neonatal", "infant", "child", "adolescent", "geriatrics", "elderly",
313
+
314
+ # # Epidemiology / Public Health
315
+ # "epidemiology", "prevalence", "incidence", "mortality", "public health", "health disparity",
316
+ # "risk factor", "social determinant",
317
+
318
+ # # Pharmacology / Medications
319
+ # "drug", "medication", "pharmacology", "side effect", "adverse event", "dose", "tablet",
320
+ # "vaccine", "clinical trial", "placebo"
321
+ # ]
322
+
323
+
324
+ # if not any(term in keyword for term in medical_terms):
325
+ # return f"No medical datasets found for '{arg1}'. Please try another medical term."
326
+
327
+ # # Query Hugging Face API
328
+ # try:
329
+ # response = requests.get(
330
+ # f"https://huggingface.co/api/datasets?search={keyword}&limit={limit}",
331
+ # timeout=10
332
+ # )
333
+ # response.raise_for_status()
334
+ # datasets = response.json()
335
+ # except Exception:
336
+ # # Offline fallback
337
+ # datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(limit)]
338
+
339
+ # if not datasets:
340
+ # return f"No datasets found for '{arg1}'."
341
+
342
+ # # Format results neatly with numbered bullets
343
+ # formatted = "\n".join(
344
+ # [f"- Dataset {i+1}: {ds.get('id', 'Unknown')}" for i, ds in enumerate(datasets[:limit])]
345
+ # )
346
+ # return f"Medical datasets related to '{arg1}':\n{formatted}"
347
+
348
+ # except Exception as e:
349
+ # return f"Error searching medical datasets for '{arg1}': {str(e)}"
350
+ from smolagents import tool
351
+ import requests
352
+
353
  @tool
354
+ def my_custom_tool(keyword: str, top_n: int) -> str:
355
  """
356
+ Search and retrieve publicly available medical datasets from Hugging Face based on a given keyword.
357
 
358
  Args:
359
+ keyword: A medical-related keyword (e.g., 'cancer', 'heart', 'xray', 'radiology', 'dermoscopy').
360
+ top_n: The maximum number of datasets to retrieve.
361
 
362
  Returns:
363
+ A numbered list (top N) of Hugging Face dataset names matching the search query.
364
+ If no relevant results are found, a helpful message is returned.
365
  """
366
  try:
367
+ keyword = keyword.strip().lower()
368
+
369
+ # Guardrail: Prevent using dataset IDs instead of search keywords
370
+ if "/" in keyword:
371
+ return (
372
+ f"'{keyword}' looks like a dataset ID, not a keyword. "
373
+ f"Please provide a general medical keyword (e.g., 'heart', 'cancer', 'xray')."
374
+ )
375
 
376
+ # Define medically relevant terms
377
  medical_terms = [
378
+ "skin", "brain", "lung", "chest", "heart", "cancer", "tumor", "radiology",
379
+ "xray", "ultrasound", "pathology", "ct", "mri", "covid", "stroke", "diabetes",
380
+ "melanoma", "lesion", "infection", "fever", "pain", "inflammation", "rash",
381
+ "biopsy", "histopathology", "blood", "icu", "ehr", "patient", "vitals",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  ]
383
 
384
+ # Validate the keyword
385
  if not any(term in keyword for term in medical_terms):
386
+ return f"No medical datasets found for '{keyword}'. Please try another medical term."
387
 
388
+ # Query Hugging Face datasets API
389
  try:
390
  response = requests.get(
391
+ f"https://huggingface.co/api/datasets?search={keyword}&limit={top_n}",
392
  timeout=10
393
  )
394
  response.raise_for_status()
395
  datasets = response.json()
396
  except Exception:
397
+ # Fallback in case of API issues or no internet
398
+ datasets = [{"id": f"example/{keyword}-dataset-{i+1}"} for i in range(top_n)]
399
 
400
+ # Handle case when no datasets are found
401
  if not datasets:
402
+ return f"No Hugging Face datasets found for '{keyword}'."
403
 
404
+ # Format output as a clean, numbered list
405
  formatted = "\n".join(
406
+ [f"- Dataset {i+1}: {ds.get('id', 'Unknown')}" for i, ds in enumerate(datasets[:top_n])]
407
  )
408
+
409
+ return f"Top {len(datasets[:top_n])} Hugging Face datasets related to '{keyword}':\n{formatted}"
410
 
411
  except Exception as e:
412
+ return f"Error while searching Hugging Face datasets for '{keyword}': {str(e)}"
413
 
414
 
415
  @tool