Spaces:

imadreamerboy
/

ADE-Explorer-MCP

Sleeping

App Files Community

Jonas committed on Jun 7, 2025

Commit

e382e80

1 Parent(s): a7f1030

Enhance app.py and openfda_client.py to support configurable limits for adverse events, serious outcomes, and report sources; update data retrieval functions to include total report counts for improved output context.

Browse files

Files changed (2) hide show

app.py +56 -14
openfda_client.py +59 -13

app.py CHANGED Viewed

@@ -34,12 +34,13 @@ def format_pair_frequency_results(data: dict, drug_name: str, event_name: str) -
 # --- Tool Functions ---
-def top_adverse_events_tool(drug_name: str, patient_sex: str = "all", min_age: int = 0, max_age: int = 120):
     """
     MCP Tool: Finds the top reported adverse events for a given drug.
     Args:
         drug_name (str): The generic name of the drug is preferred! A small sample of brand names (e.g., 'Tylenol') are converted to generic names for demonstration purposes.
         patient_sex (str): The patient's sex to filter by.
         min_age (int): The minimum age for the filter.
         max_age (int): The maximum age for the filter.
@@ -64,7 +65,7 @@ def top_adverse_events_tool(drug_name: str, patient_sex: str = "all", min_age: i
     if min_age > 0 or max_age < 120:
         age_range = (min_age, max_age)
-    data = get_top_adverse_events(drug_name, patient_sex=sex_code, age_range=age_range)
     if "error" in data:
         error_message = f"An error occurred: {data['error']}"
@@ -79,24 +80,32 @@ def top_adverse_events_tool(drug_name: str, patient_sex: str = "all", min_age: i
     df = pd.DataFrame(data["results"])
     df = df.rename(columns={"term": "Adverse Event", "count": "Report Count"})
     header = (
-        f"### Top Adverse Events for '{drug_name.title()}'\n"
         "**Source**: FDA FAERS via OpenFDA\n"
         "**Disclaimer**: Spontaneous reports do not prove causation. Consult a healthcare professional."
     )
     return chart, df, header
-def serious_outcomes_tool(drug_name: str):
     """
     MCP Tool: Finds the top reported serious outcomes for a given drug.
     Args:
         drug_name (str): The generic name of the drug is preferred. A small sample of brand names (e.g., 'Tylenol') are converted to generic names for demonstration purposes.
     Returns:
         tuple: A Plotly figure, a Pandas DataFrame, and a summary string.
     """
-    data = get_serious_outcomes(drug_name)
     if "error" in data:
         error_message = f"An error occurred: {data['error']}"
@@ -111,8 +120,16 @@ def serious_outcomes_tool(drug_name: str):
     df = pd.DataFrame(data["results"])
     df = df.rename(columns={"term": "Serious Outcome", "count": "Report Count"})
     header = (
-        f"### Top Serious Outcomes for '{drug_name.title()}'\n"
         "**Source**: FDA FAERS via OpenFDA\n"
         "**Disclaimer**: Spontaneous reports do not prove causation. Consult a healthcare professional."
     )
@@ -153,27 +170,43 @@ def time_series_tool(drug_name: str, event_name: str, aggregation: str):
     chart = create_time_series_chart(data, drug_name, event_name, time_aggregation=agg_code)
     return chart
-def report_source_tool(drug_name: str):
     """
     MCP Tool: Creates a pie chart of report sources for a given drug.
     Args:
         drug_name (str): The generic name of the drug is preferred. A small sample of brand names (e.g., 'Tylenol') are converted to generic names for demonstration purposes.
     Returns:
-        A Plotly figure and a string for the Markdown output.
     """
-    data = get_report_source_data(drug_name)
     if "error" in data:
-        return None, f"An error occurred: {data['error']}"
     if not data or not data.get("results"):
         message = f"No report source data found for '{drug_name}'."
-        return create_placeholder_chart(message), message
     chart = create_pie_chart(data, drug_name)
-    return chart, ""
 # --- Gradio Interface ---
@@ -194,6 +227,12 @@ interface1 = gr.Interface(
             label="Drug Name",
             info="Enter a brand or generic drug name (e.g., 'Aspirin', 'Lisinopril')."
         ),
         gr.Radio(
             ["All", "Male", "Female"],
             label="Patient Sex",
@@ -230,7 +269,8 @@ interface3 = gr.Interface(
         gr.Textbox(
             label="Drug Name",
             info="Enter a brand or generic drug name (e.g., 'Aspirin', 'Lisinopril')."
-        )
     ],
     outputs=[
         gr.Plot(label="Top Serious Outcomes Chart"),
@@ -274,10 +314,12 @@ interface4 = gr.Interface(
 interface5 = gr.Interface(
     fn=report_source_tool,
     inputs=[
-        gr.Textbox(label="Drug Name", info="e.g., 'Aspirin', 'Lisinopril'")
     ],
     outputs=[
         gr.Plot(label="Report Source Breakdown"),
         gr.Markdown()
     ],
     title="Report Source Breakdown",

 # --- Tool Functions ---
+def top_adverse_events_tool(drug_name: str, top_n: int = 10, patient_sex: str = "all", min_age: int = 0, max_age: int = 120):
     """
     MCP Tool: Finds the top reported adverse events for a given drug.
     Args:
         drug_name (str): The generic name of the drug is preferred! A small sample of brand names (e.g., 'Tylenol') are converted to generic names for demonstration purposes.
+        top_n (int): The number of top adverse events to return.
         patient_sex (str): The patient's sex to filter by.
         min_age (int): The minimum age for the filter.
         max_age (int): The maximum age for the filter.
     if min_age > 0 or max_age < 120:
         age_range = (min_age, max_age)
+    data = get_top_adverse_events(drug_name, limit=top_n, patient_sex=sex_code, age_range=age_range)
     if "error" in data:
         error_message = f"An error occurred: {data['error']}"
     df = pd.DataFrame(data["results"])
     df = df.rename(columns={"term": "Adverse Event", "count": "Report Count"})
+    total_reports = data.get("meta", {}).get("total_reports_for_query", 0)
+    if total_reports > 0:
+        df['Relative Frequency (%)'] = ((df['Report Count'] / total_reports) * 100).round(2)
+    else:
+        df['Relative Frequency (%)'] = 0.0
     header = (
+        f"### Top {len(df)} Adverse Events for '{drug_name.title()}'\n"
+        f"Based on **{total_reports:,}** total reports matching the given filters.\n"
         "**Source**: FDA FAERS via OpenFDA\n"
         "**Disclaimer**: Spontaneous reports do not prove causation. Consult a healthcare professional."
     )
     return chart, df, header
+def serious_outcomes_tool(drug_name: str, top_n: int = 6):
     """
     MCP Tool: Finds the top reported serious outcomes for a given drug.
     Args:
         drug_name (str): The generic name of the drug is preferred. A small sample of brand names (e.g., 'Tylenol') are converted to generic names for demonstration purposes.
+        top_n (int): The number of top serious outcomes to return.
     Returns:
         tuple: A Plotly figure, a Pandas DataFrame, and a summary string.
     """
+    data = get_serious_outcomes(drug_name, limit=top_n)
     if "error" in data:
         error_message = f"An error occurred: {data['error']}"
     df = pd.DataFrame(data["results"])
     df = df.rename(columns={"term": "Serious Outcome", "count": "Report Count"})
+    total_serious_reports = data.get("meta", {}).get("total_reports_for_query", 0)
+    if total_serious_reports > 0:
+        df['% of Serious Reports'] = ((df['Report Count'] / total_serious_reports) * 100).round(2)
+    else:
+        df['% of Serious Reports'] = 0.0
     header = (
+        f"### Top {len(df)} Serious Outcomes for '{drug_name.title()}'\n"
+        f"Out of **{total_serious_reports:,}** total serious reports. "
+        "Note: a single report may be associated with multiple outcomes.\n"
         "**Source**: FDA FAERS via OpenFDA\n"
         "**Disclaimer**: Spontaneous reports do not prove causation. Consult a healthcare professional."
     )
     chart = create_time_series_chart(data, drug_name, event_name, time_aggregation=agg_code)
     return chart
+def report_source_tool(drug_name: str, top_n: int = 5):
     """
     MCP Tool: Creates a pie chart of report sources for a given drug.
     Args:
         drug_name (str): The generic name of the drug is preferred. A small sample of brand names (e.g., 'Tylenol') are converted to generic names for demonstration purposes.
+        top_n (int): The number of top sources to return.
     Returns:
+        tuple: A Plotly figure, a Pandas DataFrame, and a summary string.
     """
+    data = get_report_source_data(drug_name, limit=top_n)
     if "error" in data:
+        error_message = f"An error occurred: {data['error']}"
+        return create_placeholder_chart(error_message), pd.DataFrame(), error_message
     if not data or not data.get("results"):
         message = f"No report source data found for '{drug_name}'."
+        return create_placeholder_chart(message), pd.DataFrame(), message
     chart = create_pie_chart(data, drug_name)
+    df = pd.DataFrame(data['results'])
+    df = df.rename(columns={"term": "Source", "count": "Report Count"})
+    total_reports = data.get("meta", {}).get("total_reports_for_query", 0)
+    if total_reports > 0:
+        df['Percentage'] = ((df['Report Count'] / total_reports) * 100).round(2)
+    else:
+        df['Percentage'] = 0.0
+    header = (
+        f"### Report Sources for '{drug_name.title()}'\n"
+        f"Based on **{total_reports:,}** reports with source information."
+    )
+    return chart, df, header
 # --- Gradio Interface ---
             label="Drug Name",
             info="Enter a brand or generic drug name (e.g., 'Aspirin', 'Lisinopril')."
         ),
+        gr.Slider(
+            5, 50,
+            value=10,
+            label="Number of Events to Show",
+            step=1
+        ),
         gr.Radio(
             ["All", "Male", "Female"],
             label="Patient Sex",
         gr.Textbox(
             label="Drug Name",
             info="Enter a brand or generic drug name (e.g., 'Aspirin', 'Lisinopril')."
+        ),
+        gr.Slider(1, 6, value=6, label="Number of Outcomes to Show", step=1),
     ],
     outputs=[
         gr.Plot(label="Top Serious Outcomes Chart"),
 interface5 = gr.Interface(
     fn=report_source_tool,
     inputs=[
+        gr.Textbox(label="Drug Name", info="e.g., 'Aspirin', 'Lisinopril'"),
+        gr.Slider(1, 5, value=5, label="Number of Sources to Show", step=1),
     ],
     outputs=[
         gr.Plot(label="Report Source Breakdown"),
+        gr.DataFrame(label="Report Source Data", interactive=False),
         gr.Markdown()
     ],
     title="Report Source Breakdown",

openfda_client.py CHANGED Viewed

@@ -168,8 +168,9 @@ def get_top_adverse_events(drug_name: str, limit: int = 10, patient_sex: Optiona
     if cache_key in cache:
         return cache[cache_key]
-    query = (
-        f'search={search_query}'
         f'&count=patient.reaction.reactionmeddrapt.exact&limit={limit}'
     )
@@ -177,10 +178,22 @@ def get_top_adverse_events(drug_name: str, limit: int = 10, patient_sex: Optiona
         # Respect rate limits
         time.sleep(REQUEST_DELAY_SECONDS)
-        response = requests.get(f"{API_BASE_URL}?{query}")
         response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
         data = response.json()
         cache[cache_key] = data
         return data
@@ -241,14 +254,14 @@ def get_drug_event_pair_frequency(drug_name: str, event_name: str) -> dict:
     except Exception as e:
         return {"error": f"An unexpected error occurred: {e}"}
-def get_serious_outcomes(drug_name: str, limit: int = 10) -> dict:
     """
     Query OpenFDA to get the most frequent serious outcomes for a given drug.
     This function makes multiple API calls to count different outcome fields.
     Args:
         drug_name (str): The name of the drug to search for.
-        limit (int): This argument is maintained for signature consistency but is not directly used in the multi-query logic.
     Returns:
         dict: A dictionary containing aggregated results or an error.
@@ -260,7 +273,7 @@ def get_serious_outcomes(drug_name: str, limit: int = 10) -> dict:
     drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
     # Use a cache key for the aggregated result
-    cache_key = f"serious_outcomes_aggregated_{drug_name_processed}"
     if cache_key in cache:
         return cache[cache_key]
@@ -269,6 +282,22 @@ def get_serious_outcomes(drug_name: str, limit: int = 10) -> dict:
     # Base search for all serious reports
     base_search_query = f'patient.drug.medicinalproduct:"{drug_name_processed}"+AND+serious:1'
     for field in SERIOUS_OUTCOME_FIELDS:
         try:
             # Each query counts reports where the specific seriousness field exists
@@ -296,11 +325,14 @@ def get_serious_outcomes(drug_name: str, limit: int = 10) -> dict:
     # Format the results to match the expected structure for plotting
     final_data = {
-        "results": [{"term": k, "count": v} for k, v in aggregated_results.items()]
     }
-    # Sort results by count, descending
     final_data["results"] = sorted(final_data["results"], key=lambda x: x['count'], reverse=True)
     cache[cache_key] = final_data
     return final_data
@@ -352,12 +384,13 @@ def get_time_series_data(drug_name: str, event_name: str) -> dict:
     except Exception as e:
         return {"error": f"An unexpected error occurred: {e}"}
-def get_report_source_data(drug_name: str) -> dict:
     """
     Query OpenFDA to get the breakdown of report sources for a given drug.
     Args:
         drug_name (str): The name of the drug to search for.
     Returns:
         dict: The JSON response from the API, or an error dictionary.
@@ -368,7 +401,7 @@ def get_report_source_data(drug_name: str) -> dict:
     drug_name_processed = drug_name.lower().strip()
     drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
-    cache_key = f"report_source_{drug_name_processed}"
     if cache_key in cache:
         return cache[cache_key]
@@ -385,13 +418,26 @@ def get_report_source_data(drug_name: str) -> dict:
         data = response.json()
-        # Translate the qualification codes to human-readable terms
         if "results" in data:
             for item in data["results"]:
-                # The API returns numeric codes, ensure they are strings for mapping
                 term_str = str(item["term"])
                 item["term"] = QUALIFICATION_MAPPING.get(term_str, f"Unknown ({term_str})")
         cache[cache_key] = data
         return data

     if cache_key in cache:
         return cache[cache_key]
+    # Query for top events by count
+    count_query_url = (
+        f'{API_BASE_URL}?search={search_query}'
         f'&count=patient.reaction.reactionmeddrapt.exact&limit={limit}'
     )
         # Respect rate limits
         time.sleep(REQUEST_DELAY_SECONDS)
+        response = requests.get(count_query_url)
         response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
         data = response.json()
+        # Query for total reports matching the filters
+        total_query_url = f'{API_BASE_URL}?search={search_query}'
+        time.sleep(REQUEST_DELAY_SECONDS)
+        total_response = requests.get(total_query_url)
+        total_response.raise_for_status()
+        total_data = total_response.json()
+        total_reports = total_data.get("meta", {},).get("results", {}).get("total", 0)
+        # Add total to the main data object
+        if 'meta' not in data:
+            data['meta'] = {}
+        data['meta']['total_reports_for_query'] = total_reports
         cache[cache_key] = data
         return data
     except Exception as e:
         return {"error": f"An unexpected error occurred: {e}"}
+def get_serious_outcomes(drug_name: str, limit: int = 6) -> dict:
     """
     Query OpenFDA to get the most frequent serious outcomes for a given drug.
     This function makes multiple API calls to count different outcome fields.
     Args:
         drug_name (str): The name of the drug to search for.
+        limit (int): The maximum number of outcomes to return.
     Returns:
         dict: A dictionary containing aggregated results or an error.
     drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
     # Use a cache key for the aggregated result
+    cache_key = f"serious_outcomes_aggregated_{drug_name_processed}_{limit}"
     if cache_key in cache:
         return cache[cache_key]
     # Base search for all serious reports
     base_search_query = f'patient.drug.medicinalproduct:"{drug_name_processed}"+AND+serious:1'
+    # Get total number of serious reports
+    total_serious_reports = 0
+    try:
+        total_query_url = f"{API_BASE_URL}?search={base_search_query}"
+        time.sleep(REQUEST_DELAY_SECONDS)
+        response = requests.get(total_query_url)
+        if response.status_code == 200:
+            total_data = response.json()
+            total_serious_reports = total_data.get("meta", {}).get("results", {}).get("total", 0)
+        elif response.status_code != 404:
+            # If this call fails, we can still proceed, the total will just be 0.
+            pass
+    except requests.exceptions.RequestException:
+        # If fetching total fails, proceed without it.
+        pass
     for field in SERIOUS_OUTCOME_FIELDS:
         try:
             # Each query counts reports where the specific seriousness field exists
     # Format the results to match the expected structure for plotting
     final_data = {
+        "results": [{"term": k, "count": v} for k, v in aggregated_results.items()],
+        "meta": {"total_reports_for_query": total_serious_reports}
     }
+    # Sort results by count, descending, and then limit
     final_data["results"] = sorted(final_data["results"], key=lambda x: x['count'], reverse=True)
+    if limit:
+        final_data["results"] = final_data["results"][:limit]
     cache[cache_key] = final_data
     return final_data
     except Exception as e:
         return {"error": f"An unexpected error occurred: {e}"}
+def get_report_source_data(drug_name: str, limit: int = 5) -> dict:
     """
     Query OpenFDA to get the breakdown of report sources for a given drug.
     Args:
         drug_name (str): The name of the drug to search for.
+        limit (int): The maximum number of sources to return.
     Returns:
         dict: The JSON response from the API, or an error dictionary.
     drug_name_processed = drug_name.lower().strip()
     drug_name_processed = DRUG_SYNONYM_MAPPING.get(drug_name_processed, drug_name_processed)
+    cache_key = f"report_source_{drug_name_processed}_{limit}"
     if cache_key in cache:
         return cache[cache_key]
         data = response.json()
+        # Translate the qualification codes and calculate total before limiting
         if "results" in data:
+            # Sort by count first
+            data['results'] = sorted(data['results'], key=lambda x: x['count'], reverse=True)
+            # Calculate total from all results before limiting
+            total_with_source = sum(item['count'] for item in data['results'])
+            if 'meta' not in data:
+                data['meta'] = {}
+            data['meta']['total_reports_for_query'] = total_with_source
+            # Translate codes after processing
             for item in data["results"]:
                 term_str = str(item["term"])
                 item["term"] = QUALIFICATION_MAPPING.get(term_str, f"Unknown ({term_str})")
+            # Apply limit
+            if limit:
+                data['results'] = data['results'][:limit]
         cache[cache_key] = data
         return data