# Spaces: scigeek / pharmaguide (Sleeping) — File size: 13,422 Bytes

"""
function_calling.py — OpenFDA API tool functions for PharmaGuide.

Each function queries a specific OpenFDA endpoint and returns clean,
plain-text results ready to pass to the model or display in the UI.

API facts:
  Base URL  : https://api.fda.gov/
  Auth      : No key needed for basic use (1 000 req/day unauthenticated)
              Set OPENFDA_API_KEY env var to raise the daily quota
              (120 000 req/day with a key, per the openFDA docs)
  Docs      : https://open.fda.gov/apis/

All functions follow the same contract:
  - Accept a drug name string (or, where documented, a list of drug names)
  - Return a plain string, or a dict of plain strings, on success
  - Return an empty string "" (or empty dict / empty values) on any error
    (caller handles gracefully)
  - Never raise exceptions to the caller
"""

import os
import re
import time
from typing import Optional
import requests

# ── Config ───────────────────────────────────────────────────────────────────

# OpenFDA endpoints: drug labels and drug adverse-event reports.
_BASE_URL = "https://api.fda.gov/drug/label.json"
_EVENT_URL = "https://api.fda.gov/drug/event.json"

# The API key is optional.  A free one can be registered at
# https://open.fda.gov/apis/authentication/ and exported as OPENFDA_API_KEY.
# It is read once at import time; an empty string means "unauthenticated"
# (1 000 req/day).
_API_KEY = os.getenv("OPENFDA_API_KEY", "")

# In-process response cache: {cache_key: (timestamp, result)}.
# Entries are considered fresh for one hour — FDA labels don't change intra-day.
_CACHE: dict = {}
_CACHE_TTL = 60 * 60  # seconds

# ── Keyword lists for lifestyle warning extraction ───────────────────────────

# Keywords that get_lifestyle_warnings() looks for in label sentences, grouped
# by lifestyle category.  The category names become the keys of its per-drug
# result, so renaming one changes that function's output.
_LIFESTYLE_KEYWORDS = {
    "alcohol": [
        "alcohol", "drinking", "drink", "wine", "beer", "liquor", "ethanol",
    ],
    "grapefruit": [
        "grapefruit", "citrus juice", "pomelo",
    ],
    "food": [
        "take with food", "take with a meal", "on an empty stomach",
        "without food", "before eating", "after eating", "with meals",
    ],
    "dairy": [
        "dairy", "milk", "antacid", "calcium", "iron", "mineral",
    ],
    "sun": [
        "sun", "sunlight", "UV", "photosensitivity", "sunscreen",
        "ultraviolet", "sunburn",
    ],
    "driving": [
        "driving", "operate machinery", "drowsy", "drowsiness",
        "sedation", "dizzy", "dizziness", "alertness", "operate vehicle",
    ],
    "exercise": [
        "exercise", "heat", "dehydration", "dehydrated", "hot weather",
        "strenuous activity", "sweating", "exertion",
    ],
}

# ── Helpers ──────────────────────────────────────────────────────────────────

def _get(url: str, params: dict) -> Optional[dict]:
    """
    GET *url* with *params* and return the decoded JSON body, or None.

    Behaviour:
      - The caller's ``params`` dict is never modified; the API key (when
        configured) is added to a private copy.
      - Successful responses are cached in ``_CACHE`` for ``_CACHE_TTL``
        seconds.  A 404 (openFDA's "no matches" answer) is cached as None
        too, so repeated look-ups of an unknown drug stay cheap.
      - Transient failures (429 after the retry, 5xx, network errors,
        undecodable bodies) are NOT cached, so the next call tries again
        instead of serving a stale failure for an hour.
      - A 429 is retried once after a 2-second pause.
      - Never raises; any exception results in None.
    """
    # Copy so adding the key does not leak into the caller's dict.
    query = dict(params)
    if _API_KEY:
        query["api_key"] = _API_KEY

    cache_key = url + str(sorted(query.items()))
    now = time.time()
    cached = _CACHE.get(cache_key)
    if cached is not None:
        ts, result = cached
        if now - ts < _CACHE_TTL:
            return result
        # Expired: drop it so a failed refresh does not leave stale data behind.
        _CACHE.pop(cache_key, None)

    try:
        resp = requests.get(url, params=query, timeout=10)
        if resp.status_code == 429:
            time.sleep(2)
            resp = requests.get(url, params=query, timeout=10)
        if resp.status_code == 404:
            # Definitive "nothing found" — safe to remember.
            _CACHE[cache_key] = (now, None)
            return None
        if resp.status_code != 200:
            # Rate limiting / server trouble: report failure but do not cache it.
            return None
        data = resp.json()
    except Exception:
        # Deliberate best-effort: callers are promised this never raises.
        return None

    _CACHE[cache_key] = (now, data)
    return data


def _search_query(drug_name: str) -> str:
    """
    Build the broad OpenFDA search expression: brand name OR generic name.

    The drug name is embedded as-is (not percent-encoded): the expression is
    sent through ``requests``' ``params=`` argument, which URL-encodes it, so
    encoding here as well would double-encode multi-word names
    ("insulin glargine" would be searched as the literal "insulin%20glargine").

    The two clauses are separated by a space, which openFDA treats as OR.
    A literal "+" would be transmitted as "%2B" rather than as that separator.

    Backslashes and double quotes in the name are escaped so they cannot
    terminate the quoted phrase.
    """
    term = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')
    return f'openfda.brand_name:"{term}" openfda.generic_name:"{term}"'


def _search_query_exact(drug_name: str) -> str:
    """
    Build an OpenFDA search expression for an exact generic-name match.

    Used to avoid combination-drug brand names (e.g. searching 'metformin'
    should not return ZITUVIMET/JANUMET).  This function only builds the
    expression; falling back to the broad query is the caller's job.

    The drug name is embedded as-is (not percent-encoded) because the
    expression is URL-encoded later by ``requests``' ``params=`` handling;
    encoding twice would corrupt multi-word names.  Backslashes and double
    quotes are escaped so they cannot terminate the quoted phrase.
    """
    term = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')
    return f'openfda.generic_name.exact:"{term}"'


def _get_with_fallback(url: str, drug_name: str, extra_params: Optional[dict] = None) -> Optional[dict]:
    """
    Query *url* for *drug_name*, preferring an exact generic-name match.

    Tries the exact generic-name query first; if it yields no results, falls
    back to the broad brand+generic query.  This avoids returning
    combination-drug labels (e.g. ZITUVIMET) when the user just asked about
    metformin.

    Args:
        url:          Endpoint to query.
        drug_name:    Drug name to search for.
        extra_params: Optional additional query parameters.  ``limit``
                      defaults to 1; ``search`` is always set by this function.

    Returns:
        The response dict from ``_get``, or None when nothing was found, the
        request failed, or *drug_name* is blank / not a string.
    """
    # A blank or non-string name cannot match anything; skip the API call
    # instead of sending a malformed query.
    if not isinstance(drug_name, str) or not drug_name.strip():
        return None

    base_params = {"limit": 1}
    if extra_params:
        base_params.update(extra_params)

    # Each request gets its own dict so nothing is shared between the calls.
    exact = _get(url, {**base_params, "search": _search_query_exact(drug_name)})
    if exact and exact.get("results"):
        return exact

    return _get(url, {**base_params, "search": _search_query(drug_name)})


def _first_result(data: Optional[dict]) -> Optional[dict]:
    """Pick the leading entry of an OpenFDA response's "results" list.

    Gives None when the response itself is missing/empty or when it carries
    no results.
    """
    records = data.get("results", []) if data else []
    if records:
        return records[0]
    return None


def _extract_field(record: Optional[dict], *field_names: str, max_chars: int = 2000) -> str:
    """
    Extract the first non-empty value from one of the given field names in
    an OpenFDA label record.  FDA stores fields as lists of strings; only the
    first list element is used.

    A field that is present but blank (e.g. ``[""]``, ``["   "]`` or
    ``[None]``) is skipped so the next candidate field name still gets a
    chance, rather than ending the search with an empty result.

    Args:
        record:      Label record dict, or None.
        field_names: Candidate field names, tried in order.
        max_chars:   Maximum length of the returned text (default 2000).

    Returns:
        The text with whitespace runs collapsed to single spaces, truncated
        to ``max_chars`` characters, or "" if no field has content.
    """
    if not record:
        return ""
    for field in field_names:
        val = record.get(field)
        if not val:
            continue
        raw = val[0] if isinstance(val, list) else val
        if raw is None:
            # Avoid turning a null entry into the literal text "None".
            continue
        # Collapse excessive whitespace from FDA's raw text
        text = re.sub(r"\s+", " ", str(raw)).strip()
        if text:
            return text[:max_chars]
    return ""


# ── Public tool functions ────────────────────────────────────────────────────

def get_drug_label(drug_name: str) -> dict:
    """
    Look up a drug's FDA label and pull out its main sections.

    The first matching label record is used.  The returned dict always has
    the keys warnings, drug_interactions, dosage, geriatric_use, indications
    and adverse_reactions; each value is a plain string (≤ 2000 chars), or ""
    when that section is not available.
    """
    label = _first_result(_get_with_fallback(_BASE_URL, drug_name))

    # Output key -> label field name(s) to read, in order of preference.
    sections = (
        ("warnings", ("warnings", "warnings_and_cautions")),
        ("drug_interactions", ("drug_interactions",)),
        ("dosage", ("dosage_and_administration",)),
        ("geriatric_use", ("geriatric_use",)),
        ("indications", ("indications_and_usage",)),
        ("adverse_reactions", ("adverse_reactions",)),
    )
    return {key: _extract_field(label, *fields) for key, fields in sections}


def check_drug_interactions(drug_name: str) -> str:
    """
    Return the drug-interactions section of a drug's FDA label.

    The text comes from the first matching label record and can be shown
    directly or handed to the model.  Yields "" when the label or the
    section is not available.
    """
    label = _first_result(_get_with_fallback(_BASE_URL, drug_name))
    return _extract_field(label, "drug_interactions")


def get_adverse_events(drug_name: str, limit: int = 8) -> str:
    """
    Get the most commonly reported adverse events for a drug from FDA FAERS.

    Queries the /drug/event endpoint and counts reaction terms.
    Returns a plain comma-separated string of the top reactions (lower-cased),
    or "" when nothing is found, the request fails, or *drug_name* is blank.

    Args:
        drug_name: Drug name to query.
        limit:     Number of top reactions to return (default 8).
    """
    # Blank / non-string names cannot match; skip the API call entirely.
    if not isinstance(drug_name, str) or not drug_name.strip():
        return ""

    # The name is NOT percent-encoded here: the request layer encodes query
    # parameters, so encoding twice would corrupt multi-word names.  Only the
    # characters that could break out of the quoted phrase are escaped.
    term = drug_name.strip().replace("\\", "\\\\").replace('"', '\\"')
    params = {
        "search": f'patient.drug.medicinalproduct:"{term}"',
        "count":  "patient.reaction.reactionmeddrapt.exact",
        "limit":  limit,
    }
    data = _get(_EVENT_URL, params)
    if not data:
        return ""

    # Each result: {"term": "NAUSEA", "count": 12345}.  Malformed entries are
    # skipped rather than raising, to honour the module's never-raise contract.
    reactions = []
    for entry in data.get("results") or []:
        name = entry.get("term") if isinstance(entry, dict) else None
        if name:
            reactions.append(str(name).lower().replace("_", " "))
    return ", ".join(reactions)


def get_geriatric_warnings(drug_name: str) -> str:
    """
    Extract geriatric-specific information from a drug's FDA label.

    Returns the geriatric_use section text, or falls back to scanning
    the warnings section for age-related keywords if geriatric_use is empty.
    In the fallback case at most three matching sentences are returned,
    joined with ". ".  Returns "" when nothing relevant is found or when
    *drug_name* is blank.
    """
    # Blank / non-string names cannot match; skip the API call entirely.
    if not isinstance(drug_name, str) or not drug_name.strip():
        return ""

    record = _first_result(_get_with_fallback(_BASE_URL, drug_name))

    geriatric = _extract_field(record, "geriatric_use")
    if geriatric:
        return geriatric

    # Fallback: scan warnings for age-related content
    warnings = _extract_field(record, "warnings", "warnings_and_cautions")
    if not warnings:
        return ""

    # Keywords must start at a word boundary so that e.g. "aged" does not
    # fire on "managed" or "damaged".
    age_pattern = re.compile(
        r"\b(?:older adult|elderly|geriatric|65 years|aged)", re.IGNORECASE
    )
    # Split only where punctuation is followed by whitespace or the end of
    # the text, so decimals such as "0.5 mg" stay inside one sentence.
    sentences = (s.strip() for s in re.split(r"[.!?]+(?:\s+|$)", warnings))
    relevant = [s for s in sentences if s and age_pattern.search(s)]
    return ". ".join(relevant[:3]) + "." if relevant else ""


# Patterns for FDA label formatting artifacts.
_SECTION_HEADER_RE = re.compile(r"^\s*\d+(?:\.\d+)?\s+[A-Z][A-Z\s]+")   # e.g. "7 DRUG INTERACTIONS"
_PAREN_REF_RE      = re.compile(r"\(\s*\d+(?:\.\d+)?\s*\)")              # e.g. "( 5.1 )"
_LEADING_NUM_RE    = re.compile(r"^\s*\d+(?:\.\d+)?\s+")                 # e.g. the "2 " in "2 DOSAGE..."


def _clean_lifestyle_sentence(sentence: str) -> str:
    """
    Tidy one label sentence for display, or reject it.

    Removes parenthesised section references and a leading section number,
    then collapses whitespace.  The result is "" when the input is a short
    numbered header (five words or fewer), or when the cleaned text is
    shorter than 20 characters, longer than 180 characters, or entirely
    upper-case.
    """
    # A short line that starts like "7 DRUG INTERACTIONS" is a bare header.
    is_short_header = (
        len(sentence.split()) <= 5
        and _SECTION_HEADER_RE.match(sentence) is not None
    )
    if is_short_header:
        return ""

    # Remove "( 5.1 )"-style references first, then any leading section number.
    without_refs = _PAREN_REF_RE.sub("", sentence)
    without_number = _LEADING_NUM_RE.sub("", without_refs)
    cleaned = re.sub(r"\s+", " ", without_number).strip()

    # Too short to be useful, too long to be one clean thought (probably a
    # bad sentence boundary), or still shouting like a header: discard.
    length = len(cleaned)
    if length < 20 or length > 180 or cleaned.isupper():
        return ""

    return cleaned


def get_lifestyle_warnings(drug_list: list[str]) -> dict:
    """
    Extract food, alcohol, and lifestyle interaction warnings for a list of drugs.

    Scans the warnings, drug_interactions and dosage_and_administration FDA
    label fields for lifestyle-related keywords and returns structured
    plain-language warnings per drug.

    Matching rules:
      - Keywords are matched case-insensitively, so mixed-case entries such
        as "UV" work.
      - A keyword must start at a word boundary ("iron" does not fire on
        "environment", "heat" not on "wheat"); word endings are free, so
        "drink" still matches "drinks" and "alcohol" matches "alcoholic".
      - Each label section is split into sentences on its own, at
        punctuation followed by whitespace or end of text.  Section
        references like "( 5.1 )" and decimals therefore stay intact, and
        text from different sections is never glued into one sentence.

    Args:
        drug_list: List of drug name strings.  None or an empty list gives {}.

    Returns:
        Dict keyed by drug name.  Each value is a dict keyed by lifestyle
        category ("alcohol", "grapefruit", "food", "dairy", "sun", "driving",
        "exercise"), with a list of at most two relevant sentence strings as
        values.  Categories with no matches are omitted from the inner dict,
        and drugs with no matches at all are omitted from the result.

    Example return value:
        {
            "metformin": {
                "alcohol": ["Avoid alcohol while taking metformin..."],
                "food":    ["Take metformin with a meal..."],
            },
            "atorvastatin": {
                "grapefruit": ["Avoid grapefruit juice..."],
            },
        }
    """
    if not drug_list:
        return {}

    max_per_category = 2
    sentence_break = re.compile(r"[.!?]+(?:\s+|$)")
    # One alternation per category, built once and reused for every drug.
    category_patterns = {
        category: re.compile(
            r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + r")",
            re.IGNORECASE,
        )
        for category, keywords in _LIFESTYLE_KEYWORDS.items()
    }

    result = {}

    for drug_name in drug_list:
        record = _first_result(_get_with_fallback(_BASE_URL, drug_name))

        # All text fields that might mention lifestyle interactions
        sections = (
            _extract_field(record, "warnings", "warnings_and_cautions"),
            _extract_field(record, "drug_interactions"),
            _extract_field(record, "dosage_and_administration"),
        )

        sentences: list[str] = []
        for text in sections:
            if not text:
                continue
            for piece in sentence_break.split(text):
                piece = piece.strip()
                if len(piece) > 15:
                    sentences.append(piece)

        if not sentences:
            continue

        drug_warnings: dict[str, list[str]] = {}

        for category, pattern in category_patterns.items():
            hits: list[str] = []
            for sentence in sentences:
                if not pattern.search(sentence):
                    continue
                clean = _clean_lifestyle_sentence(sentence)
                if clean and clean not in hits:
                    hits.append(clean)
                    if len(hits) == max_per_category:
                        break  # later matches would be discarded anyway
            if hits:
                drug_warnings[category] = hits

        if drug_warnings:
            result[drug_name] = drug_warnings

    return result


# ── Convenience: fetch all data for a drug list in one call ─────────────────

def fetch_all_drug_data(drug_list: list[str]) -> dict:
    """
    Gather label data, lifestyle warnings and adverse events for each drug.

    Lifestyle warnings are collected for the whole list up front; label and
    adverse-event data are then looked up drug by drug.

    The result is keyed by drug name, and every entry holds:
        label    : dict from get_drug_label()
        lifestyle: that drug's part of get_lifestyle_warnings() ({} if none)
        events   : str from get_adverse_events()

    Suitable for passing to prompts.build_fda_context_prompt().
    """
    lifestyle_by_drug = get_lifestyle_warnings(drug_list)
    return {
        drug: {
            "label": get_drug_label(drug),
            "lifestyle": lifestyle_by_drug.get(drug, {}),
            "events": get_adverse_events(drug),
        }
        for drug in drug_list
    }