# Hugging Face Space: NurseCitizenDeveloper/nursing-ebp-tool (status: Running)
# File size: 7,231 bytes
"""
Nursing-focused abstract summariser.

No ML model required — uses structured-abstract parsing + heuristic sentence
scoring so the app stays fast on a free CPU Hugging Face Space.
"""

import re

# Section headers commonly found in PubMed structured abstracts, grouped under
# the canonical section name the summariser works with.
_SECTION_ALIASES = {
    "background":     ["BACKGROUND", "INTRODUCTION", "CONTEXT"],
    "objective":      ["OBJECTIVE", "OBJECTIVES", "PURPOSE", "AIM", "AIMS", "GOAL"],
    "methods":        ["METHODS", "METHOD", "DESIGN", "PARTICIPANTS", "SETTING",
                       "MATERIALS AND METHODS", "STUDY DESIGN"],
    "results":        ["RESULTS", "FINDINGS", "OUTCOMES"],
    "conclusions":    ["CONCLUSIONS", "CONCLUSION", "IMPLICATIONS",
                       "CLINICAL IMPLICATIONS", "SIGNIFICANCE", "SUMMARY"],
}

# Reverse map: uppercase alias → canonical key.
# Built with a comprehension so that no loop variables are left behind in the
# module namespace (a module-level ``for`` loop would leak them).
_ALIAS_MAP: dict[str, str] = {
    alias.upper(): canonical
    for canonical, aliases in _SECTION_ALIASES.items()
    for alias in aliases
}

# Evidence-level inference (Melnyk & Fineout-Overholt hierarchy).
# Rules are tried top to bottom and the first hit wins, so a rule whose
# keyword is a more specific form of another rule's keyword must come first.
_EVIDENCE_RULES = [
    # Listed before Level I: the generic "systematic review" keyword would
    # otherwise always match first and make this rule unreachable.
    (["systematic review of qualitative"],
     "Level V",   "🔵", "Systematic Review of Qualitative Studies"),
    (["systematic review", "meta-analysis", "meta analysis"],
     "Level I",   "🟢", "Systematic Review / Meta-Analysis"),
    (["randomized controlled", "randomised controlled", "rct"],
     "Level II",  "🟡", "Randomized Controlled Trial"),
    (["quasi-experimental", "quasi experimental", "pre-post", "before-after"],
     "Level III", "🟠", "Quasi-Experimental Study"),
    (["cohort", "case-control", "case control", "longitudinal"],
     "Level IV",  "🟠", "Cohort / Case-Control Study"),
    (["qualitative", "grounded theory", "phenomenolog", "ethnograph"],
     "Level VI",  "🔵", "Qualitative / Descriptive Study"),
    (["expert opinion", "committee report", "consensus"],
     "Level VII", "⚫", "Expert Opinion / Consensus"),
]

# Keywords that suggest nursing relevance for implication extraction
_NURSING_KEYWORDS = [
    "nurse", "nursing", "patient", "care", "clinical", "practice",
    "intervention", "outcome", "recommend", "implication", "significant",
    "effective", "improve", "reduce", "prevent", "education", "safety",
]


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def infer_evidence_level(title: str, abstract: str = "") -> tuple[str, str, str]:
    """Return (level_code, emoji, description) based on study text.

    The title and abstract are joined and lower-cased, then checked against
    ``_EVIDENCE_RULES`` in order; the first rule with a matching keyword wins.
    A keyword matches only where it begins a word, so stems such as
    "phenomenolog" still match "phenomenological" while "rct" no longer
    matches inside "infarction".

    Args:
        title: Article title; ``None`` is treated as an empty string.
        abstract: Abstract text; ``None`` is treated as an empty string.

    Returns:
        The ``(code, emoji, description)`` of the first matching rule, or
        ``("Unclassified", "⚪", "Evidence level not determined")`` when no
        rule matches.
    """
    text = f"{title or ''} {abstract or ''}".lower()
    for keywords, code, emoji, desc in _EVIDENCE_RULES:
        # Leading \b only: anchors the start of the keyword to a word start
        # but leaves the end open so plurals and stems keep matching.
        if any(re.search(r"\b" + re.escape(kw), text) for kw in keywords):
            return code, emoji, desc
    return "Unclassified", "⚪", "Evidence level not determined"


def summarize(abstract: str, title: str = "") -> dict:
    """
    Parse abstract into nursing-focused sections.

    Each field is taken from the matching structured-abstract section when
    one was parsed, otherwise from a heuristic fallback:

        overview             - background, else objective, else the first
                               two sentences of the abstract
        methods              - methods section, else ""
        key_findings         - results, else sentences picked by
                               _extract_conclusion
        nursing_implications - conclusions, else sentences picked by
                               _extract_nursing_sentences
        evidence_level       - tuple returned by infer_evidence_level

    Text fields are shortened with _trim (450 / 350 / 450 / 400 characters).

    A missing abstract (None, empty, whitespace-only, or the
    "Abstract not available." placeholder, with or without surrounding
    whitespace) yields the placeholder dict from _empty_summary().

    Returns dict with keys:
        overview, methods, key_findings, nursing_implications, evidence_level
    """
    # Normalise first so blank or padded input takes the empty-summary path
    # instead of producing a summary made of empty strings.
    abstract = (abstract or "").strip()
    if not abstract or abstract == "Abstract not available.":
        return _empty_summary()

    sections = _parse_structured(abstract)

    overview = (
        sections.get("background")
        or sections.get("objective")
        or _first_sentences(abstract, 2)
    )

    methods = sections.get("methods", "")

    key_findings = (
        sections.get("results")
        or _extract_conclusion(abstract)
    )

    nursing_implications = (
        sections.get("conclusions")
        or _extract_nursing_sentences(abstract)
    )

    # Guard against a None title so evidence inference never concatenates None.
    ev_level = infer_evidence_level(title or "", abstract)

    return {
        "overview":             _trim(overview, 450),
        "methods":              _trim(methods, 350),
        "key_findings":         _trim(key_findings, 450),
        "nursing_implications": _trim(nursing_implications, 400),
        "evidence_level":       ev_level,
    }


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _parse_structured(abstract: str) -> dict[str, str]:
    """Extract sections from a PubMed-style structured abstract.

    The abstract is read line by line. A line that starts with a label
    (``**LABEL:**`` or ``LABEL:``) whose upper-cased text is a key of
    ``_ALIAS_MAP`` opens that canonical section; every other non-blank line
    is appended to the section currently open. Lines seen before any
    recognised label are collected under "background".

    Text from several labels that map to the same canonical key (for example
    DESIGN, SETTING and PARTICIPANTS, which all map to "methods") is
    concatenated in document order rather than the last one replacing the
    earlier ones.

    Args:
        abstract: Abstract text with sections separated by newlines.

    Returns:
        Mapping of canonical section name to its text, fragments joined by
        single spaces. Sections that received no text are omitted.
    """
    # Match bold markdown labels (from fetch_abstract) or plain ALL-CAPS labels
    label_re = re.compile(
        r"^\*\*([A-Z][A-Z\s/&]+?):\*\*\s*(.*)|^([A-Z][A-Z\s/&]+?):\s*(.*)"
    )

    # canonical key -> text fragments, in the order they appear
    collected: dict[str, list[str]] = {}
    # Lines before any recognised label go to a default bucket
    current_key = "background"

    for raw_line in abstract.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        m = label_re.match(line)
        if m:
            label = (m.group(1) or m.group(3) or "").strip().upper()
            canonical = _ALIAS_MAP.get(label)
            if canonical:
                current_key = canonical
                # Keep only the text after the label; a bare label line
                # contributes nothing by itself.
                line = (m.group(2) or m.group(4) or "").strip()
                if not line:
                    continue
            # An unrecognised label falls through and the whole line is kept
            # as ordinary text of the section currently open.

        collected.setdefault(current_key, []).append(line)

    return {key: " ".join(parts) for key, parts in collected.items()}


def _split_sentences(text: str) -> list[str]:
    """Split text at whitespace following '.', '!' or '?'; drop empty pieces."""
    pieces = re.split(r"(?<=[.!?])\s+", text)
    # filter(None, ...) discards pieces that are empty once stripped.
    return list(filter(None, map(str.strip, pieces)))


def _first_sentences(text: str, n: int) -> str:
    """Return the first n sentences of text joined by single spaces."""
    leading = _split_sentences(text)[:n]
    return " ".join(leading)


def _extract_conclusion(text: str) -> str:
    """Pick up to two conclusion-like sentences from text.

    A sentence qualifies when its lower-cased form contains any cue phrase.
    When no sentence qualifies, the last two sentences are returned instead.
    """
    cue_phrases = (
        "conclude", "conclusion", "suggest", "recommend",
        "therefore", "thus", "in summary", "findings indicate",
        "results show", "results suggest",
    )
    sentences = _split_sentences(text)

    flagged = []
    for sentence in sentences:
        lowered = sentence.lower()
        if any(cue in lowered for cue in cue_phrases):
            flagged.append(sentence)

    if flagged:
        return " ".join(flagged[:2])
    return " ".join(sentences[-2:])


def _extract_nursing_sentences(text: str) -> str:
    """Return the two sentences of text that mention the most nursing keywords.

    Each sentence is scored by how many entries of ``_NURSING_KEYWORDS`` occur
    in its lower-cased form (substring match); sentences scoring zero are
    ignored. The two highest-scoring sentences are joined with a space,
    highest score first. Sentences with equal scores keep their order of
    appearance in text.

    Returns:
        The joined sentences, or a fixed prompt to read the full abstract
        when no sentence contains any keyword.
    """
    scored = []
    for sentence in _split_sentences(text):
        lowered = sentence.lower()
        score = sum(kw in lowered for kw in _NURSING_KEYWORDS)
        if score:
            scored.append((score, sentence))

    # Sort on the score alone. Python's sort is stable, including with
    # reverse=True, so ties stay in document order; comparing the whole tuple
    # would break ties by reverse-alphabetical sentence text.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    top = [sentence for _, sentence in scored[:2]]
    return " ".join(top) if top else "Review the full abstract for nursing practice implications."


def _trim(text: str, limit: int) -> str:
    """Shorten text to at most ``limit`` characters plus a trailing "…".

    Falsy input returns "". The text is stripped first and returned as-is
    when it already fits. Otherwise it is cut at ``limit`` characters; if the
    cut falls inside a word, the partial word is dropped by backing up to the
    previous space (a single word longer than ``limit`` is cut mid-word). A
    word that ends exactly at the cut is kept. Whitespace left at the end is
    removed before "…" is appended.
    """
    if not text:
        return ""
    text = text.strip()
    if len(text) <= limit:
        return text

    clipped = text[:limit]
    # text[limit] is the first dropped character; if it is whitespace the cut
    # landed on a word boundary and the last word is complete.
    if not text[limit].isspace():
        clipped = clipped.rsplit(" ", 1)[0]
    return clipped.rstrip() + "…"


def _empty_summary() -> dict:
    """Build the placeholder summary used when no abstract text is available."""
    unclassified = ("Unclassified", "⚪", "Evidence level not determined")
    return dict(
        overview="Abstract not available for this article.",
        methods="",
        key_findings="See full article.",
        nursing_implications="Review the full article for nursing implications.",
        evidence_level=unclassified,
    )