""" Nursing-focused abstract summariser. No ML model required — uses structured-abstract parsing + heuristic sentence scoring so the app stays fast on a free CPU Hugging Face Space. """ import re # Section headers commonly found in PubMed structured abstracts _SECTION_ALIASES = { "background": ["BACKGROUND", "INTRODUCTION", "CONTEXT"], "objective": ["OBJECTIVE", "OBJECTIVES", "PURPOSE", "AIM", "AIMS", "GOAL"], "methods": ["METHODS", "METHOD", "DESIGN", "PARTICIPANTS", "SETTING", "MATERIALS AND METHODS", "STUDY DESIGN"], "results": ["RESULTS", "FINDINGS", "OUTCOMES"], "conclusions": ["CONCLUSIONS", "CONCLUSION", "IMPLICATIONS", "CLINICAL IMPLICATIONS", "SIGNIFICANCE", "SUMMARY"], } # Build reverse map: uppercase alias → canonical key _ALIAS_MAP: dict[str, str] = {} for canonical, aliases in _SECTION_ALIASES.items(): for alias in aliases: _ALIAS_MAP[alias.upper()] = canonical # Evidence-level inference (Melnyk & Fineout-Overholt hierarchy) _EVIDENCE_RULES = [ (["systematic review", "meta-analysis", "meta analysis"], "Level I", "🟢", "Systematic Review / Meta-Analysis"), (["randomized controlled", "randomised controlled", "rct"], "Level II", "🟡", "Randomized Controlled Trial"), (["quasi-experimental", "quasi experimental", "pre-post", "before-after"], "Level III", "🟠", "Quasi-Experimental Study"), (["cohort", "case-control", "case control", "longitudinal"], "Level IV", "🟠", "Cohort / Case-Control Study"), (["systematic review of qualitative"], "Level V", "🔵", "Systematic Review of Qualitative Studies"), (["qualitative", "grounded theory", "phenomenolog", "ethnograph"], "Level VI", "🔵", "Qualitative / Descriptive Study"), (["expert opinion", "committee report", "consensus"], "Level VII", "⚫", "Expert Opinion / Consensus"), ] # Keywords that suggest nursing relevance for implication extraction _NURSING_KEYWORDS = [ "nurse", "nursing", "patient", "care", "clinical", "practice", "intervention", "outcome", "recommend", "implication", "significant", "effective", "improve", "reduce", "prevent", "education", "safety", ] # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- def infer_evidence_level(title: str, abstract: str = "") -> tuple[str, str, str]: """Return (level_code, emoji, description) based on study text.""" text = (title + " " + abstract).lower() for keywords, code, emoji, desc in _EVIDENCE_RULES: if any(kw in text for kw in keywords): return code, emoji, desc return "Unclassified", "⚪", "Evidence level not determined" def summarize(abstract: str, title: str = "") -> dict: """ Parse abstract into nursing-focused sections. Returns dict with keys: overview, methods, key_findings, nursing_implications, evidence_level """ if not abstract or abstract == "Abstract not available.": return _empty_summary() sections = _parse_structured(abstract) overview = ( sections.get("background") or sections.get("objective") or _first_sentences(abstract, 2) ) methods = sections.get("methods", "") key_findings = ( sections.get("results") or _extract_conclusion(abstract) ) nursing_implications = ( sections.get("conclusions") or _extract_nursing_sentences(abstract) ) ev_level = infer_evidence_level(title, abstract) return { "overview": _trim(overview, 450), "methods": _trim(methods, 350), "key_findings": _trim(key_findings, 450), "nursing_implications": _trim(nursing_implications, 400), "evidence_level": ev_level, } # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- def _parse_structured(abstract: str) -> dict[str, str]: """Extract sections from a PubMed-style structured abstract.""" sections: dict[str, str] = {} current_key = None current_lines: list[str] = [] # Match bold markdown labels (from fetch_abstract) or plain ALL-CAPS labels label_re = re.compile( r"^\*\*([A-Z][A-Z\s/&]+?):\*\*\s*(.*)|^([A-Z][A-Z\s/&]+?):\s*(.*)" ) for raw_line in abstract.split("\n"): line = raw_line.strip() if not line: continue m = label_re.match(line) if m: label = (m.group(1) or m.group(3) or "").strip().upper() rest = (m.group(2) or m.group(4) or "").strip() canonical = _ALIAS_MAP.get(label) if canonical: # Save previous section if current_key and current_lines: sections[current_key] = " ".join(current_lines).strip() current_key = canonical current_lines = [rest] if rest else [] continue if current_key: current_lines.append(line) # Lines before any recognised label go to a default bucket else: sections.setdefault("background", "") sections["background"] += " " + line if current_key and current_lines: sections[current_key] = " ".join(current_lines).strip() return sections def _split_sentences(text: str) -> list[str]: return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()] def _first_sentences(text: str, n: int) -> str: return " ".join(_split_sentences(text)[:n]) def _extract_conclusion(text: str) -> str: sentences = _split_sentences(text) conclusion_kws = [ "conclude", "conclusion", "suggest", "recommend", "therefore", "thus", "in summary", "findings indicate", "results show", "results suggest", ] hits = [s for s in sentences if any(k in s.lower() for k in conclusion_kws)] return " ".join(hits[:2]) if hits else " ".join(sentences[-2:]) def _extract_nursing_sentences(text: str) -> str: sentences = _split_sentences(text) scored = [] for s in sentences: s_lower = s.lower() score = sum(1 for kw in _NURSING_KEYWORDS if kw in s_lower) if score: scored.append((score, s)) scored.sort(reverse=True) top = [s for _, s in scored[:2]] return " ".join(top) if top else "Review the full abstract for nursing practice implications." def _trim(text: str, limit: int) -> str: if not text: return "" text = text.strip() return text[:limit].rsplit(" ", 1)[0] + "…" if len(text) > limit else text def _empty_summary() -> dict: return { "overview": "Abstract not available for this article.", "methods": "", "key_findings": "See full article.", "nursing_implications": "Review the full article for nursing implications.", "evidence_level": ("Unclassified", "⚪", "Evidence level not determined"), }