# nursing-ebp-tool / summarizer.py
# Lincoln Gombedza
# Initial commit: EBP Research Tool for Student Nurses
# 5095870
"""
Nursing-focused abstract summariser.
No ML model required — uses structured-abstract parsing + heuristic sentence
scoring so the app stays fast on a free CPU Hugging Face Space.
"""
import re
# Section headers commonly found in PubMed structured abstracts, grouped under
# the canonical section name the summariser works with.
_SECTION_ALIASES: dict[str, list[str]] = {
    "background": ["BACKGROUND", "INTRODUCTION", "CONTEXT"],
    "objective": ["OBJECTIVE", "OBJECTIVES", "PURPOSE", "AIM", "AIMS", "GOAL"],
    "methods": [
        "METHODS", "METHOD", "DESIGN", "PARTICIPANTS", "SETTING",
        "MATERIALS AND METHODS", "STUDY DESIGN",
    ],
    "results": ["RESULTS", "FINDINGS", "OUTCOMES"],
    "conclusions": [
        "CONCLUSIONS", "CONCLUSION", "IMPLICATIONS",
        "CLINICAL IMPLICATIONS", "SIGNIFICANCE", "SUMMARY",
    ],
}

# Reverse map: uppercase alias → canonical key.
# Built with a comprehension so the iteration variables do not linger as
# module-level names after import.
_ALIAS_MAP: dict[str, str] = {
    alias.upper(): canonical
    for canonical, aliases in _SECTION_ALIASES.items()
    for alias in aliases
}
# Evidence-level inference (Melnyk & Fineout-Overholt hierarchy).
# Rules are tried top to bottom and the first match wins, so a rule whose
# keyword contains another rule's keyword must be listed ahead of that rule.
_EVIDENCE_RULES = [
    # Must precede Level I: this phrase also contains "systematic review".
    (["systematic review of qualitative"],
     "Level V", "🔵", "Systematic Review of Qualitative Studies"),
    (["systematic review", "meta-analysis", "meta analysis"],
     "Level I", "🟢", "Systematic Review / Meta-Analysis"),
    (["randomized controlled", "randomised controlled", "rct"],
     "Level II", "🟡", "Randomized Controlled Trial"),
    (["quasi-experimental", "quasi experimental", "pre-post", "before-after"],
     "Level III", "🟠", "Quasi-Experimental Study"),
    (["cohort", "case-control", "case control", "longitudinal"],
     "Level IV", "🟠", "Cohort / Case-Control Study"),
    (["qualitative", "grounded theory", "phenomenolog", "ethnograph"],
     "Level VI", "🔵", "Qualitative / Descriptive Study"),
    (["expert opinion", "committee report", "consensus"],
     "Level VII", "⚫", "Expert Opinion / Consensus"),
]

# One compiled pattern per rule, in rule order. Each keyword is anchored at
# the START of a word only: stems such as "phenomenolog" and plurals such as
# "rcts" still match, but "rct" no longer matches inside "infarction".
_EVIDENCE_PATTERNS = [
    (
        re.compile(r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + r")"),
        code,
        emoji,
        desc,
    )
    for keywords, code, emoji, desc in _EVIDENCE_RULES
]

# Keywords that suggest nursing relevance for implication extraction
_NURSING_KEYWORDS = [
    "nurse", "nursing", "patient", "care", "clinical", "practice",
    "intervention", "outcome", "recommend", "implication", "significant",
    "effective", "improve", "reduce", "prevent", "education", "safety",
]


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def infer_evidence_level(title: str, abstract: str = "") -> tuple[str, str, str]:
    """Return (level_code, emoji, description) based on study text.

    The title and abstract are joined, lower-cased and tested against the
    evidence rules in order; the first rule with a keyword that starts a
    word in the text wins.

    Args:
        title: Article title. ``None`` is treated as empty.
        abstract: Abstract text. ``None`` is treated as empty.

    Returns:
        The matching rule's ``(code, emoji, description)``, or
        ``("Unclassified", "⚪", "Evidence level not determined")`` when no
        rule matches.
    """
    text = f"{title or ''} {abstract or ''}".lower()
    for pattern, code, emoji, desc in _EVIDENCE_PATTERNS:
        if pattern.search(text):
            return code, emoji, desc
    return "Unclassified", "⚪", "Evidence level not determined"
def summarize(abstract: str, title: str = "") -> dict:
    """
    Parse abstract into nursing-focused sections.

    Labelled sections found by ``_parse_structured`` are preferred; when a
    section is missing, a heuristic over the whole abstract fills the gap
    (leading sentences for the overview, conclusion-like sentences for the
    key findings, keyword-scored sentences for the nursing implications).

    Args:
        abstract: Abstract text. ``None``, an empty or whitespace-only
            string, or the "Abstract not available." placeholder (with or
            without surrounding whitespace) yields the placeholder summary
            from ``_empty_summary``.
        title: Article title; only used for evidence-level inference.

    Returns dict with keys:
        overview, methods, key_findings, nursing_implications, evidence_level

    ``evidence_level`` is the tuple returned by ``infer_evidence_level``;
    every other value is a string cut to a per-field maximum by ``_trim``.
    """
    # Normalise before the guard so blank text or a padded placeholder is
    # treated as "no abstract" instead of producing a mostly empty summary.
    text = (abstract or "").strip()
    if not text or text == "Abstract not available.":
        return _empty_summary()

    sections = _parse_structured(text)

    overview = (
        sections.get("background")
        or sections.get("objective")
        or _first_sentences(text, 2)
    )
    methods = sections.get("methods", "")
    key_findings = sections.get("results") or _extract_conclusion(text)
    nursing_implications = (
        sections.get("conclusions")
        or _extract_nursing_sentences(text)
    )
    ev_level = infer_evidence_level(title, text)

    return {
        "overview": _trim(overview, 450),
        "methods": _trim(methods, 350),
        "key_findings": _trim(key_findings, 450),
        "nursing_implications": _trim(nursing_implications, 400),
        "evidence_level": ev_level,
    }
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _parse_structured(abstract: str) -> dict[str, str]:
    """Extract sections from a PubMed-style structured abstract.

    The text is read line by line. A line that opens with a recognised label
    (``**LABEL:**`` or ``LABEL:`` in capitals, looked up in ``_ALIAS_MAP``)
    starts a section; every other non-blank line, including one with an
    unrecognised label, continues the section currently open.

    Several labels may map to the same canonical section (for example
    DESIGN, SETTING and PARTICIPANTS all map to "methods"); their text is
    concatenated in order of appearance rather than the last one replacing
    the earlier ones.

    Text that appears before the first recognised label is placed at the
    front of "background".

    Args:
        abstract: Abstract text with sections separated by newlines.

    Returns:
        Mapping of canonical section name to its text. Sections without
        text are omitted. An empty dict is returned when no recognised
        label is found, so callers can fall back to whole-abstract
        heuristics for unstructured abstracts.
    """
    # Match bold markdown labels (from fetch_abstract) or plain ALL-CAPS labels
    label_re = re.compile(
        r"^\*\*([A-Z][A-Z\s/&]+?):\*\*\s*(.*)|^([A-Z][A-Z\s/&]+?):\s*(.*)"
    )
    collected: dict[str, list[str]] = {}
    preamble: list[str] = []
    current_key = None

    for raw_line in abstract.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        m = label_re.match(line)
        if m:
            label = (m.group(1) or m.group(3) or "").strip().upper()
            canonical = _ALIAS_MAP.get(label)
            if canonical:
                current_key = canonical
                # setdefault keeps text already gathered under this section
                # by an earlier label instead of discarding it.
                bucket = collected.setdefault(canonical, [])
                rest = (m.group(2) or m.group(4) or "").strip()
                if rest:
                    bucket.append(rest)
                continue
        # Plain line, or a label that is not in the alias map: treat it as a
        # continuation of whatever section is open.
        if current_key:
            collected[current_key].append(line)
        else:
            preamble.append(line)

    if not collected:
        # No recognised label anywhere: not a structured abstract.
        return {}
    if preamble:
        collected["background"] = preamble + collected.get("background", [])
    return {key: " ".join(parts) for key, parts in collected.items() if parts}
def _split_sentences(text: str) -> list[str]:
    """Split *text* into sentences.

    A sentence boundary is any run of whitespace that directly follows
    '.', '!' or '?'. Each piece is stripped and empty pieces are dropped.
    """
    pieces = re.split(r"(?<=[.!?])\s+", text)
    stripped = map(str.strip, pieces)
    # filter(None, ...) discards the empty strings left after stripping.
    return list(filter(None, stripped))
def _first_sentences(text: str, n: int) -> str:
    """Return the first *n* sentences of *text*, joined by single spaces."""
    leading = _split_sentences(text)[:n]
    return " ".join(leading)
def _extract_conclusion(text: str) -> str:
    """Return up to two conclusion-like sentences from *text*.

    A sentence qualifies when its lower-cased form contains one of the
    marker phrases below. The first two qualifying sentences are returned;
    when none qualifies, the last two sentences of the text are returned
    instead. Sentences are joined by single spaces.
    """
    markers = (
        "conclude", "conclusion", "suggest", "recommend",
        "therefore", "thus", "in summary", "findings indicate",
        "results show", "results suggest",
    )
    sentences = _split_sentences(text)

    matching = []
    for sentence in sentences:
        lowered = sentence.lower()
        if any(marker in lowered for marker in markers):
            matching.append(sentence)

    if matching:
        return " ".join(matching[:2])
    # Nothing reads like a conclusion: fall back to the closing sentences.
    return " ".join(sentences[-2:])
def _extract_nursing_sentences(text: str) -> str:
    """Return the two sentences of *text* that mention the most nursing keywords.

    Each sentence is scored by how many entries of ``_NURSING_KEYWORDS``
    occur in it (case-insensitive substring test). Sentences scoring zero
    are ignored. The two highest-scoring sentences are joined by a space,
    highest score first; sentences with equal scores keep the order in which
    they appear in the text.

    Returns:
        The joined sentences, or a fixed "review the full abstract" message
        when no sentence contains any keyword.
    """
    scored = []
    for sentence in _split_sentences(text):
        lowered = sentence.lower()
        score = sum(1 for kw in _NURSING_KEYWORDS if kw in lowered)
        if score:
            scored.append((score, sentence))

    # Sort on the score alone. Sorting the raw tuples would break ties by
    # comparing sentence text, i.e. reverse-alphabetically; a keyed sort is
    # stable (also with reverse=True), so ties stay in abstract order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    top = [sentence for _, sentence in scored[:2]]
    return " ".join(top) if top else "Review the full abstract for nursing practice implications."
def _trim(text: str, limit: int) -> str:
    """Strip *text* and shorten it to roughly *limit* characters.

    Falsy input gives an empty string. Text that fits within *limit* after
    stripping is returned as is. Longer text is cut at *limit* characters,
    everything after the last space in that cut is dropped, and an ellipsis
    character is appended.
    """
    if not text:
        return ""
    stripped = text.strip()
    if len(stripped) <= limit:
        return stripped
    # Keep only what precedes the last space so the cut does not end mid-word;
    # with no space in the slice, the whole slice is kept.
    head, *_ = stripped[:limit].rsplit(" ", 1)
    return head + "…"
def _empty_summary() -> dict:
    """Return the placeholder summary used when no abstract text is available.

    The dict has the same five keys as a regular summary, with fixed
    messages and an "Unclassified" evidence level.
    """
    unclassified = ("Unclassified", "⚪", "Evidence level not determined")
    return dict(
        overview="Abstract not available for this article.",
        methods="",
        key_findings="See full article.",
        nursing_implications="Review the full article for nursing implications.",
        evidence_level=unclassified,
    )