File size: 7,231 Bytes
"""
Nursing-focused abstract summariser.

No ML model required — uses structured-abstract parsing plus heuristic sentence
scoring so the app stays fast on a free CPU Hugging Face Space.
"""
import re
# Canonical section name -> header labels commonly found in PubMed
# structured abstracts.
_SECTION_ALIASES = {
    "background": ["BACKGROUND", "INTRODUCTION", "CONTEXT"],
    "objective": ["OBJECTIVE", "OBJECTIVES", "PURPOSE", "AIM", "AIMS", "GOAL"],
    "methods": [
        "METHODS", "METHOD", "DESIGN", "PARTICIPANTS", "SETTING",
        "MATERIALS AND METHODS", "STUDY DESIGN",
    ],
    "results": ["RESULTS", "FINDINGS", "OUTCOMES"],
    "conclusions": [
        "CONCLUSIONS", "CONCLUSION", "IMPLICATIONS",
        "CLINICAL IMPLICATIONS", "SIGNIFICANCE", "SUMMARY",
    ],
}

# Reverse lookup: upper-cased header label -> canonical section name.
_ALIAS_MAP: dict[str, str] = {
    label.upper(): section
    for section, labels in _SECTION_ALIASES.items()
    for label in labels
}
# Evidence-level inference (Melnyk & Fineout-Overholt hierarchy).
# Each rule is (keywords, level_code, emoji, description). The rules are
# scanned top to bottom and the first rule with a matching keyword wins, so a
# rule whose keyword contains another rule's keyword has to come first.
_EVIDENCE_RULES = [
    # Must precede Level I: this phrase contains the Level I keyword
    # "systematic review" and would otherwise never be reached.
    (["systematic review of qualitative"],
     "Level V", "🔵", "Systematic Review of Qualitative Studies"),
    (["systematic review", "meta-analysis", "meta analysis"],
     "Level I", "🟢", "Systematic Review / Meta-Analysis"),
    (["randomized controlled", "randomised controlled", "rct"],
     "Level II", "🟡", "Randomized Controlled Trial"),
    (["quasi-experimental", "quasi experimental", "pre-post", "before-after"],
     "Level III", "🟠", "Quasi-Experimental Study"),
    (["cohort", "case-control", "case control", "longitudinal"],
     "Level IV", "🟠", "Cohort / Case-Control Study"),
    (["qualitative", "grounded theory", "phenomenolog", "ethnograph"],
     "Level VI", "🔵", "Qualitative / Descriptive Study"),
    (["expert opinion", "committee report", "consensus"],
     "Level VII", "⚫", "Expert Opinion / Consensus"),
]
# Lower-case substrings that hint a sentence is relevant to nursing practice;
# used when scoring sentences for the "nursing implications" summary field.
_NURSING_KEYWORDS = [
    # who / where
    "nurse", "nursing", "patient", "care", "clinical", "practice",
    # what was done and what it showed
    "intervention", "outcome", "recommend", "implication", "significant",
    # direction of effect and practice themes
    "effective", "improve", "reduce", "prevent", "education", "safety",
]
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def infer_evidence_level(title: str, abstract: str = "") -> tuple[str, str, str]:
    """Return ``(level_code, emoji, description)`` inferred from the study text.

    The title and abstract are joined, lower-cased and checked against each
    entry of ``_EVIDENCE_RULES`` in list order; the first rule with a matching
    keyword wins.

    A keyword only matches at the start of a word. This keeps short keywords
    such as "rct" from firing inside unrelated words ("infarction") while
    stems such as "phenomenolog" still match "phenomenological".

    ``None`` for either argument is treated as an empty string. When no rule
    matches, ``("Unclassified", "⚪", "Evidence level not determined")`` is
    returned.
    """
    text = f"{title or ''} {abstract or ''}".lower()
    for keywords, code, emoji, desc in _EVIDENCE_RULES:
        if not keywords:
            # An empty alternation would match at every word boundary.
            continue
        # \b anchors the keyword to the beginning of a word; the end is left
        # open so plurals and stemmed keywords keep matching.
        pattern = r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + ")"
        if re.search(pattern, text):
            return code, emoji, desc
    return "Unclassified", "⚪", "Evidence level not determined"
def summarize(abstract: str, title: str = "") -> dict:
    """
    Parse an abstract into nursing-focused sections.

    Returns a dict with keys:
    overview, methods, key_findings, nursing_implications, evidence_level

    Each text field prefers the matching labelled section of a structured
    abstract and falls back to a sentence heuristic when that section is
    missing:

    * overview: "background", else "objective", else the first two sentences
    * methods: "methods", else an empty string
    * key_findings: "results", else conclusion-like sentences
    * nursing_implications: "conclusions", else the highest-scoring
      nursing-related sentences

    ``evidence_level`` is the tuple returned by ``infer_evidence_level``.

    An abstract that is missing, blank, or equal to the placeholder
    "Abstract not available." (ignoring surrounding whitespace) yields the
    fixed placeholder summary from ``_empty_summary``. A ``None`` title is
    treated as an empty string.
    """
    # Normalise once so whitespace-only input and a padded placeholder are
    # caught by the same guard.
    text = (abstract or "").strip()
    if not text or text == "Abstract not available.":
        return _empty_summary()

    sections = _parse_structured(text)

    overview = (
        sections.get("background")
        or sections.get("objective")
        or _first_sentences(text, 2)
    )
    methods = sections.get("methods", "")
    key_findings = sections.get("results") or _extract_conclusion(text)
    nursing_implications = (
        sections.get("conclusions") or _extract_nursing_sentences(text)
    )
    ev_level = infer_evidence_level(title or "", text)

    return {
        "overview": _trim(overview, 450),
        "methods": _trim(methods, 350),
        "key_findings": _trim(key_findings, 450),
        "nursing_implications": _trim(nursing_implications, 400),
        "evidence_level": ev_level,
    }
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _parse_structured(abstract: str) -> dict[str, str]:
    """Extract sections from a PubMed-style structured abstract.

    The abstract is read line by line. A line that starts with a recognised
    header, either bold markdown (``**METHODS:** ...``) or plain ALL-CAPS
    (``METHODS: ...``), switches the current section to the header's canonical
    name from ``_ALIAS_MAP``. Every other non-blank line is appended to the
    current section. Text that appears before the first recognised header is
    filed under "background".

    Headers that map to the same canonical section (for example DESIGN,
    SETTING and PARTICIPANTS all map to "methods") are concatenated in
    document order rather than overwriting one another.

    Returns a dict mapping canonical section name to its text, with lines
    joined by single spaces. Sections with no text are omitted.
    """
    # Match bold markdown labels (from fetch_abstract) or plain ALL-CAPS labels
    label_re = re.compile(
        r"^\*\*([A-Z][A-Z\s/&]+?):\*\*\s*(.*)|^([A-Z][A-Z\s/&]+?):\s*(.*)"
    )
    collected: dict[str, list[str]] = {}
    # Default bucket for lines that precede any recognised label.
    current_key = "background"

    for raw_line in abstract.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        m = label_re.match(line)
        if m:
            label = (m.group(1) or m.group(3) or "").strip().upper()
            canonical = _ALIAS_MAP.get(label)
            if canonical:
                current_key = canonical
                # Keep only the text after the header; a bare header line
                # contributes nothing by itself.
                line = (m.group(2) or m.group(4) or "").strip()
                if not line:
                    continue
            # An unrecognised label falls through and is kept as ordinary text.
        collected.setdefault(current_key, []).append(line)

    return {key: " ".join(parts) for key, parts in collected.items()}
def _split_sentences(text: str) -> list[str]:
    """Split *text* into sentences.

    A boundary is any run of whitespace that directly follows ".", "!" or "?".
    Each piece is stripped and empty pieces are dropped.
    """
    pieces = re.split(r"(?<=[.!?])\s+", text)
    stripped = (piece.strip() for piece in pieces)
    return [piece for piece in stripped if piece]
def _first_sentences(text: str, n: int) -> str:
    """Return the first *n* sentences of *text*, joined by single spaces."""
    sentences = _split_sentences(text)
    leading = sentences[:n]
    return " ".join(leading)
def _extract_conclusion(text: str) -> str:
    """Return up to two conclusion-like sentences from *text*.

    A sentence qualifies when its lower-cased form contains any of the cue
    phrases below. The first two qualifying sentences are returned, joined by
    a space. If none qualify, the last two sentences of *text* are returned
    instead.
    """
    cue_phrases = (
        "conclude", "conclusion", "suggest", "recommend",
        "therefore", "thus", "in summary", "findings indicate",
        "results show", "results suggest",
    )
    sentences = _split_sentences(text)

    matching = []
    for sentence in sentences:
        lowered = sentence.lower()
        if any(cue in lowered for cue in cue_phrases):
            matching.append(sentence)

    if not matching:
        return " ".join(sentences[-2:])
    return " ".join(matching[:2])
def _extract_nursing_sentences(text: str) -> str:
    """Return the two sentences of *text* most relevant to nursing practice.

    Each sentence is scored by how many entries of ``_NURSING_KEYWORDS`` occur
    in its lower-cased form; sentences scoring zero are ignored. The two
    highest-scoring sentences are selected, with ties going to the sentence
    that appears earlier, and are returned in their original reading order,
    joined by a space.

    When no sentence contains a keyword, a fixed fallback message is returned.
    """
    ranked = []
    for position, sentence in enumerate(_split_sentences(text)):
        lowered = sentence.lower()
        score = sum(1 for kw in _NURSING_KEYWORDS if kw in lowered)
        if score:
            # Negated score sorts best-first; the unique position breaks ties
            # by document order, so sentence text is never compared.
            ranked.append((-score, position, sentence))

    if not ranked:
        return "Review the full abstract for nursing practice implications."

    best = sorted(ranked)[:2]
    # Present the selected sentences in the order they appear in the abstract.
    best.sort(key=lambda item: item[1])
    return " ".join(sentence for _, _, sentence in best)
def _trim(text: str, limit: int) -> str:
    """Strip *text* and shorten it to roughly *limit* characters.

    Falsy input yields an empty string. Text no longer than *limit* after
    stripping is returned unchanged. Longer text is cut at *limit*, the
    trailing fragment after the last space is dropped (if there is a space),
    and "…" is appended.
    """
    if not text:
        return ""
    cleaned = text.strip()
    if len(cleaned) <= limit:
        return cleaned
    head, *_ = cleaned[:limit].rsplit(" ", 1)
    return head + "…"
def _empty_summary() -> dict:
    """Build the placeholder summary used when no abstract text is available.

    The result has the same keys as a regular summary: fixed placeholder
    strings for the text fields and an "Unclassified" evidence-level tuple.
    """
    unclassified = ("Unclassified", "⚪", "Evidence level not determined")
    return dict(
        overview="Abstract not available for this article.",
        methods="",
        key_findings="See full article.",
        nursing_implications="Review the full article for nursing implications.",
        evidence_level=unclassified,
    )
|