File size: 7,231 Bytes
5095870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
"""
Nursing-focused abstract summariser.

No ML model required — uses structured-abstract parsing + heuristic sentence
scoring so the app stays fast on a free CPU Hugging Face Space.
"""

import re

# Section headers commonly found in PubMed structured abstracts
_SECTION_ALIASES = {
    "background":     ["BACKGROUND", "INTRODUCTION", "CONTEXT"],
    "objective":      ["OBJECTIVE", "OBJECTIVES", "PURPOSE", "AIM", "AIMS", "GOAL"],
    "methods":        ["METHODS", "METHOD", "DESIGN", "PARTICIPANTS", "SETTING",
                       "MATERIALS AND METHODS", "STUDY DESIGN"],
    "results":        ["RESULTS", "FINDINGS", "OUTCOMES"],
    "conclusions":    ["CONCLUSIONS", "CONCLUSION", "IMPLICATIONS",
                       "CLINICAL IMPLICATIONS", "SIGNIFICANCE", "SUMMARY"],
}

# Build reverse map: uppercase alias → canonical key
_ALIAS_MAP: dict[str, str] = {}
for canonical, aliases in _SECTION_ALIASES.items():
    for alias in aliases:
        _ALIAS_MAP[alias.upper()] = canonical

# Evidence-level inference (Melnyk & Fineout-Overholt hierarchy)
_EVIDENCE_RULES = [
    (["systematic review", "meta-analysis", "meta analysis"],
     "Level I",   "🟢", "Systematic Review / Meta-Analysis"),
    (["randomized controlled", "randomised controlled", "rct"],
     "Level II",  "🟡", "Randomized Controlled Trial"),
    (["quasi-experimental", "quasi experimental", "pre-post", "before-after"],
     "Level III", "🟠", "Quasi-Experimental Study"),
    (["cohort", "case-control", "case control", "longitudinal"],
     "Level IV",  "🟠", "Cohort / Case-Control Study"),
    (["systematic review of qualitative"],
     "Level V",   "🔵", "Systematic Review of Qualitative Studies"),
    (["qualitative", "grounded theory", "phenomenolog", "ethnograph"],
     "Level VI",  "🔵", "Qualitative / Descriptive Study"),
    (["expert opinion", "committee report", "consensus"],
     "Level VII", "⚫", "Expert Opinion / Consensus"),
]

# Keywords that suggest nursing relevance for implication extraction
_NURSING_KEYWORDS = [
    "nurse", "nursing", "patient", "care", "clinical", "practice",
    "intervention", "outcome", "recommend", "implication", "significant",
    "effective", "improve", "reduce", "prevent", "education", "safety",
]


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def infer_evidence_level(title: str, abstract: str = "") -> tuple[str, str, str]:
    """Classify a study against the evidence-level rule table.

    The title and abstract are combined, lower-cased and checked against each
    rule in ``_EVIDENCE_RULES`` in order; the first rule with a matching
    keyword decides the result.  A keyword only matches where a word starts,
    so "rct" is found in "RCT" or "RCTs" but not inside "infarction", while
    stem keywords such as "phenomenolog" still match "phenomenological".

    Args:
        title: Article title.  ``None`` is treated as an empty string.
        abstract: Abstract text.  ``None`` is treated as an empty string.

    Returns:
        ``(level_code, emoji, description)`` of the first matching rule, or
        ``("Unclassified", "⚪", "Evidence level not determined")`` when no
        rule matches.
    """
    text = f"{title or ''} {abstract or ''}".lower()
    for keywords, code, emoji, desc in _EVIDENCE_RULES:
        # (?<!\w) pins the keyword to the start of a word; the end is left
        # open so plurals and stemmed keywords keep matching.
        if any(re.search(r"(?<!\w)" + re.escape(kw), text) for kw in keywords):
            return code, emoji, desc
    return "Unclassified", "⚪", "Evidence level not determined"


def summarize(abstract: str, title: str = "") -> dict:
    """
    Build a nursing-focused summary of an abstract.

    An empty abstract, or the placeholder "Abstract not available.", yields
    the canned summary from ``_empty_summary()``.  Otherwise the abstract is
    split into labelled sections and each output field falls back to a
    heuristic when its section is missing:

        overview              background, else objective, else the first two
                              sentences (trimmed to 450 characters)
        methods               methods section, else "" (350)
        key_findings          results, else conclusion-like sentences (450)
        nursing_implications  conclusions, else the top-scoring nursing
                              sentences (400)
        evidence_level        the tuple from ``infer_evidence_level``
    """
    if not abstract or abstract == "Abstract not available.":
        return _empty_summary()

    parsed = _parse_structured(abstract)

    intro = parsed.get("background") or parsed.get("objective")
    if not intro:
        intro = _first_sentences(abstract, 2)

    findings = parsed.get("results")
    if not findings:
        findings = _extract_conclusion(abstract)

    implications = parsed.get("conclusions")
    if not implications:
        implications = _extract_nursing_sentences(abstract)

    summary = {
        "overview": _trim(intro, 450),
        "methods": _trim(parsed.get("methods", ""), 350),
        "key_findings": _trim(findings, 450),
        "nursing_implications": _trim(implications, 400),
    }
    summary["evidence_level"] = infer_evidence_level(title, abstract)
    return summary


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

# A section label at the start of a line: either a bold markdown label
# ("**METHODS:** text") or a plain ALL-CAPS one ("METHODS: text").
_LABEL_RE = re.compile(
    r"^\*\*([A-Z][A-Z\s/&]+?):\*\*\s*(.*)|^([A-Z][A-Z\s/&]+?):\s*(.*)"
)


def _parse_structured(abstract: str) -> dict[str, str]:
    """Extract sections from a PubMed-style structured abstract.

    The abstract is read line by line.  A line that opens with a label known
    to ``_ALIAS_MAP`` starts (or resumes) that canonical section; every other
    non-blank line is added to the section currently open.  Text that appears
    before the first recognised label is filed under "background".

    Several labels can map to the same canonical section (for example DESIGN
    and SETTING both map to "methods").  Their text is merged in reading
    order instead of the later label overwriting the earlier one.

    Args:
        abstract: Abstract text with one section per line or paragraph.

    Returns:
        Mapping of canonical section name to its text, with the pieces joined
        by single spaces.  Sections without any text are omitted; an empty
        abstract gives an empty dict.
    """
    if not abstract:
        return {}

    chunks: dict[str, list[str]] = {}
    current_key = None

    for raw_line in abstract.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        m = _LABEL_RE.match(line)
        if m:
            label = (m.group(1) or m.group(3) or "").strip().upper()
            canonical = _ALIAS_MAP.get(label)
            if canonical:
                current_key = canonical
                rest = (m.group(2) or m.group(4) or "").strip()
                if rest:
                    chunks.setdefault(canonical, []).append(rest)
                continue
            # Unrecognised label: fall through and keep the line as body text.

        # Lines before any recognised label go to the default bucket.
        chunks.setdefault(current_key or "background", []).append(line)

    return {key: " ".join(parts) for key, parts in chunks.items()}


def _split_sentences(text: str) -> list[str]:
    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]


def _first_sentences(text: str, n: int) -> str:
    """Return the first *n* sentences of *text*, joined by single spaces."""
    leading = _split_sentences(text)[:n]
    return " ".join(leading)


def _extract_conclusion(text: str) -> str:
    """Pick up to two sentences that read like a conclusion.

    A sentence qualifies when its lower-cased form contains one of the marker
    phrases below.  The first two qualifying sentences are returned joined by
    a space; when none qualifies, the last two sentences of *text* are
    returned instead.
    """
    markers = (
        "conclude", "conclusion", "suggest", "recommend",
        "therefore", "thus", "in summary", "findings indicate",
        "results show", "results suggest",
    )
    sentences = _split_sentences(text)

    picked = []
    for sentence in sentences:
        lowered = sentence.lower()
        if any(marker in lowered for marker in markers):
            picked.append(sentence)
            if len(picked) == 2:
                break

    if not picked:
        picked = sentences[-2:]
    return " ".join(picked)


def _extract_nursing_sentences(text: str) -> str:
    """Return the two sentences of *text* richest in nursing keywords.

    Every sentence is scored by how many entries of ``_NURSING_KEYWORDS``
    occur in its lower-cased form; sentences scoring zero are ignored.  The
    two best are returned highest score first, joined by a space.  Sentences
    with equal scores keep the order in which they appear in *text*.

    Returns a fixed pointer to the full abstract when no sentence contains
    any keyword.
    """
    scored = []
    for sentence in _split_sentences(text):
        lowered = sentence.lower()
        score = sum(1 for kw in _NURSING_KEYWORDS if kw in lowered)
        if score:
            scored.append((score, sentence))

    # Sort on the score alone.  Comparing whole (score, sentence) tuples would
    # break ties by reverse-alphabetical sentence text; list.sort is stable
    # (also with reverse=True), so ties now stay in abstract order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    top = [sentence for _, sentence in scored[:2]]
    return " ".join(top) if top else "Review the full abstract for nursing practice implications."


def _trim(text: str, limit: int) -> str:
    if not text:
        return ""
    text = text.strip()
    return text[:limit].rsplit(" ", 1)[0] + "…" if len(text) > limit else text


def _empty_summary() -> dict:
    return {
        "overview":             "Abstract not available for this article.",
        "methods":              "",
        "key_findings":         "See full article.",
        "nursing_implications": "Review the full article for nursing implications.",
        "evidence_level":       ("Unclassified", "⚪", "Evidence level not determined"),
    }