fix: citation author parsing bug + replace broken NHS K&L Hub link
Browse files
Citation fix (utils/citations.py):
- Add _is_initials_token() helper — detects PubMed/Europe PMC "Surname JK"
format where the last token is run-together uppercase initials (1-4 chars)
- Add _initials_from_str() helper — expands "JK" → "J. K." for Harvard/APA
- Apply fix to all four formatters: Harvard, APA 7th, Vancouver, AMA
- Before: "Smith JK" → "JK, S." (last token wrongly treated as surname)
- After: "Smith JK" → "Smith, J. K." (Harvard) / "Smith JK" (Vancouver/AMA)
- Also handles multi-word surnames: "van der Berg JK" → "van der Berg, J. K."
Link fix (streamlit_app.py):
- Replace broken NHS K&L Hub (Scottish-only .nhs.scot URL) with TRIP Database
- TRIP is free, no-login EBP search covering RCTs, guidelines, reviews
- URL: https://www.tripdatabase.com/search?criteria={query}
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- streamlit_app.py +2 -2
- utils/citations.py +49 -0
|
@@ -465,8 +465,8 @@ with tab_search:
|
|
| 465 |
use_container_width=True,
|
| 466 |
)
|
| 467 |
link_cols[2].link_button(
|
| 468 |
-
"🟢
|
| 469 |
-
f"https://www.
|
| 470 |
use_container_width=True,
|
| 471 |
)
|
| 472 |
link_cols[3].link_button(
|
|
|
|
| 465 |
use_container_width=True,
|
| 466 |
)
|
| 467 |
link_cols[2].link_button(
|
| 468 |
+
"🟢 TRIP Database",
|
| 469 |
+
f"https://www.tripdatabase.com/search?criteria={q_enc}",
|
| 470 |
use_container_width=True,
|
| 471 |
)
|
| 472 |
link_cols[3].link_button(
|
|
@@ -74,6 +74,11 @@ def _apa_single(name: str) -> str:
|
|
| 74 |
return f"{last.strip()}, {initials}"
|
| 75 |
parts = name.split()
|
| 76 |
if len(parts) >= 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
last = parts[-1]
|
| 78 |
initials = _initials(" ".join(parts[:-1]))
|
| 79 |
return f"{last}, {initials}"
|
|
@@ -147,6 +152,11 @@ def _vancouver_single(name: str) -> str:
|
|
| 147 |
return f"{last.strip()} {initials}"
|
| 148 |
parts = name.split()
|
| 149 |
if len(parts) >= 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
last = parts[-1]
|
| 151 |
initials = "".join(p[0].upper() for p in parts[:-1] if p)
|
| 152 |
return f"{last} {initials}"
|
|
@@ -222,6 +232,12 @@ def _harvard_single(name: str) -> str:
|
|
| 222 |
return f"{last.strip()}, {initials}"
|
| 223 |
parts = name.split()
|
| 224 |
if len(parts) >= 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
last = parts[-1]
|
| 226 |
initials = _initials(" ".join(parts[:-1]))
|
| 227 |
return f"{last}, {initials}"
|
|
@@ -290,6 +306,11 @@ def _ama_single(name: str) -> str:
|
|
| 290 |
return f"{last.strip()} {initials}"
|
| 291 |
parts = name.split()
|
| 292 |
if len(parts) >= 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
last = parts[-1]
|
| 294 |
initials = "".join(p[0].upper() for p in parts[:-1] if p)
|
| 295 |
return f"{last} {initials}"
|
|
@@ -306,6 +327,34 @@ def _initials(first_middle: str) -> str:
|
|
| 306 |
return " ".join(p[0].upper() + "." for p in parts if p)
|
| 307 |
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
def _clean_title(title: str) -> str:
|
| 310 |
return title.strip().rstrip(".")
|
| 311 |
|
|
|
|
| 74 |
return f"{last.strip()}, {initials}"
|
| 75 |
parts = name.split()
|
| 76 |
if len(parts) >= 2:
|
| 77 |
+
# Detect PubMed/Europe PMC "Surname JK" format
|
| 78 |
+
if _is_initials_token(parts[-1]):
|
| 79 |
+
last = " ".join(parts[:-1]) # handles "van der Berg JK"
|
| 80 |
+
initials = _initials_from_str(parts[-1])
|
| 81 |
+
return f"{last}, {initials}"
|
| 82 |
last = parts[-1]
|
| 83 |
initials = _initials(" ".join(parts[:-1]))
|
| 84 |
return f"{last}, {initials}"
|
|
|
|
| 152 |
return f"{last.strip()} {initials}"
|
| 153 |
parts = name.split()
|
| 154 |
if len(parts) >= 2:
|
| 155 |
+
# Detect PubMed/Europe PMC "Surname JK" format
|
| 156 |
+
if _is_initials_token(parts[-1]):
|
| 157 |
+
last = " ".join(parts[:-1])
|
| 158 |
+
initials = parts[-1].upper() # already run-together: "JK" stays "JK"
|
| 159 |
+
return f"{last} {initials}"
|
| 160 |
last = parts[-1]
|
| 161 |
initials = "".join(p[0].upper() for p in parts[:-1] if p)
|
| 162 |
return f"{last} {initials}"
|
|
|
|
| 232 |
return f"{last.strip()}, {initials}"
|
| 233 |
parts = name.split()
|
| 234 |
if len(parts) >= 2:
|
| 235 |
+
# Detect PubMed/Europe PMC "Surname JK" format (e.g. "Smith JK")
|
| 236 |
+
# Without this check, "Smith JK" → last="JK", initials="S." → "JK, S." (wrong)
|
| 237 |
+
if _is_initials_token(parts[-1]):
|
| 238 |
+
last = " ".join(parts[:-1]) # handles multi-word surnames too
|
| 239 |
+
initials = _initials_from_str(parts[-1]) # "JK" → "J. K."
|
| 240 |
+
return f"{last}, {initials}"
|
| 241 |
last = parts[-1]
|
| 242 |
initials = _initials(" ".join(parts[:-1]))
|
| 243 |
return f"{last}, {initials}"
|
|
|
|
| 306 |
return f"{last.strip()} {initials}"
|
| 307 |
parts = name.split()
|
| 308 |
if len(parts) >= 2:
|
| 309 |
+
# Detect PubMed/Europe PMC "Surname JK" format
|
| 310 |
+
if _is_initials_token(parts[-1]):
|
| 311 |
+
last = " ".join(parts[:-1])
|
| 312 |
+
initials = parts[-1].upper() # AMA uses run-together: "Smith JK"
|
| 313 |
+
return f"{last} {initials}"
|
| 314 |
last = parts[-1]
|
| 315 |
initials = "".join(p[0].upper() for p in parts[:-1] if p)
|
| 316 |
return f"{last} {initials}"
|
|
|
|
| 327 |
return " ".join(p[0].upper() + "." for p in parts if p)
|
| 328 |
|
| 329 |
|
| 330 |
+
def _initials_from_str(token: str) -> str:
|
| 331 |
+
"""
|
| 332 |
+
Expand a run-together initials token from PubMed/Europe PMC into dotted
|
| 333 |
+
initials: "JK" → "J. K." | "AB" → "A. B." | "J" → "J."
|
| 334 |
+
"""
|
| 335 |
+
return " ".join(c.upper() + "." for c in token if c.isalpha())
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def _is_initials_token(token: str) -> bool:
|
| 339 |
+
"""
|
| 340 |
+
Return True when a name token looks like run-together initials rather than
|
| 341 |
+
a proper word — i.e. it is 1–4 uppercase letters with no vowels OR is
|
| 342 |
+
entirely uppercase and short.
|
| 343 |
+
|
| 344 |
+
Examples that return True : "JK", "AB", "J", "JKL", "ABCD"
|
| 345 |
+
Examples that return False: "Smith", "John", "van", "De", "O'Brien"
|
| 346 |
+
"""
|
| 347 |
+
if not token or not token.isalpha():
|
| 348 |
+
return False
|
| 349 |
+
if len(token) > 4:
|
| 350 |
+
return False
|
| 351 |
+
# Single letter is always an initial
|
| 352 |
+
if len(token) == 1:
|
| 353 |
+
return token.isupper()
|
| 354 |
+
# Multi-char: must be all-uppercase (PubMed stores "Smith JK", not "Smith jk")
|
| 355 |
+
return token.isupper()
|
| 356 |
+
|
| 357 |
+
|
| 358 |
def _clean_title(title: str) -> str:
|
| 359 |
return title.strip().rstrip(".")
|
| 360 |
|