Lincoln Gombedza Claude Sonnet 4.6 committed on
Commit
4e6aab3
·
1 Parent(s): 88a2b0d

fix: citation author parsing bug + replace broken NHS K&L Hub link

Browse files

Citation fix (utils/citations.py):
- Add _is_initials_token() helper — detects PubMed/Europe PMC "Surname JK"
format where the last token is run-together uppercase initials (1-4 chars)
- Add _initials_from_str() helper — expands "JK" → "J. K." for Harvard/APA
- Apply fix to all four formatters: Harvard, APA 7th, Vancouver, AMA
- Before: "Smith JK" → "JK, S." (last token wrongly treated as surname)
- After: "Smith JK" → "Smith, J. K." (Harvard) / "Smith JK" (Vancouver/AMA)
- Also handles multi-word surnames: "van der Berg JK" → "van der Berg, J. K."

Link fix (streamlit_app.py):
- Replace broken NHS K&L Hub (Scottish-only .nhs.scot URL) with TRIP Database
- TRIP is a free, no-login evidence-based practice (EBP) search covering RCTs, guidelines, and reviews
- URL: https://www.tripdatabase.com/search?criteria={query}

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. streamlit_app.py +2 -2
  2. utils/citations.py +49 -0
streamlit_app.py CHANGED
@@ -465,8 +465,8 @@ with tab_search:
465
  use_container_width=True,
466
  )
467
  link_cols[2].link_button(
468
- "🟢 NHS K&L Hub",
469
- f"https://www.knowledgeandevidence.nhs.scot/search?query={q_enc}",
470
  use_container_width=True,
471
  )
472
  link_cols[3].link_button(
 
465
  use_container_width=True,
466
  )
467
  link_cols[2].link_button(
468
+ "🟢 TRIP Database",
469
+ f"https://www.tripdatabase.com/search?criteria={q_enc}",
470
  use_container_width=True,
471
  )
472
  link_cols[3].link_button(
utils/citations.py CHANGED
@@ -74,6 +74,11 @@ def _apa_single(name: str) -> str:
74
  return f"{last.strip()}, {initials}"
75
  parts = name.split()
76
  if len(parts) >= 2:
 
 
 
 
 
77
  last = parts[-1]
78
  initials = _initials(" ".join(parts[:-1]))
79
  return f"{last}, {initials}"
@@ -147,6 +152,11 @@ def _vancouver_single(name: str) -> str:
147
  return f"{last.strip()} {initials}"
148
  parts = name.split()
149
  if len(parts) >= 2:
 
 
 
 
 
150
  last = parts[-1]
151
  initials = "".join(p[0].upper() for p in parts[:-1] if p)
152
  return f"{last} {initials}"
@@ -222,6 +232,12 @@ def _harvard_single(name: str) -> str:
222
  return f"{last.strip()}, {initials}"
223
  parts = name.split()
224
  if len(parts) >= 2:
 
 
 
 
 
 
225
  last = parts[-1]
226
  initials = _initials(" ".join(parts[:-1]))
227
  return f"{last}, {initials}"
@@ -290,6 +306,11 @@ def _ama_single(name: str) -> str:
290
  return f"{last.strip()} {initials}"
291
  parts = name.split()
292
  if len(parts) >= 2:
 
 
 
 
 
293
  last = parts[-1]
294
  initials = "".join(p[0].upper() for p in parts[:-1] if p)
295
  return f"{last} {initials}"
@@ -306,6 +327,34 @@ def _initials(first_middle: str) -> str:
306
  return " ".join(p[0].upper() + "." for p in parts if p)
307
 
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  def _clean_title(title: str) -> str:
310
  return title.strip().rstrip(".")
311
 
 
74
  return f"{last.strip()}, {initials}"
75
  parts = name.split()
76
  if len(parts) >= 2:
77
+ # Detect PubMed/Europe PMC "Surname JK" format
78
+ if _is_initials_token(parts[-1]):
79
+ last = " ".join(parts[:-1]) # handles "van der Berg JK"
80
+ initials = _initials_from_str(parts[-1])
81
+ return f"{last}, {initials}"
82
  last = parts[-1]
83
  initials = _initials(" ".join(parts[:-1]))
84
  return f"{last}, {initials}"
 
152
  return f"{last.strip()} {initials}"
153
  parts = name.split()
154
  if len(parts) >= 2:
155
+ # Detect PubMed/Europe PMC "Surname JK" format
156
+ if _is_initials_token(parts[-1]):
157
+ last = " ".join(parts[:-1])
158
+ initials = parts[-1].upper() # already run-together: "JK" stays "JK"
159
+ return f"{last} {initials}"
160
  last = parts[-1]
161
  initials = "".join(p[0].upper() for p in parts[:-1] if p)
162
  return f"{last} {initials}"
 
232
  return f"{last.strip()}, {initials}"
233
  parts = name.split()
234
  if len(parts) >= 2:
235
+ # Detect PubMed/Europe PMC "Surname JK" format (e.g. "Smith JK")
236
+ # Without this check, "Smith JK" → last="JK", initials="S." → "JK, S." (wrong)
237
+ if _is_initials_token(parts[-1]):
238
+ last = " ".join(parts[:-1]) # handles multi-word surnames too
239
+ initials = _initials_from_str(parts[-1]) # "JK" → "J. K."
240
+ return f"{last}, {initials}"
241
  last = parts[-1]
242
  initials = _initials(" ".join(parts[:-1]))
243
  return f"{last}, {initials}"
 
306
  return f"{last.strip()} {initials}"
307
  parts = name.split()
308
  if len(parts) >= 2:
309
+ # Detect PubMed/Europe PMC "Surname JK" format
310
+ if _is_initials_token(parts[-1]):
311
+ last = " ".join(parts[:-1])
312
+ initials = parts[-1].upper() # AMA uses run-together: "Smith JK"
313
+ return f"{last} {initials}"
314
  last = parts[-1]
315
  initials = "".join(p[0].upper() for p in parts[:-1] if p)
316
  return f"{last} {initials}"
 
327
  return " ".join(p[0].upper() + "." for p in parts if p)
328
 
329
 
330
+ def _initials_from_str(token: str) -> str:
331
+ """
332
+ Expand a run-together initials token from PubMed/Europe PMC into dotted
333
+ initials: "JK" → "J. K." | "AB" → "A. B." | "J" → "J."
334
+ """
335
+ return " ".join(c.upper() + "." for c in token if c.isalpha())
336
+
337
+
338
+ def _is_initials_token(token: str) -> bool:
339
+ """
340
+ Return True when a name token looks like run-together initials rather than
341
+ a proper word — i.e. it is 1–4 uppercase letters with no vowels OR is
342
+ entirely uppercase and short.
343
+
344
+ Examples that return True : "JK", "AB", "J", "JKL", "ABCD"
345
+ Examples that return False: "Smith", "John", "van", "De", "O'Brien"
346
+ """
347
+ if not token or not token.isalpha():
348
+ return False
349
+ if len(token) > 4:
350
+ return False
351
+ # Single letter is always an initial
352
+ if len(token) == 1:
353
+ return token.isupper()
354
+ # Multi-char: must be all-uppercase (PubMed stores "Smith JK", not "Smith jk")
355
+ return token.isupper()
356
+
357
+
358
  def _clean_title(title: str) -> str:
359
  return title.strip().rstrip(".")
360