3v324v23 committed on
Commit
74387ee
·
1 Parent(s): 45dc8b6

Fix: false 100% confidence bug and dummy visit fallback

Browse files
Files changed (2) hide show
  1. api/extract_report.py +14 -9
  2. dashboard/app.js +9 -6
api/extract_report.py CHANGED
@@ -389,15 +389,17 @@ def parse_table_format(raw_text: str) -> List[dict]:
389
 
390
 
391
  def parse_column_format(raw_text: str) -> List[dict]:
392
- """Parser B Column-per-visit layout with header row."""
393
  lines = [l.strip() for l in raw_text.split('\n') if l.strip()]
394
  header_idx = -1
395
  header_fields = []
396
 
397
  for i, line in enumerate(lines):
398
- parts = re.split(r'\s{2,}|\t', line)
 
399
  known = [match_label(p) for p in parts]
400
- if sum(f is not None for f in known) >= 3:
 
401
  header_idx = i
402
  header_fields = known
403
  break
@@ -406,10 +408,14 @@ def parse_column_format(raw_text: str) -> List[dict]:
406
  return []
407
 
408
  visits = []
409
- for line in lines[header_idx + 1:]:
 
410
  if is_skip_line(line):
411
  continue
412
- parts = re.split(r'\s{2,}|\t', line)
 
 
 
413
  visit = {}
414
  for col_idx, field in enumerate(header_fields):
415
  if field is None or col_idx >= len(parts):
@@ -551,15 +557,14 @@ async def extract_report(request: ExtractRequest):
551
 
552
  # 4. Handle complete failure
553
  if not best_visits or best_score == 0:
554
- fallback_text = all_texts[0][:300] if all_texts else "No text extracted"
555
  return ExtractResponse(
556
- visits = [ExtractedVisit()],
557
  patient_id = None,
558
  notes = (
559
  f"Image type detected: {img_type}. "
560
  "No structured data could be extracted. "
561
- "Likely causes: very blurry, pure handwriting, or unusual layout. "
562
- f"Raw OCR text: {fallback_text}"
563
  ),
564
  confidence = 0.0,
565
  raw_text = fallback_text,
 
389
 
390
 
391
  def parse_column_format(raw_text: str) -> List[dict]:
392
+ """Parser B -- Column-per-visit layout with header row."""
393
  lines = [l.strip() for l in raw_text.split('\n') if l.strip()]
394
  header_idx = -1
395
  header_fields = []
396
 
397
  for i, line in enumerate(lines):
398
+ # Relaxed split: look for 1+ spaces or tabs
399
+ parts = re.split(r'\s{1,}|\t', line)
400
  known = [match_label(p) for p in parts]
401
+ # Must find at least 2 known labels to be a header
402
+ if sum(f is not None for f in known) >= 2:
403
  header_idx = i
404
  header_fields = known
405
  break
 
408
  return []
409
 
410
  visits = []
411
+ # Maximum lookahead for values
412
+ for line in lines[header_idx + 1:header_idx + 10]:
413
  if is_skip_line(line):
414
  continue
415
+ # Use simple split for data rows too
416
+ parts = re.split(r'\s{1,}|\t', line)
417
+ if not parts: continue
418
+
419
  visit = {}
420
  for col_idx, field in enumerate(header_fields):
421
  if field is None or col_idx >= len(parts):
 
557
 
558
  # 4. Handle complete failure
559
  if not best_visits or best_score == 0:
560
+ fallback_text = all_texts[0][:200] if all_texts else "No text extracted"
561
  return ExtractResponse(
562
+ visits = [],
563
  patient_id = None,
564
  notes = (
565
  f"Image type detected: {img_type}. "
566
  "No structured data could be extracted. "
567
+ "Check image quality or unusual layout."
 
568
  ),
569
  confidence = 0.0,
570
  raw_text = fallback_text,
dashboard/app.js CHANGED
@@ -196,16 +196,19 @@ async function extractFromImage() {
196
 
197
  visits.forEach(visit => addVisit(visit));
198
 
199
- const confPct = Math.round((data.confidence || 1.0) * 100);
200
  const confColor = confPct >= 80 ? "var(--green)" : confPct >= 60 ? "var(--amber)" : "var(--red)";
201
  const confEl = document.getElementById("confidence-strip");
202
  confEl.style.display = "block";
203
  confEl.innerHTML = `
204
- <span style="color:${confColor};font-weight:600;">
205
- Extraction confidence: ${confPct}%
206
- </span>
207
- ${confPct < 80 ? " -- verify highlighted fields" : " -- extraction successful"}
208
- ${data.notes ? `<br><small>${data.notes}</small>` : ""}
 
 
 
209
  `;
210
 
211
  statusEl.className = "extract-status success";
 
196
 
197
  visits.forEach(visit => addVisit(visit));
198
 
199
+ const confPct = Math.round((data.confidence !== undefined ? data.confidence : 1.0) * 100);
200
  const confColor = confPct >= 80 ? "var(--green)" : confPct >= 60 ? "var(--amber)" : "var(--red)";
201
  const confEl = document.getElementById("confidence-strip");
202
  confEl.style.display = "block";
203
  confEl.innerHTML = `
204
+ <div style="margin-bottom:8px;">
205
+ <span style="color:${confColor};font-weight:600;">
206
+ Extraction confidence: ${confPct}%
207
+ </span>
208
+ ${confPct < 80 ? " -- verify fields" : " -- extraction successful"}
209
+ </div>
210
+ ${data.notes ? `<div style="font-size:0.85rem;color:var(--text-muted);border-top:1px solid #eee;padding-top:8px;">${data.notes}</div>` : ""}
211
+ ${data.raw_text ? `<div style="font-size:0.75rem;color:#999;margin-top:4px;font-family:monospace;background:#f9f9f9;padding:4px;border-radius:4px;">OCR: ${data.raw_text}...</div>` : ""}
212
  `;
213
 
214
  statusEl.className = "extract-status success";