Spaces:
Sleeping
Sleeping
Fix: false 100% confidence bug and dummy visit fallback
Browse files
- api/extract_report.py +14 -9
- dashboard/app.js +9 -6
api/extract_report.py
CHANGED
|
@@ -389,15 +389,17 @@ def parse_table_format(raw_text: str) -> List[dict]:
|
|
| 389 |
|
| 390 |
|
| 391 |
def parse_column_format(raw_text: str) -> List[dict]:
|
| 392 |
-
"""Parser B
|
| 393 |
lines = [l.strip() for l in raw_text.split('\n') if l.strip()]
|
| 394 |
header_idx = -1
|
| 395 |
header_fields = []
|
| 396 |
|
| 397 |
for i, line in enumerate(lines):
|
| 398 |
-
|
|
|
|
| 399 |
known = [match_label(p) for p in parts]
|
| 400 |
-
|
|
|
|
| 401 |
header_idx = i
|
| 402 |
header_fields = known
|
| 403 |
break
|
|
@@ -406,10 +408,14 @@ def parse_column_format(raw_text: str) -> List[dict]:
|
|
| 406 |
return []
|
| 407 |
|
| 408 |
visits = []
|
| 409 |
-
|
|
|
|
| 410 |
if is_skip_line(line):
|
| 411 |
continue
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
| 413 |
visit = {}
|
| 414 |
for col_idx, field in enumerate(header_fields):
|
| 415 |
if field is None or col_idx >= len(parts):
|
|
@@ -551,15 +557,14 @@ async def extract_report(request: ExtractRequest):
|
|
| 551 |
|
| 552 |
# 4. Handle complete failure
|
| 553 |
if not best_visits or best_score == 0:
|
| 554 |
-
fallback_text = all_texts[0][:
|
| 555 |
return ExtractResponse(
|
| 556 |
-
visits = [
|
| 557 |
patient_id = None,
|
| 558 |
notes = (
|
| 559 |
f"Image type detected: {img_type}. "
|
| 560 |
"No structured data could be extracted. "
|
| 561 |
-
"
|
| 562 |
-
f"Raw OCR text: {fallback_text}"
|
| 563 |
),
|
| 564 |
confidence = 0.0,
|
| 565 |
raw_text = fallback_text,
|
|
|
|
| 389 |
|
| 390 |
|
| 391 |
def parse_column_format(raw_text: str) -> List[dict]:
|
| 392 |
+
"""Parser B -- Column-per-visit layout with header row."""
|
| 393 |
lines = [l.strip() for l in raw_text.split('\n') if l.strip()]
|
| 394 |
header_idx = -1
|
| 395 |
header_fields = []
|
| 396 |
|
| 397 |
for i, line in enumerate(lines):
|
| 398 |
+
# Relaxed split: look for 1+ spaces or tabs
|
| 399 |
+
parts = re.split(r'\s{1,}|\t', line)
|
| 400 |
known = [match_label(p) for p in parts]
|
| 401 |
+
# Must find at least 2 known labels to be a header
|
| 402 |
+
if sum(f is not None for f in known) >= 2:
|
| 403 |
header_idx = i
|
| 404 |
header_fields = known
|
| 405 |
break
|
|
|
|
| 408 |
return []
|
| 409 |
|
| 410 |
visits = []
|
| 411 |
+
# Maximum lookahead for values
|
| 412 |
+
for line in lines[header_idx + 1:header_idx + 10]:
|
| 413 |
if is_skip_line(line):
|
| 414 |
continue
|
| 415 |
+
# Use simple split for data rows too
|
| 416 |
+
parts = re.split(r'\s{1,}|\t', line)
|
| 417 |
+
if not parts: continue
|
| 418 |
+
|
| 419 |
visit = {}
|
| 420 |
for col_idx, field in enumerate(header_fields):
|
| 421 |
if field is None or col_idx >= len(parts):
|
|
|
|
| 557 |
|
| 558 |
# 4. Handle complete failure
|
| 559 |
if not best_visits or best_score == 0:
|
| 560 |
+
fallback_text = all_texts[0][:200] if all_texts else "No text extracted"
|
| 561 |
return ExtractResponse(
|
| 562 |
+
visits = [],
|
| 563 |
patient_id = None,
|
| 564 |
notes = (
|
| 565 |
f"Image type detected: {img_type}. "
|
| 566 |
"No structured data could be extracted. "
|
| 567 |
+
"Check image quality or unusual layout."
|
|
|
|
| 568 |
),
|
| 569 |
confidence = 0.0,
|
| 570 |
raw_text = fallback_text,
|
dashboard/app.js
CHANGED
|
@@ -196,16 +196,19 @@ async function extractFromImage() {
|
|
| 196 |
|
| 197 |
visits.forEach(visit => addVisit(visit));
|
| 198 |
|
| 199 |
-
const confPct = Math.round((data.confidence
|
| 200 |
const confColor = confPct >= 80 ? "var(--green)" : confPct >= 60 ? "var(--amber)" : "var(--red)";
|
| 201 |
const confEl = document.getElementById("confidence-strip");
|
| 202 |
confEl.style.display = "block";
|
| 203 |
confEl.innerHTML = `
|
| 204 |
-
<
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
| 209 |
`;
|
| 210 |
|
| 211 |
statusEl.className = "extract-status success";
|
|
|
|
| 196 |
|
| 197 |
visits.forEach(visit => addVisit(visit));
|
| 198 |
|
| 199 |
+
const confPct = Math.round((data.confidence !== undefined ? data.confidence : 1.0) * 100);
|
| 200 |
const confColor = confPct >= 80 ? "var(--green)" : confPct >= 60 ? "var(--amber)" : "var(--red)";
|
| 201 |
const confEl = document.getElementById("confidence-strip");
|
| 202 |
confEl.style.display = "block";
|
| 203 |
confEl.innerHTML = `
|
| 204 |
+
<div style="margin-bottom:8px;">
|
| 205 |
+
<span style="color:${confColor};font-weight:600;">
|
| 206 |
+
Extraction confidence: ${confPct}%
|
| 207 |
+
</span>
|
| 208 |
+
${confPct < 80 ? " -- verify fields" : " -- extraction successful"}
|
| 209 |
+
</div>
|
| 210 |
+
${data.notes ? `<div style="font-size:0.85rem;color:var(--text-muted);border-top:1px solid #eee;padding-top:8px;">${data.notes}</div>` : ""}
|
| 211 |
+
${data.raw_text ? `<div style="font-size:0.75rem;color:#999;margin-top:4px;font-family:monospace;background:#f9f9f9;padding:4px;border-radius:4px;">OCR: ${data.raw_text}...</div>` : ""}
|
| 212 |
`;
|
| 213 |
|
| 214 |
statusEl.className = "extract-status success";
|