Spaces:

ayushKishor
/

SheGuard

Sleeping

App Files Files Community

3v324v23 committed on Mar 23

Commit

74387ee

1 Parent(s): 45dc8b6

Fix: false 100% confidence bug and dummy visit fallback

Browse files

Files changed (2) hide show

api/extract_report.py +14 -9
dashboard/app.js +9 -6

api/extract_report.py CHANGED Viewed

@@ -389,15 +389,17 @@ def parse_table_format(raw_text: str) -> List[dict]:
 def parse_column_format(raw_text: str) -> List[dict]:
-    """Parser B — Column-per-visit layout with header row."""
     lines         = [l.strip() for l in raw_text.split('\n') if l.strip()]
     header_idx    = -1
     header_fields = []
     for i, line in enumerate(lines):
-        parts = re.split(r'\s{2,}|\t', line)
         known = [match_label(p) for p in parts]
-        if sum(f is not None for f in known) >= 3:
             header_idx    = i
             header_fields = known
             break
@@ -406,10 +408,14 @@ def parse_column_format(raw_text: str) -> List[dict]:
         return []
     visits = []
-    for line in lines[header_idx + 1:]:
         if is_skip_line(line):
             continue
-        parts = re.split(r'\s{2,}|\t', line)
         visit = {}
         for col_idx, field in enumerate(header_fields):
             if field is None or col_idx >= len(parts):
@@ -551,15 +557,14 @@ async def extract_report(request: ExtractRequest):
     # 4. Handle complete failure
     if not best_visits or best_score == 0:
-        fallback_text = all_texts[0][:300] if all_texts else "No text extracted"
         return ExtractResponse(
-            visits     = [ExtractedVisit()],
             patient_id = None,
             notes      = (
                 f"Image type detected: {img_type}. "
                 "No structured data could be extracted. "
-                "Likely causes: very blurry, pure handwriting, or unusual layout. "
-                f"Raw OCR text: {fallback_text}"
             ),
             confidence = 0.0,
             raw_text   = fallback_text,

 def parse_column_format(raw_text: str) -> List[dict]:
+    """Parser B -- Column-per-visit layout with header row."""
     lines         = [l.strip() for l in raw_text.split('\n') if l.strip()]
     header_idx    = -1
     header_fields = []
     for i, line in enumerate(lines):
+        # Relaxed split: look for 1+ spaces or tabs
+        parts = re.split(r'\s{1,}|\t', line)
         known = [match_label(p) for p in parts]
+        # Must find at least 2 known labels to be a header
+        if sum(f is not None for f in known) >= 2:
             header_idx    = i
             header_fields = known
             break
         return []
     visits = []
+    # Maximum lookahead for values
+    for line in lines[header_idx + 1:header_idx + 10]:
         if is_skip_line(line):
             continue
+        # Use simple split for data rows too
+        parts = re.split(r'\s{1,}|\t', line)
+        if not parts: continue
         visit = {}
         for col_idx, field in enumerate(header_fields):
             if field is None or col_idx >= len(parts):
     # 4. Handle complete failure
     if not best_visits or best_score == 0:
+        fallback_text = all_texts[0][:200] if all_texts else "No text extracted"
         return ExtractResponse(
+            visits     = [],
             patient_id = None,
             notes      = (
                 f"Image type detected: {img_type}. "
                 "No structured data could be extracted. "
+                "Check image quality or unusual layout."
             ),
             confidence = 0.0,
             raw_text   = fallback_text,

dashboard/app.js CHANGED Viewed

@@ -196,16 +196,19 @@ async function extractFromImage() {
     visits.forEach(visit => addVisit(visit));
-    const confPct = Math.round((data.confidence || 1.0) * 100);
     const confColor = confPct >= 80 ? "var(--green)" : confPct >= 60 ? "var(--amber)" : "var(--red)";
     const confEl = document.getElementById("confidence-strip");
     confEl.style.display = "block";
     confEl.innerHTML = `
-      <span style="color:${confColor};font-weight:600;">
-        Extraction confidence: ${confPct}%
-      </span>
-      ${confPct < 80 ? " -- verify highlighted fields" : " -- extraction successful"}
-      ${data.notes ? `<br><small>${data.notes}</small>` : ""}
     `;
     statusEl.className = "extract-status success";

     visits.forEach(visit => addVisit(visit));
+    const confPct = Math.round((data.confidence !== undefined ? data.confidence : 1.0) * 100);
     const confColor = confPct >= 80 ? "var(--green)" : confPct >= 60 ? "var(--amber)" : "var(--red)";
     const confEl = document.getElementById("confidence-strip");
     confEl.style.display = "block";
     confEl.innerHTML = `
+      <div style="margin-bottom:8px;">
+        <span style="color:${confColor};font-weight:600;">
+          Extraction confidence: ${confPct}%
+        </span>
+        ${confPct < 80 ? " -- verify fields" : " -- extraction successful"}
+      </div>
+      ${data.notes ? `<div style="font-size:0.85rem;color:var(--text-muted);border-top:1px solid #eee;padding-top:8px;">${data.notes}</div>` : ""}
+      ${data.raw_text ? `<div style="font-size:0.75rem;color:#999;margin-top:4px;font-family:monospace;background:#f9f9f9;padding:4px;border-radius:4px;">OCR: ${data.raw_text}...</div>` : ""}
     `;
     statusEl.className = "extract-status success";