Spaces:

quantumbit
/

invoice_extractor

Paused

App Files Files Community

github-actions[bot] committed on Feb 6

Commit

b168249

1 Parent(s): a54dc28

Sync from GitHub: 93ddaf17652e6085351336df41fe84c7fe1a4144

Browse files

Files changed (1) hide show

inference.py +35 -9

inference.py CHANGED Viewed

@@ -302,22 +302,48 @@ class InferenceProcessor:
         reasoning_text = ""
         extraction_json = output_text
         try:
-            # Try to parse as JSON
-            parsed = json.loads(output_text.strip())
             if "reasoning" in parsed:
                 reasoning_text = parsed["reasoning"]
                 # Remove reasoning from output to get clean extraction JSON
                 extraction_dict = {k: v for k, v in parsed.items() if k != "reasoning"}
                 extraction_json = json.dumps(extraction_dict)
-        except:
-            # If parsing fails, try to split manually
-            # Look for JSON pattern
-            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', output_text, re.DOTALL)
             if json_match:
-                extraction_json = json_match.group(0)
-                # Everything before JSON is reasoning
-                reasoning_text = output_text[:json_match.start()].strip()
         print(f"🧠 Combined reasoning + extraction completed in {latency:.2f}s")
         return reasoning_text, extraction_json, latency

         reasoning_text = ""
         extraction_json = output_text
+        # First, remove markdown code fences if present
+        cleaned_output = output_text.strip()
+        if cleaned_output.startswith('```'):
+            # Remove opening ```json or ```
+            lines = cleaned_output.split('\n')
+            if lines[0].startswith('```'):
+                lines = lines[1:]
+            # Remove closing ```
+            if lines and lines[-1].strip() == '```':
+                lines = lines[:-1]
+            cleaned_output = '\n'.join(lines).strip()
         try:
+            # Try to parse the cleaned JSON
+            parsed = json.loads(cleaned_output)
             if "reasoning" in parsed:
                 reasoning_text = parsed["reasoning"]
                 # Remove reasoning from output to get clean extraction JSON
                 extraction_dict = {k: v for k, v in parsed.items() if k != "reasoning"}
                 extraction_json = json.dumps(extraction_dict)
+            else:
+                # No reasoning field, use entire output as extraction
+                extraction_json = cleaned_output
+        except json.JSONDecodeError:
+            # If parsing fails, try to find JSON pattern in the text
+            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', cleaned_output, re.DOTALL)
             if json_match:
+                json_str = json_match.group(0)
+                try:
+                    parsed = json.loads(json_str)
+                    if "reasoning" in parsed:
+                        reasoning_text = parsed["reasoning"]
+                        extraction_dict = {k: v for k, v in parsed.items() if k != "reasoning"}
+                        extraction_json = json.dumps(extraction_dict)
+                    else:
+                        extraction_json = json_str
+                except:
+                    extraction_json = json_str
+                # Everything before JSON is additional reasoning
+                prefix = cleaned_output[:json_match.start()].strip()
+                if prefix and not reasoning_text:
+                    reasoning_text = prefix
         print(f"🧠 Combined reasoning + extraction completed in {latency:.2f}s")
         return reasoning_text, extraction_json, latency