PDF_Upload

Sleeping

App Files Files Community

Seth0330 committed on May 20, 2025

Commit

0be1fe6

verified ·

1 Parent(s): a025f40

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -34

app.py CHANGED Viewed

@@ -71,7 +71,7 @@ def query_llm(model_choice, prompt):
     try:
         with st.spinner(f"🔍 Analyzing with {model_choice}..."):
-            response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90)  # Increased timeout
             if response.status_code != 200:
                 st.error(f"🚨 API Error {response.status_code}: {response.text}")
@@ -81,17 +81,6 @@ def query_llm(model_choice, prompt):
                 content = response.json()["choices"][0]["message"]["content"]
                 st.session_state.last_api_response = content
                 st.session_state.last_api_response_raw = response.text
-                # Enhanced JSON repair for Llama 4
-                if model_choice == "Llama 4 Mavericks":
-                    if '"line_items":' in content and not content.strip().endswith('}}'):
-                        st.warning("⚠️ Detected incomplete response. Attempting repair...")
-                        # Try to salvage what we have
-                        if '"line_items": [' in content:
-                            content = content.split('"line_items": [')[0] + '"line_items": []}}'
-                        else:
-                            content = content.split('"line_items":')[0] + '"line_items": []}}'
                 return content
             except KeyError as e:
                 st.error(f"KeyError in response: {e}\nFull response: {response.json()}")
@@ -102,36 +91,55 @@ def query_llm(model_choice, prompt):
         return None
 def clean_json_response(text):
-    """Improved JSON extraction with better Llama 4 handling"""
     if not text:
         return None
-    # Enhanced repair for Llama 4's common truncation points
-    if '"line_items":' in text:
-        if not text.strip().endswith('}}'):
-            # Case 1: Line items started but not finished
-            if '"line_items": [' in text and not text.strip().endswith(']}'):
-                text = text.split('"line_items": [')[0] + '"line_items": []}}'
-            # Case 2: Line items key exists but no array
-            else:
-                text = text.split('"line_items":')[0] + '"line_items": []}}'
-    # Try parsing the (possibly repaired) JSON
     try:
         data = json.loads(text)
-        # Ensure line_items exists even if empty
-        if "line_items" not in data:
-            if "invoice_header" in data:  # Llama 4 structure
-                data["line_items"] = []
-            else:  # DeepSeek structure
-                data["line_items"] = []
         return data
-    except json.JSONDecodeError as e:
-        st.warning(f"JSON parsing failed after repair attempts: {str(e)}")
         return None
 def get_extraction_prompt(model_choice, text):
     """Return the appropriate prompt based on model choice"""
     if model_choice == "DeepSeek v3":
@@ -210,6 +218,8 @@ Rules:
 Invoice Text:
 """ + text
 def format_currency(value):
     """Helper function to format currency values consistently"""
     if not value:

     try:
         with st.spinner(f"🔍 Analyzing with {model_choice}..."):
+            response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90)
             if response.status_code != 200:
                 st.error(f"🚨 API Error {response.status_code}: {response.text}")
                 content = response.json()["choices"][0]["message"]["content"]
                 st.session_state.last_api_response = content
                 st.session_state.last_api_response_raw = response.text
                 return content
             except KeyError as e:
                 st.error(f"KeyError in response: {e}\nFull response: {response.json()}")
         return None
 def clean_json_response(text):
+    """Improved JSON extraction with comprehensive error handling"""
     if not text:
         return None
+    # First attempt to parse directly
     try:
         data = json.loads(text)
         return data
+    except json.JSONDecodeError:
+        pass
+    # Try to extract JSON from potential markdown
+    json_match = re.search(r'```(?:json)?\n({.*?})\n```', text, re.DOTALL)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+    # Try to find any JSON-like structure
+    try:
+        start_idx = text.find('{')
+        end_idx = text.rfind('}') + 1
+        if start_idx != -1 and end_idx != 0:
+            return json.loads(text[start_idx:end_idx])
+    except:
+        pass
+    # Final fallback - manual reconstruction
+    try:
+        if '"invoice_header":' in text and '"line_items":' in text:
+            header_part = text.split('"line_items":')[0]
+            line_items_part = text.split('"line_items":')[1]
+            # Ensure proper closing of JSON
+            if not header_part.strip().endswith('{'):
+                header_part += '{'
+            if not line_items_part.strip().endswith('}}'):
+                line_items_part = line_items_part.split('}')[0] + ']}}'
+            reconstructed = header_part + '"line_items":' + line_items_part
+            return json.loads(reconstructed)
+    except Exception as e:
+        st.warning(f"Could not fully reconstruct JSON: {str(e)}")
         return None
+    return None
 def get_extraction_prompt(model_choice, text):
     """Return the appropriate prompt based on model choice"""
     if model_choice == "DeepSeek v3":
 Invoice Text:
 """ + text
+[Rest of the code remains exactly the same from the previous version...]
 def format_currency(value):
     """Helper function to format currency values consistently"""
     if not value: