PDF_Upload

Sleeping

App Files Files Community

Seth0330 committed on May 20, 2025

Commit

a025f40

verified ·

1 Parent(s): 3abc562

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -50

app.py CHANGED Viewed

@@ -56,7 +56,6 @@ def query_llm(model_choice, prompt):
         "Content-Type": "application/json",
     }
-    # Add extra headers if they exist (for OpenRouter)
     if "extra_headers" in config:
         headers.update(config["extra_headers"])
@@ -67,13 +66,12 @@ def query_llm(model_choice, prompt):
         "max_tokens": 2000,
     }
-    # Add response format if specified
     if config["response_format"]:
         payload["response_format"] = config["response_format"]
     try:
         with st.spinner(f"🔍 Analyzing with {model_choice}..."):
-            response = requests.post(config["api_url"], headers=headers, json=payload, timeout=60)
             if response.status_code != 200:
                 st.error(f"🚨 API Error {response.status_code}: {response.text}")
@@ -84,12 +82,15 @@ def query_llm(model_choice, prompt):
                 st.session_state.last_api_response = content
                 st.session_state.last_api_response_raw = response.text
-                # Special handling for Llama 4 Mavericks incomplete responses
-                if model_choice == "Llama 4 Mavericks" and content.count('{') != content.count('}'):
-                    st.warning("⚠️ Received incomplete JSON response. Trying to fix...")
-                    # Try to complete the JSON by adding missing closing braces
-                    missing_braces = content.count('{') - content.count('}')
-                    content += '}' * missing_braces + ']' * (content.count('[') - content.count(']'))
                 return content
             except KeyError as e:
@@ -101,50 +102,34 @@ def query_llm(model_choice, prompt):
         return None
 def clean_json_response(text):
-    """Improved JSON extraction from API response with better error handling"""
     if not text:
         return None
-    # Special handling for Llama 4 Mavericks incomplete responses
-    if 'line_items":' in text and ']' not in text.split('line_items":')[-1]:
-        # Try to complete the line items array
-        text = text.split('line_items":')[0] + 'line_items": []}'
-    # First try to parse directly as JSON
     try:
-        return json.loads(text)
     except json.JSONDecodeError as e:
-        st.warning(f"First JSON parse attempt failed: {str(e)}")
-    # Try to extract JSON from markdown code blocks
-    json_match = re.search(r'```(?:json)?\n({.*?})\n```', text, re.DOTALL)
-    if json_match:
-        try:
-            return json.loads(json_match.group(1))
-        except json.JSONDecodeError as e:
-            st.warning(f"Markdown JSON parse failed: {str(e)}")
-    # Try to extract any JSON-like content
-    json_match = re.search(r'\{.*\}', text, re.DOTALL)
-    if json_match:
-        try:
-            return json.loads(json_match.group(0))
-        except json.JSONDecodeError as e:
-            st.warning(f"Loose JSON parse failed: {str(e)}")
-    # Fallback to simple key-value parsing
-    try:
-        data = {}
-        for line in text.split('\n'):
-            if ':' in line:
-                parts = line.split(':', 1)
-                if len(parts) == 2:
-                    key, val = parts
-                    key = key.strip().strip('"').lower().replace(' ', '_')
-                    data[key] = val.strip().strip('"')
-        return data if data else None
-    except Exception as e:
-        st.error(f"Final fallback parse failed: {str(e)}")
         return None
 def get_extraction_prompt(model_choice, text):
@@ -236,7 +221,7 @@ def format_currency(value):
 def display_line_items(line_items, model_choice="DeepSeek v3"):
     """Display line items in a formatted table"""
     if not line_items:
-        st.info("No line items found in this invoice.")
         return
     st.subheader("📋 Line Items")

         "Content-Type": "application/json",
     }
     if "extra_headers" in config:
         headers.update(config["extra_headers"])
         "max_tokens": 2000,
     }
     if config["response_format"]:
         payload["response_format"] = config["response_format"]
     try:
         with st.spinner(f"🔍 Analyzing with {model_choice}..."):
+            response = requests.post(config["api_url"], headers=headers, json=payload, timeout=90)  # Increased timeout
             if response.status_code != 200:
                 st.error(f"🚨 API Error {response.status_code}: {response.text}")
                 st.session_state.last_api_response = content
                 st.session_state.last_api_response_raw = response.text
+                # Enhanced JSON repair for Llama 4
+                if model_choice == "Llama 4 Mavericks":
+                    if '"line_items":' in content and not content.strip().endswith('}}'):
+                        st.warning("⚠️ Detected incomplete response. Attempting repair...")
+                        # Try to salvage what we have
+                        if '"line_items": [' in content:
+                            content = content.split('"line_items": [')[0] + '"line_items": []}}'
+                        else:
+                            content = content.split('"line_items":')[0] + '"line_items": []}}'
                 return content
             except KeyError as e:
         return None
 def clean_json_response(text):
+    """Improved JSON extraction with better Llama 4 handling"""
     if not text:
         return None
+    # Enhanced repair for Llama 4's common truncation points
+    if '"line_items":' in text:
+        if not text.strip().endswith('}}'):
+            # Case 1: Line items started but not finished
+            if '"line_items": [' in text and not text.strip().endswith(']}'):
+                text = text.split('"line_items": [')[0] + '"line_items": []}}'
+            # Case 2: Line items key exists but no array
+            else:
+                text = text.split('"line_items":')[0] + '"line_items": []}}'
+    # Try parsing the (possibly repaired) JSON
     try:
+        data = json.loads(text)
+        # Ensure line_items exists even if empty
+        if "line_items" not in data:
+            if "invoice_header" in data:  # Llama 4 structure
+                data["line_items"] = []
+            else:  # DeepSeek structure
+                data["line_items"] = []
+        return data
     except json.JSONDecodeError as e:
+        st.warning(f"JSON parsing failed after repair attempts: {str(e)}")
         return None
 def get_extraction_prompt(model_choice, text):
 def display_line_items(line_items, model_choice="DeepSeek v3"):
     """Display line items in a formatted table"""
     if not line_items:
+        st.info("No line items found in this invoice. This may be due to incomplete data from the API.")
         return
     st.subheader("📋 Line Items")