Spaces:

SAAHMATHWORKS
/

MultiCountryRAG

Sleeping

App Files Files Community

SAAHMATHWORKS committed on Oct 8, 2025

Commit

8f0db18

1 Parent(s): 69f5099

production

Browse files

Files changed (2) hide show

api/main.py +97 -21
models/state_models.py +62 -14

api/main.py CHANGED Viewed

@@ -4,7 +4,7 @@ import sys
 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).parent.parent))
-from typing import Optional
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, Query, HTTPException
 from fastapi.responses import StreamingResponse, HTMLResponse
@@ -34,6 +34,66 @@ graph = None
 system_initialized = False
 async def initialize_system():
     global chat_manager, graph, system_initialized
     try:
@@ -258,48 +318,64 @@ async def generate_legal_chat_responses(message: str, session_id: Optional[str]
             if node_name != current_node:
                 current_node = node_name
-                yield f"data: {json.dumps({'type': 'node_transition', 'node': node_name})}\n\n"
             if event_type == "on_chat_model_stream":
                 chunk_content = serialize_ai_message_chunk(event["data"]["chunk"])
                 current_content += chunk_content
-                yield f"data: {json.dumps({'type': 'content', 'content': chunk_content})}\n\n"
             elif event_type == "on_chat_model_end":
-                yield f"data: {json.dumps({'type': 'content_end'})}\n\n"
             elif event_type == "on_chain_start" and "retrieval" in node_name:
                 country = node_name.replace("_retrieval", "")
-                yield f"data: {json.dumps({'type': 'search_start', 'country': country})}\n\n"
             elif event_type == "on_chain_end" and "retrieval" in node_name:
                 country = node_name.replace("_retrieval", "")
-                yield f"data: {json.dumps({'type': 'search_end', 'country': country})}\n\n"
             elif event_type == "on_tool_end":
                 tool_name = event["name"]
-                yield f"data: {json.dumps({'type': 'tool_complete', 'tool': tool_name})}\n\n"
             elif event_type == "on_graph_end":
-                # Capture and convert the final state
-                state = event.get("data", {}).get("output")
-                if state and isinstance(state, MultiCountryLegalState):
-                    final_state = state
-                    # Use our custom model_dump method for proper serialization
-                    state_dict = state.model_dump()
-                    yield f"data: {json.dumps({'type': 'state', 'content': state_dict})}\n\n"
-                yield f"data: {json.dumps({'type': 'graph_end'})}\n\n"
     except Exception as e:
         logger.error(f"Error in generate_legal_chat_responses: {e}", exc_info=True)
-        yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
-    # Yield final state if captured
-    if final_state and isinstance(final_state, MultiCountryLegalState):
-        final_state_dict = final_state.model_dump()
-        yield f"data: {json.dumps({'type': 'final_state', 'content': final_state_dict})}\n\n"
-    yield f"data: {json.dumps({'type': 'end'})}\n\n"
 @app.get("/chat")

 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).parent.parent))
+from typing import Optional, Any
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, Query, HTTPException
 from fastapi.responses import StreamingResponse, HTMLResponse
 system_initialized = False
+# ============================================================================
+# CRITICAL: Safe JSON Serialization Utilities
+# ============================================================================
+class SafeJSONEncoder(json.JSONEncoder):
+    """
+    JSON encoder used as a fallback for objects the standard encoder rejects.
+
+    ``default`` is only invoked by ``json`` for values it cannot serialize
+    natively. Conversions are tried from most to least specific:
+    sets -> lists, bytes -> text, LangChain messages -> role/content/meta
+    dicts, objects exposing ``model_dump()`` / ``dict()`` -> their dump,
+    anything else -> ``str(obj)``.
+    """
+    def default(self, obj):
+        """Return a JSON-serializable stand-in for ``obj``."""
+        # Plain stdlib types first: cheap checks that need no third-party types.
+        if isinstance(obj, set):
+            return list(obj)
+        if isinstance(obj, bytes):
+            return obj.decode('utf-8', errors='ignore')
+        # LangChain messages must be tested BEFORE the generic
+        # model_dump()/dict() probe: BaseMessage is itself a Pydantic model,
+        # so the generic probe would always match first and this branch
+        # could never run.
+        if isinstance(obj, BaseMessage):
+            return {
+                "role": "assistant" if isinstance(obj, AIMessage) else "user",
+                "content": obj.content if hasattr(obj, 'content') else str(obj),
+                "meta": getattr(obj, "additional_kwargs", {}),
+            }
+        # Pydantic-style objects: model_dump() is preferred, dict() is second.
+        # Classes are skipped (calling the method without an instance would
+        # raise), as are attributes that merely share the name but are not
+        # callable.
+        if not isinstance(obj, type):
+            for dump_name in ('model_dump', 'dict'):
+                dump = getattr(obj, dump_name, None)
+                if callable(dump):
+                    return dump()
+        # Fallback: convert to string
+        try:
+            return str(obj)
+        except Exception:
+            return f"<Unserializable: {type(obj).__name__}>"
+def safe_json_dumps(obj: Any) -> str:
+    """
+    Serialize ``obj`` to a JSON string, degrading through three strategies.
+
+    1. Plain ``json.dumps``.
+    2. ``json.dumps`` with ``SafeJSONEncoder``.
+    3. ``json.dumps`` with ``default=str``.
+
+    If all three fail, the failure is logged and a JSON error payload
+    (``{"error": "serialization_failed", "message": ...}``) is returned
+    instead of raising.
+    """
+    # Strategy 1: standard encoding; only encoding errors trigger a fallback.
+    try:
+        return json.dumps(obj)
+    except (TypeError, ValueError):
+        pass  # deliberate: fall through to the custom encoder
+
+    # Strategy 2: custom encoder.
+    try:
+        return json.dumps(obj, cls=SafeJSONEncoder)
+    except Exception:
+        pass  # deliberate: fall through to the stringifying fallback
+
+    # Strategy 3: stringify anything the encoder does not understand.
+    try:
+        return json.dumps(obj, default=str)
+    except Exception as e:
+        # Last resort: report the failure as a JSON payload.
+        logger.error(f"Complete JSON serialization failure: {e}")
+        return json.dumps({"error": "serialization_failed", "message": str(e)})
+# ============================================================================
 async def initialize_system():
     global chat_manager, graph, system_initialized
     try:
             if node_name != current_node:
                 current_node = node_name
+                yield f"data: {safe_json_dumps({'type': 'node_transition', 'node': node_name})}\n\n"
             if event_type == "on_chat_model_stream":
                 chunk_content = serialize_ai_message_chunk(event["data"]["chunk"])
                 current_content += chunk_content
+                yield f"data: {safe_json_dumps({'type': 'content', 'content': chunk_content})}\n\n"
             elif event_type == "on_chat_model_end":
+                yield f"data: {safe_json_dumps({'type': 'content_end'})}\n\n"
             elif event_type == "on_chain_start" and "retrieval" in node_name:
                 country = node_name.replace("_retrieval", "")
+                yield f"data: {safe_json_dumps({'type': 'search_start', 'country': country})}\n\n"
             elif event_type == "on_chain_end" and "retrieval" in node_name:
                 country = node_name.replace("_retrieval", "")
+                yield f"data: {safe_json_dumps({'type': 'search_end', 'country': country})}\n\n"
             elif event_type == "on_tool_end":
                 tool_name = event["name"]
+                yield f"data: {safe_json_dumps({'type': 'tool_complete', 'tool': tool_name})}\n\n"
             elif event_type == "on_graph_end":
+                # Capture and convert the final state - WITH SAFE SERIALIZATION
+                try:
+                    state = event.get("data", {}).get("output")
+                    if state:
+                        if isinstance(state, MultiCountryLegalState):
+                            final_state = state
+                            # Use our custom model_dump method for proper serialization
+                            state_dict = state.model_dump()
+                        elif isinstance(state, dict):
+                            state_dict = state
+                        else:
+                            # Fallback: convert to string
+                            state_dict = {"state": str(state)}
+                        yield f"data: {safe_json_dumps({'type': 'state', 'content': state_dict})}\n\n"
+                except Exception as state_error:
+                    logger.warning(f"Could not serialize state: {state_error}")
+                    # Don't fail, just skip state output
+                yield f"data: {safe_json_dumps({'type': 'graph_end'})}\n\n"
     except Exception as e:
         logger.error(f"Error in generate_legal_chat_responses: {e}", exc_info=True)
+        yield f"data: {safe_json_dumps({'type': 'error', 'message': str(e)})}\n\n"
+    # Yield final state if captured - WITH SAFE SERIALIZATION
+    try:
+        if final_state and isinstance(final_state, MultiCountryLegalState):
+            final_state_dict = final_state.model_dump()
+            yield f"data: {safe_json_dumps({'type': 'final_state', 'content': final_state_dict})}\n\n"
+    except Exception as final_error:
+        logger.warning(f"Could not serialize final state: {final_error}")
+        # Don't fail, just skip final state output
+    yield f"data: {safe_json_dumps({'type': 'end'})}\n\n"
 @app.get("/chat")

models/state_models.py CHANGED Viewed

@@ -4,6 +4,9 @@ from pydantic import BaseModel, Field, ConfigDict
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
 import operator
 import json
 class MultiCountryLegalState(BaseModel):
@@ -76,24 +79,65 @@ class MultiCountryLegalState(BaseModel):
         Override model_dump to ensure proper serialization for PostgreSQL checkpointing.
         This fixes: TypeError: Object of type MultiCountryLegalState is not JSON serializable
         """
-        data = super().model_dump(**kwargs)
         # Ensure all nested objects are JSON-serializable
         # Messages should already be dicts, but double-check
         if "messages" in data and data["messages"]:
             serialized_messages = []
             for msg in data["messages"]:
-                if isinstance(msg, dict):
-                    serialized_messages.append(msg)
-                elif isinstance(msg, BaseMessage):
-                    # Convert LangChain message objects to dicts
-                    serialized_messages.append({
-                        "role": "assistant" if isinstance(msg, AIMessage) else "user",
-                        "content": msg.content,
-                        "meta": getattr(msg, "additional_kwargs", {}),
-                    })
-                else:
-                    # Fallback for any other type
                     serialized_messages.append({
                         "role": "unknown",
                         "content": str(msg),
@@ -104,8 +148,12 @@ class MultiCountryLegalState(BaseModel):
         # Ensure nested dicts are serializable
         for key in ["legal_context", "pending_assistance_data", "search_metadata"]:
             if key in data and data[key]:
-                # Convert any non-serializable objects to strings
-                data[key] = self._make_json_serializable(data[key])
         return data

 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
 import operator
 import json
+import logging
+logger = logging.getLogger(__name__)
 class MultiCountryLegalState(BaseModel):
         Override model_dump to ensure proper serialization for PostgreSQL checkpointing.
         This fixes: TypeError: Object of type MultiCountryLegalState is not JSON serializable
         """
+        try:
+            data = super().model_dump(**kwargs)
+        except Exception as e:
+            logger.warning(f"Standard model_dump failed: {e}, using manual serialization")
+            # Fallback to manual serialization
+            data = {
+                "messages": self.messages if isinstance(self.messages, list) else [],
+                "legal_context": self.legal_context if isinstance(self.legal_context, dict) else {},
+                "supplemental_message": self.supplemental_message or "",
+                "session_id": self.session_id,
+                "last_search_query": self.last_search_query,
+                "detected_articles": self.detected_articles if isinstance(self.detected_articles, list) else [],
+                "router_decision": self.router_decision,
+                "search_results": self.search_results,
+                "route_explanation": self.route_explanation,
+                "country": self.country,
+                "assistance_requested": self.assistance_requested,
+                "user_email": self.user_email,
+                "assistance_description": self.assistance_description,
+                "email_status": self.email_status,
+                "assistance_step": self.assistance_step,
+                "pending_assistance_data": self.pending_assistance_data if isinstance(self.pending_assistance_data, dict) else {},
+                "repair_type": self.repair_type,
+                "original_query": self.original_query,
+                "misunderstanding_count": self.misunderstanding_count,
+                "primary_intent": self.primary_intent,
+                "approval_status": self.approval_status,
+                "approval_reason": self.approval_reason,
+                "approved_by": self.approved_by,
+                "approval_timestamp": self.approval_timestamp,
+                "summary_generated": self.summary_generated,
+                "last_summary_timestamp": self.last_summary_timestamp,
+                "search_metadata": self.search_metadata if isinstance(self.search_metadata, dict) else {},
+            }
         # Ensure all nested objects are JSON-serializable
         # Messages should already be dicts, but double-check
         if "messages" in data and data["messages"]:
             serialized_messages = []
             for msg in data["messages"]:
+                try:
+                    if isinstance(msg, dict):
+                        serialized_messages.append(msg)
+                    elif isinstance(msg, BaseMessage):
+                        # Convert LangChain message objects to dicts
+                        serialized_messages.append({
+                            "role": "assistant" if isinstance(msg, AIMessage) else "user",
+                            "content": msg.content,
+                            "meta": getattr(msg, "additional_kwargs", {}),
+                        })
+                    else:
+                        # Fallback for any other type
+                        serialized_messages.append({
+                            "role": "unknown",
+                            "content": str(msg),
+                            "meta": {}
+                        })
+                except Exception as msg_error:
+                    logger.warning(f"Error serializing message: {msg_error}")
                     serialized_messages.append({
                         "role": "unknown",
                         "content": str(msg),
         # Ensure nested dicts are serializable
         for key in ["legal_context", "pending_assistance_data", "search_metadata"]:
             if key in data and data[key]:
+                try:
+                    # Convert any non-serializable objects to strings
+                    data[key] = self._make_json_serializable(data[key])
+                except Exception as dict_error:
+                    logger.warning(f"Error serializing {key}: {dict_error}")
+                    data[key] = {}
         return data