Spaces:
Sleeping
Sleeping
Update json_parser.py
Browse files- json_parser.py +152 -102
json_parser.py
CHANGED
|
@@ -1,140 +1,190 @@
|
|
| 1 |
from logger_config import setup_logger
|
| 2 |
-
from typing import Dict, Any, Optional, List, Union
|
| 3 |
-
from dataclasses import dataclass
|
| 4 |
from enum import Enum
|
| 5 |
import json
|
| 6 |
-
from dify_client_python.dify_client.models.stream import (
|
| 7 |
-
StreamEvent,
|
| 8 |
-
StreamResponse,
|
| 9 |
-
build_chat_stream_response
|
| 10 |
-
)
|
| 11 |
import re
|
| 12 |
|
| 13 |
logger = setup_logger()
|
| 14 |
|
| 15 |
-
class
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
tool_input: Dict[str, Any]
|
| 25 |
-
tool_output: Optional[str]
|
| 26 |
-
tool_labels: Dict[str, Dict[str, str]]
|
| 27 |
-
|
| 28 |
-
@dataclass
|
| 29 |
-
class Citation:
|
| 30 |
-
dataset_id: str
|
| 31 |
-
dataset_name: str
|
| 32 |
-
document_id: str
|
| 33 |
-
document_name: str
|
| 34 |
-
segment_id: str
|
| 35 |
-
score: float
|
| 36 |
-
content: str
|
| 37 |
-
|
| 38 |
-
@dataclass
|
| 39 |
-
class ProcessedResponse:
|
| 40 |
-
event_type: EventType
|
| 41 |
-
task_id: str
|
| 42 |
-
message_id: str
|
| 43 |
-
conversation_id: str
|
| 44 |
-
content: str
|
| 45 |
-
tool_calls: List[ToolCall]
|
| 46 |
-
citations: List[Citation]
|
| 47 |
-
metadata: Dict[str, Any]
|
| 48 |
-
created_at: int
|
| 49 |
-
|
| 50 |
-
class EnumEncoder(json.JSONEncoder):
|
| 51 |
-
def default(self, obj):
|
| 52 |
-
if isinstance(obj, Enum):
|
| 53 |
-
return obj.value
|
| 54 |
-
if hasattr(obj, 'dict'):
|
| 55 |
-
return obj.dict()
|
| 56 |
-
return super().default(obj)
|
| 57 |
|
| 58 |
class SSEParser:
|
| 59 |
def __init__(self):
|
| 60 |
self.logger = setup_logger("sse_parser")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
def
|
| 63 |
-
"""
|
| 64 |
-
self.logger.debug("Parsing SSE event")
|
| 65 |
-
|
| 66 |
try:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
# Parse JSON
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
mermaid_content
|
| 90 |
-
)
|
| 91 |
-
parsed_data["observation"] = json.dumps({
|
| 92 |
-
"mermaid_diagram": cleaned_content
|
| 93 |
-
})
|
| 94 |
-
except json.JSONDecodeError:
|
| 95 |
-
self.logger.warning(
|
| 96 |
-
"Failed to parse mermaid diagram content"
|
| 97 |
-
)
|
| 98 |
-
except Exception as e:
|
| 99 |
-
self.logger.error(f"Error processing observation: {str(e)}")
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
return None
|
|
|
|
| 106 |
except Exception as e:
|
| 107 |
self.logger.error(f"Parse error: {str(e)}")
|
| 108 |
return None
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def clean_mermaid_content(self, content: str) -> str:
|
| 111 |
"""Clean and format mermaid diagram content"""
|
| 112 |
try:
|
| 113 |
-
#
|
| 114 |
-
if isinstance(content, str) and content.strip().startswith('{'):
|
| 115 |
-
content_dict = json.loads(content)
|
| 116 |
-
if "mermaid_diagram" in content_dict:
|
| 117 |
-
content = content_dict["mermaid_diagram"]
|
| 118 |
-
|
| 119 |
-
# Remove markdown code blocks
|
| 120 |
content = re.sub(r'```mermaid\s*|\s*```', '', content)
|
| 121 |
-
|
| 122 |
-
# Remove "tool response:" and any JSON wrapper
|
| 123 |
content = re.sub(r'tool response:.*?{', '{', content)
|
| 124 |
content = re.sub(r'}\s*\.$', '}', content)
|
| 125 |
|
| 126 |
-
#
|
| 127 |
if content.strip().startswith('{'):
|
| 128 |
try:
|
| 129 |
content_dict = json.loads(content)
|
| 130 |
-
if
|
| 131 |
-
content = content_dict
|
| 132 |
except:
|
| 133 |
pass
|
| 134 |
|
| 135 |
-
|
| 136 |
-
content = re.sub(r'\s+', ' ', content.strip())
|
| 137 |
-
return content
|
| 138 |
|
| 139 |
except Exception as e:
|
| 140 |
self.logger.error(f"Error cleaning mermaid content: {e}")
|
|
|
|
| 1 |
from logger_config import setup_logger
|
| 2 |
+
from typing import Dict, Any, Optional, List, Union, Tuple
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
from enum import Enum
|
| 5 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import re
|
| 7 |
|
| 8 |
logger = setup_logger()
|
| 9 |
|
| 10 |
class MessageState:
    """Accumulated state for a single streamed assistant message."""

    def __init__(self):
        # Raw text accumulated so far for this message.
        self.buffer = ""
        # Set once a message_end event has been handled.
        self.is_complete = False
        # Distinct tool outputs and citations collected during streaming.
        self.tool_outputs, self.citations = [], []
        # Metadata attached at message end.
        self.metadata = {}
        # Event identifiers already handled (dedup guard).
        self.processed_events = set()
        # Identifier of the message currently being streamed, if any.
        self.current_message_id = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
class SSEParser:
    """Parses Dify SSE stream events into frontend-friendly dicts."""

    def __init__(self):
        # Dedicated logger channel plus per-message accumulation state.
        self.logger = setup_logger("sse_parser")
        self.current_message = MessageState()
|
| 24 |
+
|
| 25 |
+
def _extract_json_content(self, data: str) -> Optional[str]:
|
| 26 |
+
"""Extract JSON content from SSE data line"""
|
| 27 |
+
if "data:" in data:
|
| 28 |
+
return data.split("data:", 1)[1].strip()
|
| 29 |
+
return None
|
| 30 |
|
| 31 |
+
def _is_valid_json(self, content: str) -> bool:
|
| 32 |
+
"""Check if content is valid JSON"""
|
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
+
json.loads(content)
|
| 35 |
+
return True
|
| 36 |
+
except json.JSONDecodeError:
|
| 37 |
+
return False
|
| 38 |
+
|
| 39 |
+
def _clean_mermaid_content(self, content: str) -> Optional[str]:
|
| 40 |
+
"""Clean and extract mermaid diagram content"""
|
| 41 |
+
try:
|
| 42 |
+
# Remove tool response prefix/suffix if present
|
| 43 |
+
if "tool response:" in content:
|
| 44 |
+
content = content.split("tool response:", 1)[1].strip()
|
| 45 |
|
| 46 |
+
# Parse JSON if present
|
| 47 |
+
try:
|
| 48 |
+
data = json.loads(content)
|
| 49 |
+
# Handle different mermaid output formats
|
| 50 |
+
if "mermaid_output" in data:
|
| 51 |
+
content = data["mermaid_output"]
|
| 52 |
+
elif "mermaid_diagram" in data:
|
| 53 |
+
content = data["mermaid_diagram"]
|
| 54 |
+
except json.JSONDecodeError:
|
| 55 |
+
pass
|
| 56 |
+
|
| 57 |
+
# Clean up markdown formatting
|
| 58 |
+
content = content.replace("```mermaid\n", "").replace("\n```", "")
|
| 59 |
|
| 60 |
+
return content.strip()
|
| 61 |
+
except Exception as e:
|
| 62 |
+
self.logger.error(f"Error cleaning mermaid content: {str(e)}")
|
| 63 |
+
return None
|
| 64 |
+
|
| 65 |
def parse_sse_event(self, data: str) -> Optional[Dict]:
    """Parse one SSE event line and shape it for frontend consumption.

    Returns a dict keyed by "type" ("message", "tool_output", "thought",
    or "end"), or None for lines without a usable payload.
    """
    try:
        # Pull the JSON payload out of the raw SSE line.
        payload = self._extract_json_content(data)
        if not payload:
            return None

        event = json.loads(payload)
        kind = event.get("event")
        msg_id = event.get("message_id")

        # Plain answer text chunk.
        if kind == "agent_message":
            return {
                "type": "message",
                "content": event.get("answer", ""),
                "message_id": msg_id,
            }

        # Agent reasoning step, possibly carrying a tool observation.
        if kind == "agent_thought":
            thought = event.get("thought", "")
            observation = event.get("observation", "")
            tool = event.get("tool", "")

            # Mermaid diagrams get dedicated cleanup and their own shape.
            if tool == "mermaid_diagrams":
                try:
                    cleaned = self._clean_mermaid_content(observation)
                    if cleaned:
                        return {
                            "type": "tool_output",
                            "tool": "mermaid",
                            "content": cleaned,
                            "message_id": msg_id,
                        }
                except Exception as e:
                    self.logger.error(f"Failed to parse mermaid data: {str(e)}")

            return {
                "type": "thought",
                "content": {
                    "thought": thought,
                    "observation": observation,
                    "tool": tool,
                },
                "message_id": msg_id,
            }

        # Stream termination carries final metadata.
        if kind == "message_end":
            return {
                "type": "end",
                "message_id": msg_id,
                "metadata": event.get("metadata", {}),
            }

        return None
    except Exception as e:
        self.logger.error(f"Parse error: {str(e)}")
        return None
|
| 129 |
|
| 130 |
def _process_observation(self, data: Dict) -> Dict:
    """Process observation content with special handling for tool outputs.

    Records each distinct tool output on ``self.current_message.tool_outputs``
    (deduplicated by the "content" value) and, for mermaid observations,
    rewrites ``data["observation"]`` to a normalized JSON envelope.
    Returns *data* (possibly mutated in place).
    """
    try:
        observation = data.get("observation")
        if observation and isinstance(observation, str):
            # Handle tool-specific content
            if "mermaid_diagram" in observation:
                # NOTE: uses the public clean_mermaid_content, not the
                # private _clean_mermaid_content variant.
                cleaned_content = self.clean_mermaid_content(observation)
                # Skip duplicates already captured for this message.
                if cleaned_content not in [t.get("content") for t in self.current_message.tool_outputs]:
                    self.current_message.tool_outputs.append({
                        "type": "mermaid_diagram",
                        "content": cleaned_content
                    })
                # Re-wrap the observation in a normalized JSON envelope.
                data["observation"] = json.dumps({
                    "mermaid_diagram": cleaned_content
                })
            elif self._is_valid_json(observation):
                # Handle other tool outputs
                try:
                    tool_data = json.loads(observation)
                    if isinstance(tool_data, dict):
                        # presumably keys are tool names mapping to their
                        # outputs — TODO confirm against the upstream schema
                        for tool_name, tool_output in tool_data.items():
                            if tool_output not in [t.get("content") for t in self.current_message.tool_outputs]:
                                self.current_message.tool_outputs.append({
                                    "type": tool_name,
                                    "content": tool_output
                                })
                except json.JSONDecodeError:
                    pass
    except Exception as e:
        self.logger.error(f"Error processing observation: {str(e)}")
    # Best-effort: the original data is returned even when processing failed.
    return data
|
| 162 |
+
|
| 163 |
+
def _handle_message_end(self, data: Dict) -> None:
|
| 164 |
+
"""Handle message end event and cleanup state"""
|
| 165 |
+
self.current_message.citations = data.get("retriever_resources", [])
|
| 166 |
+
self.current_message.metadata = data.get("metadata", {})
|
| 167 |
+
self.current_message.metadata["tool_outputs"] = self.current_message.tool_outputs
|
| 168 |
+
self.current_message.is_complete = True
|
| 169 |
+
|
| 170 |
def clean_mermaid_content(self, content: str) -> str:
    """Clean and format mermaid diagram content.

    Removes markdown code fences and a "tool response:" wrapper, then
    unwraps a ``{"mermaid_diagram": ...}`` JSON envelope when present.

    Returns the cleaned text; returns "" (never None — the signature
    promises str) if cleaning raises unexpectedly.
    """
    try:
        # Remove markdown and JSON formatting
        content = re.sub(r'```mermaid\s*|\s*```', '', content)
        content = re.sub(r'tool response:.*?{', '{', content)
        content = re.sub(r'}\s*\.$', '}', content)

        # Parse JSON if present
        if content.strip().startswith('{'):
            try:
                content_dict = json.loads(content)
                if isinstance(content_dict, dict):
                    content = content_dict.get("mermaid_diagram", content)
            # Narrowed from a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt; only parse failures are expected.
            except json.JSONDecodeError:
                pass

        return content.strip()
    except Exception as e:
        self.logger.error(f"Error cleaning mermaid content: {e}")
        return ""
|