Spaces:

chinmayjha
/

context-ai

Sleeping

App Files Files Community

chinmayjha committed on Oct 10

Commit

d8c683d

unverified ·

1 Parent(s): d7fa497

Fix Sources section parsing in CustomGradioUI

Browse files

- Added _parse_sources_from_text() method to parse new Sources format
- Updated parse_agent_response() to split answer from Sources section
- Sources section now properly displays separately from answer
- Extracts Doc titles, dates, summaries, and key findings from formatted text

Files changed (1) hide show

src/second_brain_online/application/ui/custom_gradio_ui.py +91 -0

src/second_brain_online/application/ui/custom_gradio_ui.py CHANGED Viewed

@@ -207,6 +207,79 @@ class CustomGradioUI:
             error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {str(e)}</div>"
             return error_msg, "", "", str(e), self.load_conversations()
     def parse_agent_response(self, result: Any, agent_logs: List = None) -> Tuple[str, List[Dict], List[str]]:
         """Parse the agent response to extract answer, sources, and tools used."""
         answer = ""
@@ -236,6 +309,24 @@ class CustomGradioUI:
                 # Pattern 3: Use the entire result as answer if no specific pattern matches
                 answer = result_str
         # If we have agent logs, extract tools and sources from them
         if agent_logs:
             for step in agent_logs:

             error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {str(e)}</div>"
             return error_msg, "", "", str(e), self.load_conversations()
+    def _parse_sources_from_text(self, sources_text: str) -> List[Dict]:
+        """Parse the formatted "Sources" text into a list of source dicts.
+
+        Expected format (one entry per document)::
+
+            Doc 1: Title (Date)
+            Source: Type | Document ID: ID | URL | User ID
+            Summary: ...
+            Key Findings:
+            - [Type/Impact] Finding
+
+        Args:
+            sources_text: Text that follows the Sources marker. ``None`` or
+                an empty string yields an empty list.
+
+        Returns:
+            One dict per ``Doc N:`` header, in order of appearance, with the
+            keys ``id``, ``title``, ``date``, ``summary``, ``key_findings``
+            (a list of ``"[type] text"`` strings) and ``quotes`` (always an
+            empty list). Fields missing from an entry are returned as ``""``
+            or ``[]``.
+        """
+        if not sources_text:
+            return []
+
+        header_re = re.compile(r'Doc\s+(\d+):\s*([^\n]+)')
+        boundary_re = re.compile(r'Doc\s+\d+:')
+
+        sources = []
+        for header in header_re.finditer(sources_text):
+            title_line = header.group(2).strip()
+
+            # An entry's body runs from the end of its header line up to the
+            # next "Doc N:" marker, or to the end of the text for the last one.
+            body_start = header.end()
+            boundary = boundary_re.search(sources_text, body_start)
+            body_end = boundary.start() if boundary else len(sources_text)
+            doc_content = sources_text[body_start:body_end]
+
+            # The date is the trailing "(...)" of the header line. Anchoring
+            # at the end keeps parentheses that belong to the title intact;
+            # the unanchored fallback covers lines with text after the date.
+            title_date_match = (
+                re.match(r'(.+?)\s*\(([^()]+)\)\s*$', title_line)
+                or re.match(r'(.+?)\s*\(([^)]+)\)', title_line)
+            )
+            if title_date_match:
+                title = title_date_match.group(1).strip()
+                date = title_date_match.group(2).strip()
+            else:
+                title = title_line
+                date = ""
+
+            # IDs may contain "-" and "_" (e.g. UUIDs); stop at whitespace/"|".
+            id_match = re.search(r'Document ID:\s*([A-Za-z0-9_-]+)', doc_content)
+            doc_id = id_match.group(1) if id_match else ""
+
+            # "\s*" may cross a newline, so refuse to capture the following
+            # "Key Findings:" header when the summary itself is empty.
+            summary_match = re.search(
+                r'Summary:\s*(?!Key Findings:)([^\n]+)', doc_content
+            )
+            summary = summary_match.group(1).strip() if summary_match else ""
+
+            # Everything after "Key Findings:" belongs to this entry, because
+            # doc_content has already been cut before the next "Doc N:".
+            key_findings = []
+            findings_header = re.search(r'Key Findings:', doc_content)
+            if findings_header:
+                findings_text = doc_content[findings_header.end():]
+                key_findings = [
+                    f"[{finding_type}] {finding_text.strip()}"
+                    for finding_type, finding_text in re.findall(
+                        r'-\s*\[([^\]]+)\]\s*([^\n]+)', findings_text
+                    )
+                ]
+
+            sources.append({
+                "id": doc_id,
+                "title": title,
+                "date": date,
+                "summary": summary,
+                "key_findings": key_findings,
+                "quotes": []  # Not using quotes in new format
+            })
+        return sources
     def parse_agent_response(self, result: Any, agent_logs: List = None) -> Tuple[str, List[Dict], List[str]]:
         """Parse the agent response to extract answer, sources, and tools used."""
         answer = ""
                 # Pattern 3: Use the entire result as answer if no specific pattern matches
                 answer = result_str
+        # NEW: Split answer and sources section
+        # Look for the Sources section marker (📚 Sources:)
+        sources_split = re.split(r'📚\s*Sources:?', answer, maxsplit=1, flags=re.IGNORECASE)
+        if len(sources_split) == 2:
+            # We found a Sources section
+            answer_only = sources_split[0].strip()
+            sources_text = sources_split[1].strip()
+            # Parse sources from the text
+            sources = self._parse_sources_from_text(sources_text)
+            # Update answer to only include the answer part
+            answer = answer_only
+        else:
+            # No sources section found, answer remains as-is
+            pass
         # If we have agent logs, extract tools and sources from them
         if agent_logs:
             for step in agent_logs: