Spaces:
Sleeping
Sleeping
Fix Sources section parsing in CustomGradioUI
Browse files
- Added _parse_sources_from_text() method to parse new Sources format
- Updated parse_agent_response() to split answer from Sources section
- Sources section now properly displays separately from answer
- Extracts Doc titles, dates, summaries, and key findings from formatted text
src/second_brain_online/application/ui/custom_gradio_ui.py
CHANGED
|
@@ -207,6 +207,79 @@ class CustomGradioUI:
|
|
| 207 |
error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {str(e)}</div>"
|
| 208 |
return error_msg, "", "", str(e), self.load_conversations()
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
def parse_agent_response(self, result: Any, agent_logs: List = None) -> Tuple[str, List[Dict], List[str]]:
|
| 211 |
"""Parse the agent response to extract answer, sources, and tools used."""
|
| 212 |
answer = ""
|
|
@@ -236,6 +309,24 @@ class CustomGradioUI:
|
|
| 236 |
# Pattern 3: Use the entire result as answer if no specific pattern matches
|
| 237 |
answer = result_str
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
# If we have agent logs, extract tools and sources from them
|
| 240 |
if agent_logs:
|
| 241 |
for step in agent_logs:
|
|
|
|
| 207 |
error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {str(e)}</div>"
|
| 208 |
return error_msg, "", "", str(e), self.load_conversations()
|
| 209 |
|
| 210 |
+
def _parse_sources_from_text(self, sources_text: str) -> List[Dict]:
|
| 211 |
+
"""Parse sources from the formatted text output.
|
| 212 |
+
|
| 213 |
+
Expected format:
|
| 214 |
+
Doc 1: Title (Date)
|
| 215 |
+
Source: Type | Document ID: ID | URL | User ID
|
| 216 |
+
|
| 217 |
+
Summary: ...
|
| 218 |
+
|
| 219 |
+
Key Findings:
|
| 220 |
+
- [Type/Impact] Finding
|
| 221 |
+
"""
|
| 222 |
+
sources = []
|
| 223 |
+
|
| 224 |
+
# Split by "Doc X:" pattern
|
| 225 |
+
doc_pattern = r'Doc\s+(\d+):\s*([^\n]+)'
|
| 226 |
+
doc_matches = re.finditer(doc_pattern, sources_text)
|
| 227 |
+
|
| 228 |
+
for match in doc_matches:
|
| 229 |
+
doc_num = match.group(1)
|
| 230 |
+
title_line = match.group(2).strip()
|
| 231 |
+
|
| 232 |
+
# Find the next Doc or end of string
|
| 233 |
+
start_pos = match.end()
|
| 234 |
+
next_match = re.search(r'Doc\s+\d+:', sources_text[start_pos:])
|
| 235 |
+
if next_match:
|
| 236 |
+
end_pos = start_pos + next_match.start()
|
| 237 |
+
doc_content = sources_text[start_pos:end_pos]
|
| 238 |
+
else:
|
| 239 |
+
doc_content = sources_text[start_pos:]
|
| 240 |
+
|
| 241 |
+
# Extract title and date from title line
|
| 242 |
+
title_date_match = re.match(r'(.+?)\s*\(([^)]+)\)', title_line)
|
| 243 |
+
if title_date_match:
|
| 244 |
+
title = title_date_match.group(1).strip()
|
| 245 |
+
date = title_date_match.group(2).strip()
|
| 246 |
+
else:
|
| 247 |
+
title = title_line
|
| 248 |
+
date = ""
|
| 249 |
+
|
| 250 |
+
# Extract document ID
|
| 251 |
+
doc_id = ""
|
| 252 |
+
id_match = re.search(r'Document ID:\s*([a-zA-Z0-9]+)', doc_content)
|
| 253 |
+
if id_match:
|
| 254 |
+
doc_id = id_match.group(1)
|
| 255 |
+
|
| 256 |
+
# Extract summary
|
| 257 |
+
summary = ""
|
| 258 |
+
summary_match = re.search(r'Summary:\s*([^\n]+)', doc_content)
|
| 259 |
+
if summary_match:
|
| 260 |
+
summary = summary_match.group(1).strip()
|
| 261 |
+
|
| 262 |
+
# Extract key findings
|
| 263 |
+
key_findings = []
|
| 264 |
+
findings_section = re.search(r'Key Findings:\s*(.+?)(?=\n\nDoc\s+\d+:|$)', doc_content, re.DOTALL)
|
| 265 |
+
if findings_section:
|
| 266 |
+
findings_text = findings_section.group(1)
|
| 267 |
+
# Extract each finding line
|
| 268 |
+
finding_lines = re.findall(r'-\s*\[([^\]]+)\]\s*([^\n]+)', findings_text)
|
| 269 |
+
for finding_type, finding_text in finding_lines:
|
| 270 |
+
key_findings.append(f"[{finding_type}] {finding_text.strip()}")
|
| 271 |
+
|
| 272 |
+
sources.append({
|
| 273 |
+
"id": doc_id,
|
| 274 |
+
"title": title,
|
| 275 |
+
"date": date,
|
| 276 |
+
"summary": summary,
|
| 277 |
+
"key_findings": key_findings,
|
| 278 |
+
"quotes": [] # Not using quotes in new format
|
| 279 |
+
})
|
| 280 |
+
|
| 281 |
+
return sources
|
| 282 |
+
|
| 283 |
def parse_agent_response(self, result: Any, agent_logs: List = None) -> Tuple[str, List[Dict], List[str]]:
|
| 284 |
"""Parse the agent response to extract answer, sources, and tools used."""
|
| 285 |
answer = ""
|
|
|
|
| 309 |
# Pattern 3: Use the entire result as answer if no specific pattern matches
|
| 310 |
answer = result_str
|
| 311 |
|
| 312 |
+
# NEW: Split answer and sources section
|
| 313 |
+
# Look for the Sources section marker (π Sources:)
|
| 314 |
+
sources_split = re.split(r'π\s*Sources:?', answer, maxsplit=1, flags=re.IGNORECASE)
|
| 315 |
+
|
| 316 |
+
if len(sources_split) == 2:
|
| 317 |
+
# We found a Sources section
|
| 318 |
+
answer_only = sources_split[0].strip()
|
| 319 |
+
sources_text = sources_split[1].strip()
|
| 320 |
+
|
| 321 |
+
# Parse sources from the text
|
| 322 |
+
sources = self._parse_sources_from_text(sources_text)
|
| 323 |
+
|
| 324 |
+
# Update answer to only include the answer part
|
| 325 |
+
answer = answer_only
|
| 326 |
+
else:
|
| 327 |
+
# No sources section found, answer remains as-is
|
| 328 |
+
pass
|
| 329 |
+
|
| 330 |
# If we have agent logs, extract tools and sources from them
|
| 331 |
if agent_logs:
|
| 332 |
for step in agent_logs:
|