Spaces:
Build error
Build error
| """ | |
| Enhanced response formatter with source referencing and clean output structure. | |
| """ | |
| import streamlit as st | |
| import re | |
| from typing import Dict, List, Optional, Any | |
| from datetime import datetime | |
| import json | |
class EnhancedResponseFormatter:
    """Turn raw AI response text into markdown sections plus source metadata.

    The pipeline run by :meth:`format_response` is:

    1. ``_clean_content``     - collapse blank lines, normalise headings/lists.
    2. ``_structure_content`` - make sure every piece of text sits under a
       ``### `` heading.
    3. ``_extract_sections``  - split the structured markdown into
       ``{'title', 'content'}`` dicts.
    4. ``_process_sources``   - normalise and rank the source references.
    """

    def __init__(self):
        # Lines consisting solely of one of these phrases (optionally followed
        # by a colon) are rewritten as "### <phrase>" headings by
        # _clean_content. Matching is case-insensitive.
        self.section_keywords = [
            "Summary",
            "Key Points",
            "Requirements",
            "Solution",
            "Approach",
            "Benefits",
            "Experience",
            "Technical Details",
            "Implementation",
            "Timeline",
            "Pricing",
            "Why Us",
            "Next Steps",
            "Recommendations"
        ]

    def format_response(self,
                        content: str,
                        sources: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """
        Format the AI response with enhanced structure and source references.

        Args:
            content (str): Raw response content. ``None`` is treated as an
                empty string.
            sources (List[Dict]): List of source documents with their metadata.
                ``None`` or an empty list yields no source references.

        Returns:
            Dict with three keys:
                'content'  - the structured markdown string,
                'sources'  - processed sources, highest relevance first,
                'sections' - list of {'title', 'content'} dicts.
        """
        if content is None:
            content = ""

        # Clean and structure the content
        cleaned_content = self._clean_content(content)
        structured_content = self._structure_content(cleaned_content)

        # Process source references
        source_references = self._process_sources(sources) if sources else []

        return {
            'content': structured_content,
            'sources': source_references,
            'sections': self._extract_sections(structured_content)
        }

    def _clean_content(self, content: str) -> str:
        """Clean and normalize the content.

        * Runs of three or more newlines are collapsed to one blank line.
        * A line that consists only of a section keyword (optionally already
          prefixed with markdown ``#`` markers, optionally followed by ``:``)
          becomes ``### <Keyword>`` followed by a blank line, using the
          keyword's canonical spelling from ``self.section_keywords``.
        * ``-`` / ``*`` bullets become ``• `` and numbered items are
          normalised to ``<int>. ``.
        """
        # Remove multiple newlines
        content = re.sub(r'\n{3,}', '\n\n', content)

        # Ensure consistent heading formatting.
        if self.section_keywords:
            canonical = {kw.casefold(): kw for kw in self.section_keywords}
            alternation = '|'.join(
                re.escape(kw)
                for kw in sorted(self.section_keywords, key=len, reverse=True)
            )
            # Anchored to the start of a line so a keyword used mid-sentence
            # ("In Summary") is left alone, and tolerant of an existing "#"
            # prefix so "### Summary" is not turned into "### ### Summary".
            heading = re.compile(
                rf'^[ \t]*(?:#{{1,6}}[ \t]*)?({alternation}):?\s*\n',
                re.IGNORECASE | re.MULTILINE,
            )
            content = heading.sub(
                lambda m: "### {}\n\n".format(
                    canonical.get(m.group(1).casefold(), m.group(1))
                ),
                content,
            )

        # Format lists consistently. Only horizontal whitespace is consumed
        # after the marker so an empty item is not merged with the next line.
        content = re.sub(r'(?m)^[\-\*][ \t]+', '• ', content)
        content = re.sub(r'(?m)^(\d+)\.[ \t]+',
                         lambda m: f"{int(m.group(1))}. ",
                         content)
        return content

    def _structure_content(self, content: str) -> str:
        """Add structural elements and formatting to the content.

        If the text contains no ``### `` heading it is returned under a
        default ``### Response`` heading. Otherwise each section is re-emitted
        as ``### <title>``, a blank line, the stripped body and a blank line.
        Text that appears before the first heading is kept under a
        ``### Response`` heading rather than being mistaken for a section.
        """
        # The first element is whatever precedes the first heading (may be '').
        preamble, *sections = re.split(r'(?m)^###\s+', content)

        # If no sections found, add default structure
        if not sections:
            return "### Response\n\n" + content

        formatted_sections = []
        if preamble.strip():
            formatted_sections.append(f"### Response\n\n{preamble.strip()}\n\n")

        # Process each section
        for section in sections:
            if not section.strip():
                continue
            # First line is the title; everything after it is the body. A
            # title with no body is kept with an empty body.
            title, _, body = section.partition('\n')
            formatted_sections.append(f"### {title.strip()}\n\n{body.strip()}\n\n")

        return "\n".join(formatted_sections)

    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process and format source references.

        Each input dict is read with ``.get`` so missing keys are tolerated.
        Missing or ``None`` text/score/index values fall back to ``''`` / ``0``
        so that sorting and context extraction cannot fail on them.

        Returns:
            A new list of dicts with keys 'document_name', 'page_number',
            'chunk_index', 'content', 'relevance_score' and 'context',
            sorted by 'relevance_score' in descending order.
        """
        processed_sources = []
        for source in sources:
            text = source.get('content') or ''
            processed_sources.append({
                'document_name': source.get('document_name', ''),
                'page_number': source.get('page_number'),
                'chunk_index': source.get('chunk_index'),
                'content': text,
                'relevance_score': source.get('relevance_score') or 0,
                'context': self._extract_context(text,
                                                 source.get('start_index') or 0,
                                                 source.get('end_index') or 0)
            })

        return sorted(processed_sources,
                      key=lambda x: x['relevance_score'],
                      reverse=True)

    def _extract_context(self,
                         content: str,
                         start_idx: int,
                         end_idx: int,
                         context_window: int = 100) -> str:
        """Extract context around the referenced text.

        Returns ``content[start_idx - context_window : end_idx + context_window]``
        clamped to the bounds of ``content``, with ``...`` added on whichever
        side was truncated.
        """
        content = content or ''
        start = max(0, start_idx - context_window)
        # Never let the end fall before the start (e.g. negative end_idx),
        # which would otherwise slice relative to the end of the string.
        end = max(start, min(len(content), end_idx + context_window))

        context = content[start:end]
        if start > 0:
            context = f"...{context}"
        if end < len(content):
            context = f"{context}..."
        return context

    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
        """Extract sections for navigation.

        Splits ``content`` on lines starting with ``### ``. Lines before the
        first heading, and sections whose title is empty, are not returned.

        Returns:
            List of {'title': str, 'content': str} in document order; the
            content is the section's lines joined with newlines, unstripped.
        """
        sections = []
        current_section = None
        current_content = []

        for line in content.split('\n'):
            if line.startswith('### '):
                if current_section:
                    sections.append({
                        'title': current_section,
                        'content': '\n'.join(current_content)
                    })
                # Drop only the leading marker; "### " inside the title stays.
                current_section = line[4:].strip()
                current_content = []
            else:
                current_content.append(line)

        if current_section:
            sections.append({
                'title': current_section,
                'content': '\n'.join(current_content)
            })
        return sections
def display_enhanced_response(response_content: str,
                              sources: Optional[List[Dict[str, Any]]] = None,
                              key_prefix: str = "show_doc"):
    """
    Display enhanced response with source references and navigation.

    The response is formatted with ``EnhancedResponseFormatter`` and rendered
    in two Streamlit columns: the response sections (one expander each) on the
    left and the source references on the right.

    Args:
        response_content (str): Raw response content
        sources (List[Dict]): Source documents and metadata
        key_prefix (str): Prefix for the "Show in Document" button keys. Pass a
            distinct value per call when this function is rendered more than
            once in the same script run, so the widget keys stay unique.
    """
    formatter = EnhancedResponseFormatter()
    formatted = formatter.format_response(response_content, sources)

    # Create two columns: main content and source references
    col1, col2 = st.columns([2, 1])

    with col1:
        # Display main content
        if formatted['sections']:
            for section in formatted['sections']:
                with st.expander(section['title'], expanded=True):
                    st.markdown(section['content'])
        else:
            # No section could be extracted: still show the response text
            # instead of leaving the column empty.
            st.markdown(formatted['content'])

    with col2:
        st.markdown("### Source References")
        if formatted['sources']:
            for idx, source in enumerate(formatted['sources']):
                with st.expander(f"📄 {source['document_name']}", expanded=False):
                    # A missing/None score is shown as 0.00 rather than
                    # raising inside the format spec.
                    score = source['relevance_score'] or 0
                    st.markdown(f"**Relevance Score:** {score:.2f}")
                    # Compare against None so that page 0 is still displayed.
                    if source['page_number'] is not None:
                        st.markdown(f"**Page:** {source['page_number']}")
                    st.markdown("**Context:**")
                    st.markdown(f"```\n{source['context']}\n```")
                    if st.button("Show in Document", key=f"{key_prefix}_{idx}"):
                        show_document_context(source)
        else:
            st.info("No source references available for this response.")
def show_document_context(source: Dict[str, Any]):
    """Render one source document as two tabs: its context snippet and its full text.

    Args:
        source (Dict): Source entry; the keys 'document_name', 'context' and
            'content' are read.
    """
    heading = f"### Document: {source['document_name']}"
    st.markdown(heading)

    # Two views of the same source: the short excerpt and the whole text.
    context_tab, document_tab = st.tabs(["Context View", "Full Document"])

    with context_tab:
        st.markdown("### Relevant Context")
        fenced_context = f"```\n{source['context']}\n```"
        st.markdown(fenced_context)

    with document_tab:
        st.markdown("### Full Document Content")
        # Read-only text area so the full document can be scrolled.
        st.text_area(
            "Content",
            value=source['content'],
            height=400,
            disabled=True,
        )