RFP_Analyzer_Agent_backup

Build error

App Files Community

cryogenic22 committed on Dec 2, 2024

Commit

a56e4f2

verified ·

1 Parent(s): 676d600

Update utils/response_formatter.py

Browse files

Files changed (1) hide show

utils/response_formatter.py +157 -91

utils/response_formatter.py CHANGED Viewed

@@ -1,146 +1,212 @@
-# utils/response_formatter.py
-import re
-from typing import Dict, Optional
 import streamlit as st
 from datetime import datetime
-class ResponseFormatter:
     def __init__(self):
         self.section_keywords = [
             "Summary",
-            "Overview",
-            "Background",
             "Solution",
             "Approach",
             "Benefits",
             "Experience",
-            "Methodology",
             "Implementation",
             "Timeline",
             "Pricing",
             "Why Us",
-            "Next Steps"
         ]
-    def format_response(self, content: str, metadata: Optional[Dict] = None) -> str:
         """
-        Format the AI response into a clean, structured format.
         Args:
             content (str): Raw response content
-            metadata (Optional[Dict]): Additional metadata like sources
         Returns:
-            str: Formatted response
         """
-        # Clean up the content
-        formatted = self._clean_content(content)
-        # Add structural formatting
-        formatted = self._add_structure(formatted)
-        # Format lists and bullet points
-        formatted = self._format_lists(formatted)
-        # Add source references if available
-        if metadata and 'sources' in metadata:
-            formatted = self._add_sources(formatted, metadata['sources'])
-        return formatted
     def _clean_content(self, content: str) -> str:
         """Clean and normalize the content."""
         # Remove multiple newlines
-        cleaned = re.sub(r'\n{3,}', '\n\n', content)
         # Ensure consistent heading formatting
         for keyword in self.section_keywords:
             pattern = rf'(?i)({keyword}:?)\s*\n'
-            cleaned = re.sub(pattern, f'### {keyword}\n\n', cleaned)
-        return cleaned
-    def _add_structure(self, content: str) -> str:
-        """Add structural elements to the content."""
-        # Add section breaks
         sections = re.split(r'(?m)^###\s+', content)
-        if len(sections) == 1:  # No sections found
-            # Add a default section if none exists
-            content = "### Response\n\n" + content
-        # Add copy button for each section
-        structured = ""
         for section in sections:
             if section.strip():
-                section_title = section.split('\n')[0].strip()
-                section_content = '\n'.join(section.split('\n')[1:]).strip()
-                structured += f"### {section_title}\n\n"
-                structured += f"{section_content}\n\n"
-        return structured
-    def _format_lists(self, content: str) -> str:
-        """Format lists and bullet points consistently."""
-        # Format bullet points
-        content = re.sub(r'(?m)^[\-\*]\s+', '• ', content)
-        # Format numbered lists
-        content = re.sub(r'(?m)^\d+\.\s+', lambda m: f"{int(m.group().split('.')[0])}. ", content)
-        return content
-    def _add_sources(self, content: str, sources: list) -> str:
-        """Add source references to the content."""
-        if sources:
-            content += "\n\n---\n\n### Sources\n\n"
-            for source in sources:
-                content += f"• {source}\n"
-        return content
-def display_formatted_response(response_content: str, metadata: Optional[Dict] = None):
     """
-    Display a formatted response in the Streamlit interface.
     Args:
         response_content (str): Raw response content
-        metadata (Optional[Dict]): Additional metadata like sources
     """
-    formatter = ResponseFormatter()
-    formatted_content = formatter.format_response(response_content, metadata)
-    # Display formatted content
-    st.markdown(formatted_content)
-    # Add copy buttons for sections
-    sections = re.split(r'(?m)^###\s+', formatted_content)
-    for section in sections:
-        if section.strip():
-            section_title = section.split('\n')[0].strip()
-            section_content = '\n'.join(section.split('\n')[1:]).strip()
-            with st.expander(f"Copy {section_title}", expanded=False):
-                st.text_area(
-                    "Copy this content:",
-                    value=section_content,
-                    height=200,
-                    key=f"copy_{section_title}_{datetime.now().timestamp()}"
-                )
-                if st.button(f"Copy {section_title}", key=f"btn_{section_title}_{datetime.now().timestamp()}"):
-                    st.write("Content copied to clipboard!")
-# Example usage in your chat interface:
-"""
-def display_chat_message(message, is_user=False):
-    if is_user:
-        st.chat_message("user").write(message)
-    else:
-        with st.chat_message("assistant"):
-            display_formatted_response(
-                message.content,
-                metadata=getattr(message, 'metadata', None)
-            )
-"""

+"""
+Enhanced response formatter with source referencing and clean output structure.
+"""
 import streamlit as st
+import re
+from typing import Dict, List, Optional, Any
 from datetime import datetime
+import json
+class EnhancedResponseFormatter:
     def __init__(self):
         self.section_keywords = [
             "Summary",
+            "Key Points",
+            "Requirements",
             "Solution",
             "Approach",
             "Benefits",
             "Experience",
+            "Technical Details",
             "Implementation",
             "Timeline",
             "Pricing",
             "Why Us",
+            "Next Steps",
+            "Recommendations"
         ]
+    def format_response(self,
+                       content: str,
+                       sources: List[Dict[str, Any]] = None) -> Dict[str, Any]:
         """
+        Format the AI response with enhanced structure and source references.
         Args:
             content (str): Raw response content
+            sources (List[Dict]): List of source documents with their metadata
         Returns:
+            Dict with formatted content and source references
         """
+        # Clean and structure the content
+        cleaned_content = self._clean_content(content)
+        structured_content = self._structure_content(cleaned_content)
+        # Process source references
+        source_references = self._process_sources(sources) if sources else []
+        return {
+            'content': structured_content,
+            'sources': source_references,
+            'sections': self._extract_sections(structured_content)
+        }
     def _clean_content(self, content: str) -> str:
         """Clean and normalize the content."""
         # Remove multiple newlines
+        content = re.sub(r'\n{3,}', '\n\n', content)
         # Ensure consistent heading formatting
         for keyword in self.section_keywords:
             pattern = rf'(?i)({keyword}:?)\s*\n'
+            content = re.sub(pattern, f'### {keyword}\n\n', content)
+        # Format lists consistently
+        content = re.sub(r'(?m)^[\-\*]\s+', '• ', content)
+        content = re.sub(r'(?m)^\d+\.\s+', lambda m: f"{int(m.group().split('.')[0])}. ", content)
+        return content
+    def _structure_content(self, content: str) -> str:
+        """Add structural elements and formatting to the content."""
+        # Split into sections
         sections = re.split(r'(?m)^###\s+', content)
+        # If no sections found, add default structure
+        if len(sections) == 1:
+            return "### Response\n\n" + content
+        # Process each section
+        formatted_sections = []
         for section in sections:
             if section.strip():
+                # Extract section title and content
+                lines = section.split('\n', 1)
+                if len(lines) == 2:
+                    title, content = lines
+                    formatted_sections.append(f"### {title.strip()}\n\n{content.strip()}\n\n")
+        return "\n".join(formatted_sections)
+    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Process and format source references."""
+        processed_sources = []
+        for source in sources:
+            processed_source = {
+                'document_name': source.get('document_name', ''),
+                'page_number': source.get('page_number'),
+                'chunk_index': source.get('chunk_index'),
+                'content': source.get('content', ''),
+                'relevance_score': source.get('relevance_score', 0),
+                'context': self._extract_context(source.get('content', ''),
+                                              source.get('start_index', 0),
+                                              source.get('end_index', 0))
+            }
+            processed_sources.append(processed_source)
+        return sorted(processed_sources,
+                     key=lambda x: x['relevance_score'],
+                     reverse=True)
+    def _extract_context(self,
+                        content: str,
+                        start_idx: int,
+                        end_idx: int,
+                        context_window: int = 100) -> str:
+        """Extract context around the referenced text."""
+        start = max(0, start_idx - context_window)
+        end = min(len(content), end_idx + context_window)
+        context = content[start:end]
+        if start > 0:
+            context = f"...{context}"
+        if end < len(content):
+            context = f"{context}..."
+        return context
+    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
+        """Extract sections for navigation."""
+        sections = []
+        current_section = None
+        current_content = []
+        for line in content.split('\n'):
+            if line.startswith('### '):
+                if current_section:
+                    sections.append({
+                        'title': current_section,
+                        'content': '\n'.join(current_content)
+                    })
+                current_section = line.replace('### ', '').strip()
+                current_content = []
+            else:
+                current_content.append(line)
+        if current_section:
+            sections.append({
+                'title': current_section,
+                'content': '\n'.join(current_content)
+            })
+        return sections
+def display_enhanced_response(response_content: str,
+                            sources: List[Dict[str, Any]] = None):
     """
+    Display enhanced response with source references and navigation.
     Args:
         response_content (str): Raw response content
+        sources (List[Dict]): Source documents and metadata
     """
+    formatter = EnhancedResponseFormatter()
+    formatted = formatter.format_response(response_content, sources)
+    # Create two columns: main content and source references
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        # Display main content
+        if formatted['sections']:
+            for section in formatted['sections']:
+                with st.expander(section['title'], expanded=True):
+                    st.markdown(section['content'])
+    with col2:
+        st.markdown("### Source References")
+        if formatted['sources']:
+            for idx, source in enumerate(formatted['sources']):
+                with st.expander(f"📄 {source['document_name']}", expanded=False):
+                    st.markdown(f"**Relevance Score:** {source['relevance_score']:.2f}")
+                    if source['page_number']:
+                        st.markdown(f"**Page:** {source['page_number']}")
+                    st.markdown("**Context:**")
+                    st.markdown(f"```\n{source['context']}\n```")
+                    if st.button("Show in Document", key=f"show_doc_{idx}"):
+                        show_document_context(source)
+        else:
+            st.info("No source references available for this response.")
+def show_document_context(source: Dict[str, Any]):
+    """Display the full document context in a modal."""
+    st.markdown(f"### Document: {source['document_name']}")
+    # Create tabs for different views
+    tab1, tab2 = st.tabs(["Context View", "Full Document"])
+    with tab1:
+        st.markdown("### Relevant Context")
+        st.markdown(f"```\n{source['context']}\n```")
+    with tab2:
+        st.markdown("### Full Document Content")
+        st.text_area("Content",
+                    value=source['content'],
+                    height=400,
+                    disabled=True)