File size: 7,824 Bytes
a56e4f2
 
 
24b44d0
 
a56e4f2
 
24b44d0
a56e4f2
24b44d0
a56e4f2
24b44d0
 
 
a56e4f2
 
24b44d0
 
 
 
a56e4f2
24b44d0
 
 
 
a56e4f2
 
24b44d0
 
a56e4f2
 
 
24b44d0
a56e4f2
24b44d0
 
 
a56e4f2
24b44d0
 
a56e4f2
24b44d0
a56e4f2
 
 
24b44d0
a56e4f2
 
24b44d0
a56e4f2
 
 
 
 
24b44d0
 
 
 
a56e4f2
24b44d0
 
 
 
a56e4f2
24b44d0
a56e4f2
 
 
 
 
24b44d0
a56e4f2
 
 
24b44d0
 
a56e4f2
 
 
24b44d0
a56e4f2
 
24b44d0
 
a56e4f2
 
 
 
 
 
 
24b44d0
a56e4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24b44d0
a56e4f2
 
 
 
 
 
 
 
 
 
 
 
24b44d0
a56e4f2
 
 
 
 
 
 
24b44d0
a56e4f2
 
 
 
 
24b44d0
a56e4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24b44d0
a56e4f2
 
24b44d0
a56e4f2
24b44d0
 
 
a56e4f2
24b44d0
a56e4f2
 
24b44d0
a56e4f2
 
24b44d0
a56e4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24b44d0
a56e4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
"""
Enhanced response formatter with source referencing and clean output structure.
"""

import streamlit as st
import re
from typing import Dict, List, Optional, Any
from datetime import datetime
import json

class EnhancedResponseFormatter:
    """Turn raw response text into titled markdown sections plus ranked sources.

    The formatter recognizes a fixed list of section keywords, rewrites lines
    consisting of one of those keywords into ``### <Keyword>`` headings,
    normalizes list markers, and splits the result into sections that a UI
    can render one by one.
    """

    def __init__(self):
        # Titles that are promoted to "### <title>" headings when they occupy
        # a whole line of the response (case-insensitive, optional colon).
        self.section_keywords = [
            "Summary",
            "Key Points",
            "Requirements",
            "Solution",
            "Approach",
            "Benefits",
            "Experience",
            "Technical Details",
            "Implementation",
            "Timeline",
            "Pricing",
            "Why Us",
            "Next Steps",
            "Recommendations"
        ]

    def format_response(self,
                       content: str,
                       sources: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """
        Format the AI response with enhanced structure and source references.

        Args:
            content (str): Raw response content. ``None`` is treated as an
                empty string.
            sources (List[Dict]): List of source documents with their
                metadata. ``None`` or an empty list yields no references.

        Returns:
            Dict with three keys: ``'content'`` (the structured markdown
            string), ``'sources'`` (processed sources, highest relevance
            first) and ``'sections'`` (list of ``{'title', 'content'}``
            dicts extracted from the structured markdown).
        """
        # Clean and structure the content
        cleaned_content = self._clean_content(content or "")
        structured_content = self._structure_content(cleaned_content)

        # Process source references
        source_references = self._process_sources(sources) if sources else []

        return {
            'content': structured_content,
            'sources': source_references,
            'sections': self._extract_sections(structured_content)
        }

    def _clean_content(self, content: str) -> str:
        """Clean and normalize the content.

        Collapses runs of three or more newlines, rewrites keyword lines as
        ``### <Keyword>`` headings, converts ``-``/``*`` bullets to ``•`` and
        normalizes ``N.`` list markers (leading zeros and extra spacing).
        """
        # Remove multiple newlines
        content = re.sub(r'\n{3,}', '\n\n', content)

        # Ensure consistent heading formatting. The keyword must fill the
        # whole line so that a sentence merely ending in "approach" is not
        # turned into a heading; existing '#' markers are absorbed so that
        # "### Summary" does not become "### ### Summary".
        for keyword in self.section_keywords:
            pattern = (
                rf'(?im)^[ \t]*(?:#+[ \t]*)?{re.escape(keyword)}:?[ \t]*\n'
                r'(?:[ \t]*\n)*'
            )
            # A callable replacement keeps the keyword text literal (no
            # backslash/group-reference interpretation).
            content = re.sub(pattern,
                             lambda _match, kw=keyword: f'### {kw}\n\n',
                             content)

        # Format lists consistently. Only horizontal whitespace is consumed
        # after the marker so a bare marker never swallows the next line.
        content = re.sub(r'(?m)^[\-\*][ \t]+', '• ', content)
        content = re.sub(r'(?m)^(\d+)\.[ \t]+',
                         lambda m: f"{int(m.group(1))}. ",
                         content)

        return content

    def _structure_content(self, content: str) -> str:
        """Add structural elements and formatting to the content.

        Text without any ``###`` heading is wrapped in a single
        ``### Response`` section. Otherwise every section is re-emitted as
        ``### <title>``, a blank line, the stripped body and a blank line.
        Text that precedes the first heading is kept under a
        ``### Response`` heading instead of having its first line mistaken
        for a title.
        """
        # Split into sections; element 0 is whatever precedes the first heading
        parts = re.split(r'(?m)^###\s+', content)

        # If no sections found, add default structure
        if len(parts) == 1:
            return "### Response\n\n" + content

        preamble = parts[0].strip()
        formatted_sections = []
        if preamble:
            formatted_sections.append(f"### Response\n\n{preamble}\n\n")

        for section in parts[1:]:
            if not section.strip():
                continue
            # partition() also handles a title-only section (no newline),
            # which is kept with an empty body rather than dropped.
            title, _, body = section.partition('\n')
            formatted_sections.append(f"### {title.strip()}\n\n{body.strip()}\n\n")

        return "\n".join(formatted_sections)

    @staticmethod
    def _value_or(source: Dict[str, Any], key: str, default: Any) -> Any:
        """Return ``source[key]``, or ``default`` when missing or ``None``."""
        value = source.get(key)
        return default if value is None else value

    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process and format source references.

        Builds a new dict per source (the input dicts are not modified) with
        the keys ``document_name``, ``page_number``, ``chunk_index``,
        ``content``, ``relevance_score`` and ``context``. Missing or ``None``
        text fields become ``''`` and a missing or ``None`` score becomes
        ``0`` so that sorting never compares ``None`` with a number.

        Returns:
            The processed sources sorted by ``relevance_score``, highest
            first; ties keep their input order.
        """
        processed_sources = []
        for source in sources:
            text = self._value_or(source, 'content', '')
            processed_sources.append({
                'document_name': self._value_or(source, 'document_name', ''),
                'page_number': source.get('page_number'),
                'chunk_index': source.get('chunk_index'),
                'content': text,
                'relevance_score': self._value_or(source, 'relevance_score', 0),
                'context': self._extract_context(
                    text,
                    self._value_or(source, 'start_index', 0),
                    self._value_or(source, 'end_index', 0)),
            })

        return sorted(processed_sources,
                     key=lambda x: x['relevance_score'],
                     reverse=True)

    def _extract_context(self,
                        content: str,
                        start_idx: int,
                        end_idx: int,
                        context_window: int = 100) -> str:
        """Extract context around the referenced text.

        Args:
            content: Full text of the source.
            start_idx: Index where the referenced span starts.
            end_idx: Index where the referenced span ends.
            context_window: Number of extra characters to include on each
                side of the span.

        Returns:
            The span widened by ``context_window`` on both sides, clipped to
            the text bounds, with ``...`` added on each side that was cut.
            Empty or ``None`` content yields ``''``.
        """
        if not content:
            return ""

        start = max(0, start_idx - context_window)
        # Never let the end fall before the start: a negative end would
        # otherwise be interpreted as an offset from the end of the text.
        end = max(start, min(len(content), end_idx + context_window))

        context = content[start:end]
        if start > 0:
            context = f"...{context}"
        if end < len(content):
            context = f"{context}..."

        return context

    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
        """Extract sections for navigation.

        A section starts at each line beginning with ``'### '`` and runs up
        to the next such line. Lines that appear before the first heading
        belong to no section and are not returned.

        Returns:
            List of ``{'title': ..., 'content': ...}`` dicts in document
            order; ``content`` is the section's lines joined with newlines.
        """
        heading_prefix = '### '
        sections = []
        current_section = None
        current_content = []

        for line in content.split('\n'):
            if line.startswith(heading_prefix):
                if current_section is not None:
                    sections.append({
                        'title': current_section,
                        'content': '\n'.join(current_content)
                    })
                # Strip only the leading marker; a later '### ' inside the
                # title text is part of the title.
                current_section = line[len(heading_prefix):].strip()
                current_content = []
            else:
                current_content.append(line)

        if current_section is not None:
            sections.append({
                'title': current_section,
                'content': '\n'.join(current_content)
            })

        return sections

def display_enhanced_response(response_content: str, 
                            sources: Optional[List[Dict[str, Any]]] = None):
    """
    Display enhanced response with source references and navigation.

    The response is formatted with ``EnhancedResponseFormatter`` and rendered
    in two Streamlit columns: the sections (one expanded expander each) on
    the left and the source references (one collapsed expander each) on the
    right. Clicking a source's "Show in Document" button renders the
    document view inside that source's expander.

    Args:
        response_content (str): Raw response content
        sources (List[Dict]): Source documents and metadata; ``None`` or an
            empty list shows an informational placeholder instead.
    """
    formatter = EnhancedResponseFormatter()
    formatted = formatter.format_response(response_content, sources)

    # Create two columns: main content and source references
    col1, col2 = st.columns([2, 1])

    with col1:
        # Display main content
        if formatted['sections']:
            for section in formatted['sections']:
                with st.expander(section['title'], expanded=True):
                    st.markdown(section['content'])
        elif response_content and response_content.strip():
            # Formatting produced no sections; show the raw text rather
            # than leaving the column blank.
            st.markdown(response_content)

    with col2:
        st.markdown("### Source References")

        if formatted['sources']:
            for idx, source in enumerate(formatted['sources']):
                with st.expander(f"📄 {source['document_name']}", expanded=False):
                    # A missing score is shown as 0 instead of crashing
                    # the float format on None.
                    score = source['relevance_score']
                    if score is None:
                        score = 0
                    st.markdown(f"**Relevance Score:** {score:.2f}")

                    # Compare against None: 0 is a valid page number when
                    # pages are numbered from zero.
                    if source['page_number'] is not None:
                        st.markdown(f"**Page:** {source['page_number']}")

                    st.markdown("**Context:**")
                    st.markdown(f"```\n{source['context']}\n```")

                    # The index keeps widget keys unique across sources
                    if st.button("Show in Document", key=f"show_doc_{idx}"):
                        show_document_context(source)
        else:
            st.info("No source references available for this response.")

def show_document_context(source: Dict[str, Any]):
    """Render one source document as a heading plus two tabs.

    The output is written into whatever Streamlit container is active at
    the call site. The first tab shows the context excerpt in a fenced
    block; the second shows the full document text in a read-only text
    area.

    Args:
        source: Source dict providing the ``'document_name'``,
            ``'context'`` and ``'content'`` keys.
    """
    document_name = source['document_name']
    st.markdown(f"### Document: {document_name}")

    # One tab for the excerpt, one for the complete text
    context_tab, full_document_tab = st.tabs(["Context View", "Full Document"])

    with context_tab:
        st.markdown("### Relevant Context")
        excerpt = source['context']
        st.markdown(f"```\n{excerpt}\n```")

    with full_document_tab:
        st.markdown("### Full Document Content")
        full_text = source['content']
        # disabled=True makes the text area read-only
        st.text_area("Content", value=full_text, height=400, disabled=True)