File size: 7,824 Bytes
a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 24b44d0 a56e4f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
"""
Enhanced response formatter with source referencing and clean output structure.
"""
import streamlit as st
import re
from typing import Dict, List, Optional, Any
from datetime import datetime
import json
class EnhancedResponseFormatter:
    """Turn raw response text into titled markdown sections plus source metadata.

    The formatter promotes known section keywords to ``###`` headings,
    normalizes list markers, groups the text into sections and prepares the
    list of source references ordered by relevance score.
    """

    def __init__(self):
        # Titles that are promoted to a "### <Title>" heading when they stand
        # alone on a line (case-insensitive, optional trailing colon).
        self.section_keywords = [
            "Summary",
            "Key Points",
            "Requirements",
            "Solution",
            "Approach",
            "Benefits",
            "Experience",
            "Technical Details",
            "Implementation",
            "Timeline",
            "Pricing",
            "Why Us",
            "Next Steps",
            "Recommendations",
        ]

    def format_response(self,
                        content: str,
                        sources: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """
        Format the AI response with enhanced structure and source references.

        Args:
            content (str): Raw response content
            sources (List[Dict]): List of source documents with their metadata;
                ``None`` or an empty list yields no source references.

        Returns:
            Dict with three keys: ``'content'`` (the structured markdown
            string), ``'sources'`` (processed source dicts, highest relevance
            score first) and ``'sections'`` (list of ``{'title', 'content'}``
            dicts extracted from the structured markdown).
        """
        cleaned_content = self._clean_content(content)
        structured_content = self._structure_content(cleaned_content)

        source_references = self._process_sources(sources) if sources else []

        return {
            'content': structured_content,
            'sources': source_references,
            'sections': self._extract_sections(structured_content),
        }

    def _clean_content(self, content: str) -> str:
        """Clean and normalize the content.

        Collapses runs of three or more newlines, rewrites lines consisting
        only of a section keyword as ``### <Keyword>`` headings, and
        normalizes bullet and numbered-list markers.
        """
        # Remove multiple newlines
        content = re.sub(r'\n{3,}', '\n\n', content)

        # Ensure consistent heading formatting. The keyword must be the only
        # text on its line, so a keyword inside a sentence is left alone, and
        # any existing markdown heading marker is absorbed instead of being
        # doubled into "### ### Keyword".
        for keyword in self.section_keywords:
            heading = f'### {keyword}\n\n'
            pattern = (
                r'(?im)^[ \t]*(?:#{1,6}[ \t]*)?'
                + re.escape(keyword)
                + r':?\s*\n'
            )
            # A callable replacement keeps the heading text literal.
            content = re.sub(pattern, lambda _match: heading, content)

        # Format lists consistently
        content = re.sub(r'(?m)^[\-\*]\s+', '• ', content)
        content = re.sub(
            r'(?m)^\d+\.\s+',
            lambda m: f"{int(m.group().split('.')[0])}. ",
            content,
        )
        return content

    def _structure_content(self, content: str) -> str:
        """Add structural elements and formatting to the content.

        Content without any ``###`` heading is returned under a single
        ``### Response`` heading. Otherwise every section is re-emitted as
        ``### <title>``, a blank line and the stripped body. Text that appears
        before the first heading is kept under a ``### Response`` heading
        instead of having its first line turned into a title.
        """
        preamble, *sections = re.split(r'(?m)^###\s+', content)

        # If no sections found, add default structure
        if not sections:
            return "### Response\n\n" + content

        formatted_sections = []
        if preamble.strip():
            formatted_sections.append(f"### Response\n\n{preamble.strip()}\n\n")

        for section in sections:
            if not section.strip():
                continue
            # partition() also handles a heading with no body, which would
            # otherwise be dropped when the text ends right after the title.
            title, _, body = section.partition('\n')
            formatted_sections.append(f"### {title.strip()}\n\n{body.strip()}\n\n")

        return "\n".join(formatted_sections)

    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process and format source references.

        Returns one dict per source with the keys ``document_name``,
        ``page_number``, ``chunk_index``, ``content``, ``relevance_score`` and
        ``context``, sorted by ``relevance_score`` in descending order.
        """
        processed_sources = []
        for source in sources:
            # "or" fallbacks cover keys that are present but set to None,
            # which would otherwise break len() and the sort below.
            text = source.get('content') or ''
            processed_sources.append({
                'document_name': source.get('document_name', ''),
                'page_number': source.get('page_number'),
                'chunk_index': source.get('chunk_index'),
                'content': text,
                'relevance_score': source.get('relevance_score') or 0,
                'context': self._extract_context(
                    text,
                    source.get('start_index') or 0,
                    source.get('end_index') or 0,
                ),
            })

        return sorted(processed_sources,
                      key=lambda x: x['relevance_score'],
                      reverse=True)

    def _extract_context(self,
                         content: str,
                         start_idx: int,
                         end_idx: int,
                         context_window: int = 100) -> str:
        """Extract context around the referenced text.

        Returns ``content`` from ``context_window`` characters before
        ``start_idx`` to ``context_window`` characters after ``end_idx``,
        clipped to the string bounds. An ellipsis is added on each side where
        text was cut off.
        """
        start = max(0, start_idx - context_window)
        end = min(len(content), end_idx + context_window)

        context = content[start:end]
        if start > 0:
            context = f"...{context}"
        if end < len(content):
            context = f"{context}..."
        return context

    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
        """Extract sections for navigation.

        Every line starting with ``'### '`` opens a new section; the lines
        that follow, up to the next heading, form its content. Lines before
        the first heading are not part of any section.
        """
        marker = '### '
        sections = []
        current_section = None
        current_content = []

        for line in content.split('\n'):
            if line.startswith(marker):
                if current_section:
                    sections.append({
                        'title': current_section,
                        'content': '\n'.join(current_content),
                    })
                # Strip only the leading marker so a title that itself
                # contains "### " is preserved intact.
                current_section = line[len(marker):].strip()
                current_content = []
            else:
                current_content.append(line)

        if current_section:
            sections.append({
                'title': current_section,
                'content': '\n'.join(current_content),
            })
        return sections
def display_enhanced_response(response_content: str,
                              sources: Optional[List[Dict[str, Any]]] = None):
    """
    Display enhanced response with source references and navigation.

    The response is formatted with ``EnhancedResponseFormatter`` and rendered
    in two Streamlit columns: the sections as expanders on the left, the
    source references on the right.

    Args:
        response_content (str): Raw response content
        sources (List[Dict]): Source documents and metadata; ``None`` or an
            empty list shows an informational notice instead.
    """
    formatter = EnhancedResponseFormatter()
    formatted = formatter.format_response(response_content, sources)

    # Create two columns: main content and source references
    col1, col2 = st.columns([2, 1])

    with col1:
        # Display main content
        if formatted['sections']:
            for section in formatted['sections']:
                with st.expander(section['title'], expanded=True):
                    st.markdown(section['content'])
        else:
            # The formatter produced no sections; show the raw text rather
            # than leaving the main column empty.
            st.markdown(response_content)

    with col2:
        st.markdown("### Source References")
        if formatted['sources']:
            for idx, source in enumerate(formatted['sources']):
                with st.expander(f"📄 {source['document_name']}", expanded=False):
                    st.markdown(f"**Relevance Score:** {source['relevance_score']:.2f}")
                    # Compare explicitly so that a page number of 0 is still
                    # shown; only missing or empty values are skipped.
                    page_number = source['page_number']
                    if page_number is not None and page_number != '':
                        st.markdown(f"**Page:** {page_number}")
                    st.markdown("**Context:**")
                    st.markdown(f"```\n{source['context']}\n```")
                    # The index keeps widget keys unique across sources.
                    if st.button("Show in Document", key=f"show_doc_{idx}"):
                        show_document_context(source)
        else:
            st.info("No source references available for this response.")
def show_document_context(source: Dict[str, Any]):
    """Render a source document's excerpt and full text in two tabs.

    Args:
        source: Source dict; the keys ``'document_name'``, ``'context'`` and
            ``'content'`` are read.
    """
    heading = f"### Document: {source['document_name']}"
    st.markdown(heading)

    # One tab for the excerpt around the reference, one for the whole text.
    context_tab, full_tab = st.tabs(["Context View", "Full Document"])

    with context_tab:
        st.markdown("### Relevant Context")
        fenced_context = f"```\n{source['context']}\n```"
        st.markdown(fenced_context)

    with full_tab:
        st.markdown("### Full Document Content")
        full_text = source['content']
        # Disabled so the text can be read and scrolled but not edited.
        st.text_area("Content", value=full_text, height=400, disabled=True)