|
|
""" |
|
|
Enhanced response formatter with source referencing and clean output structure. |
|
|
""" |
|
|
|
|
|
import streamlit as st |
|
|
import re |
|
|
from typing import Dict, List, Optional, Any |
|
|
from datetime import datetime |
|
|
import json |
|
|
|
|
|
class EnhancedResponseFormatter:
    """Turn raw AI response text into titled markdown sections with sources.

    The formatter collapses excess blank lines, promotes lines that consist
    of a known heading phrase (see ``section_keywords``) to ``###`` markdown
    headings, normalises list markers, and returns the result together with
    processed source references and a per-section breakdown.
    """

    def __init__(self):
        """Initialise the phrases that are promoted to section headings."""
        self.section_keywords = [
            "Summary",
            "Key Points",
            "Requirements",
            "Solution",
            "Approach",
            "Benefits",
            "Experience",
            "Technical Details",
            "Implementation",
            "Timeline",
            "Pricing",
            "Why Us",
            "Next Steps",
            "Recommendations",
        ]

    def format_response(self,
                        content: str,
                        sources: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """
        Format the AI response with enhanced structure and source references.

        Args:
            content (str): Raw response content. ``None`` is treated as an
                empty string.
            sources (List[Dict]): List of source documents with their
                metadata; ``None`` or an empty list yields no references.

        Returns:
            Dict with three keys: ``'content'`` (the structured markdown),
            ``'sources'`` (processed references, highest relevance first) and
            ``'sections'`` (list of ``{'title', 'content'}`` dicts).
        """
        cleaned_content = self._clean_content(content or "")
        structured_content = self._structure_content(cleaned_content)

        source_references = self._process_sources(sources) if sources else []

        return {
            'content': structured_content,
            'sources': source_references,
            'sections': self._extract_sections(structured_content),
        }

    def _clean_content(self, content: str) -> str:
        """Clean and normalize the content.

        Collapses runs of three or more newlines, rewrites heading lines,
        converts ``-``/``*`` list markers to ``•`` and normalises numbered
        list prefixes (``01.  x`` becomes ``1. x``).
        """
        content = re.sub(r'\n{3,}', '\n\n', content)

        # Map the case-folded phrase back to its canonical spelling so the
        # emitted heading always uses the casing from ``section_keywords``.
        canonical = {keyword.casefold(): keyword for keyword in self.section_keywords}
        if canonical:
            alternatives = '|'.join(re.escape(keyword) for keyword in self.section_keywords)
            # A heading must occupy the whole line.  Anchoring at the line
            # start keeps phrases inside a sentence ("Our Approach") intact,
            # and consuming any existing '#' markers prevents an already
            # formatted "### Summary" from turning into "### ### Summary".
            heading_re = re.compile(
                rf'^[ \t]*(?:#{{1,6}}[ \t]*)?({alternatives}):?\s*\n',
                re.IGNORECASE | re.MULTILINE,
            )

            def _as_heading(match):
                phrase = match.group(1)
                return f"### {canonical.get(phrase.casefold(), phrase)}\n\n"

            content = heading_re.sub(_as_heading, content)

        content = re.sub(r'(?m)^[\-\*]\s+', '• ', content)
        content = re.sub(r'(?m)^(\d+)\.\s+', lambda m: f"{int(m.group(1))}. ", content)

        return content

    def _structure_content(self, content: str) -> str:
        """Add structural elements and formatting to the content.

        Content without any ``###`` heading is wrapped in a single
        ``### Response`` section.  Otherwise every heading becomes a
        normalised ``### Title`` block; text that appears before the first
        heading is kept under ``### Response`` and a heading without a body
        is preserved with an empty body.
        """
        pieces = re.split(r'(?m)^###\s+', content)

        if len(pieces) == 1:
            return "### Response\n\n" + content

        # pieces[0] is whatever precedes the first heading; it has no title
        # of its own, so it must not be parsed as "title line + body".
        preamble, headed_pieces = pieces[0], pieces[1:]

        formatted_sections = []
        if preamble.strip():
            formatted_sections.append(f"### Response\n\n{preamble.strip()}\n\n")

        for piece in headed_pieces:
            if not piece.strip():
                continue
            # partition() also handles a title with no following newline,
            # which would otherwise be dropped.
            title, _, body = piece.partition('\n')
            formatted_sections.append(f"### {title.strip()}\n\n{body.strip()}\n\n")

        return "\n".join(formatted_sections)

    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process and format source references.

        Each source dict is reduced to the keys ``document_name``,
        ``page_number``, ``chunk_index``, ``content``, ``relevance_score``
        and ``context``.  Missing or ``None`` content, score and offsets fall
        back to ``''`` / ``0`` so sorting and context extraction cannot fail
        on them.  The result is ordered by relevance score, highest first.
        """
        processed_sources = []
        for source in sources or []:
            text = source.get('content') or ''
            score = source.get('relevance_score')
            if score is None:
                score = 0

            processed_sources.append({
                'document_name': source.get('document_name', ''),
                'page_number': source.get('page_number'),
                'chunk_index': source.get('chunk_index'),
                'content': text,
                'relevance_score': score,
                'context': self._extract_context(text,
                                                 source.get('start_index') or 0,
                                                 source.get('end_index') or 0),
            })

        return sorted(processed_sources,
                      key=lambda item: item['relevance_score'],
                      reverse=True)

    def _extract_context(self,
                         content: str,
                         start_idx: int,
                         end_idx: int,
                         context_window: int = 100) -> str:
        """Extract context around the referenced text.

        Returns ``content[start_idx - context_window:end_idx + context_window]``
        clamped to the bounds of ``content``, with ``...`` added on each side
        that was truncated.
        """
        content = content or ""
        length = len(content)

        # Clamp both offsets into [0, length] and keep end >= start so a
        # negative or inverted range can never slice from the wrong side.
        start = min(length, max(0, start_idx - context_window))
        end = max(start, min(length, end_idx + context_window))

        context = content[start:end]
        if start > 0:
            context = f"...{context}"
        if end < length:
            context = f"{context}..."

        return context

    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
        """Extract sections for navigation.

        Splits ``content`` on lines starting with ``'### '`` and returns one
        ``{'title', 'content'}`` dict per heading.  Lines before the first
        heading are not included in any section.
        """
        marker = '### '
        sections = []
        current_section = None
        current_content = []

        for line in content.split('\n'):
            if line.startswith(marker):
                if current_section:
                    sections.append({
                        'title': current_section,
                        'content': '\n'.join(current_content),
                    })
                # Strip only the leading marker; a later '### ' inside the
                # title text is part of the title.
                current_section = line[len(marker):].strip()
                current_content = []
            else:
                current_content.append(line)

        if current_section:
            sections.append({
                'title': current_section,
                'content': '\n'.join(current_content),
            })

        return sections
|
|
|
|
|
def display_enhanced_response(response_content: str,
                              sources: Optional[List[Dict[str, Any]]] = None):
    """
    Display enhanced response with source references and navigation.

    Formats the response with ``EnhancedResponseFormatter`` and renders it in
    two Streamlit columns: the response sections as expanded expanders on the
    left, and one collapsed expander per source reference on the right.

    Args:
        response_content (str): Raw response content
        sources (List[Dict]): Source documents and metadata
    """
    formatter = EnhancedResponseFormatter()
    formatted = formatter.format_response(response_content, sources)

    col1, col2 = st.columns([2, 1])

    with col1:
        if formatted['sections']:
            for section in formatted['sections']:
                with st.expander(section['title'], expanded=True):
                    st.markdown(section['content'])

    with col2:
        st.markdown("### Source References")

        if formatted['sources']:
            for idx, source in enumerate(formatted['sources']):
                with st.expander(f"📄 {source['document_name']}", expanded=False):
                    st.markdown(f"**Relevance Score:** {source['relevance_score']:.2f}")
                    # Compare against "missing" explicitly: a plain truthiness
                    # test would hide page 0, which is a valid page index.
                    if source['page_number'] not in (None, ''):
                        st.markdown(f"**Page:** {source['page_number']}")

                    st.markdown("**Context:**")
                    st.markdown(f"```\n{source['context']}\n```")

                    # The index in the key keeps button keys unique when two
                    # sources share the same document name.
                    if st.button("Show in Document", key=f"show_doc_{idx}"):
                        show_document_context(source)
        else:
            st.info("No source references available for this response.")
|
|
|
|
|
def show_document_context(source: Dict[str, Any]):
    """Render a source document as a heading followed by two tabs.

    The "Context View" tab shows ``source['context']`` in a fenced code
    block; the "Full Document" tab shows ``source['content']`` in a
    read-only text area.

    Args:
        source (Dict): Processed source reference; the keys
            ``'document_name'``, ``'context'`` and ``'content'`` are read.
    """
    document_name = source['document_name']
    st.markdown(f"### Document: {document_name}")

    tab_labels = ["Context View", "Full Document"]
    context_tab, document_tab = st.tabs(tab_labels)

    with context_tab:
        st.markdown("### Relevant Context")
        st.markdown(f"```\n{source['context']}\n```")

    with document_tab:
        st.markdown("### Full Document Content")
        # disabled=True makes the text area read-only.
        st.text_area("Content", value=source['content'], height=400, disabled=True)