Spaces:

cryogenic22
/

RFP_Analyzer_Agent

Build error

App Files Files Community

RFP_Analyzer_Agent / utils /response_formatter.py

cryogenic22

Update utils/response_formatter.py

a56e4f2 verified about 1 year ago

raw

history blame contribute delete

7.82 kB

	"""
	Enhanced response formatter with source referencing and clean output structure.
	"""

	import streamlit as st
	import re
	from typing import Dict, List, Optional, Any
	from datetime import datetime
	import json

	class EnhancedResponseFormatter:
	    """Turn raw AI response text into titled Markdown sections with ranked sources.

	    The formatter normalizes known section keywords into ``### Title`` headings,
	    normalizes list markers, splits the text into sections, and prepares source
	    metadata (sorted by relevance score) for display.
	    """

	    # Heading used when the response contains no recognized section heading.
	    DEFAULT_SECTION_TITLE = "Response"
	    # Heading used for text that appears before the first recognized heading.
	    PREAMBLE_SECTION_TITLE = "Overview"

	    def __init__(self):
	        """Set up the keywords that are promoted to section headings."""
	        self.section_keywords = [
	            "Summary",
	            "Key Points",
	            "Requirements",
	            "Solution",
	            "Approach",
	            "Benefits",
	            "Experience",
	            "Technical Details",
	            "Implementation",
	            "Timeline",
	            "Pricing",
	            "Why Us",
	            "Next Steps",
	            "Recommendations",
	        ]

	    def format_response(self,
	                        content: str,
	                        sources: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
	        """
	        Format the AI response with enhanced structure and source references.

	        Args:
	            content (str): Raw response content. ``None`` is treated as empty text.
	            sources (List[Dict]): List of source documents with their metadata.

	        Returns:
	            Dict with keys ``'content'`` (structured Markdown), ``'sources'``
	            (processed sources, highest relevance first) and ``'sections'``
	            (list of ``{'title', 'content'}`` dicts).
	        """
	        cleaned_content = self._clean_content(content or "")
	        structured_content = self._structure_content(cleaned_content)

	        source_references = self._process_sources(sources) if sources else []

	        return {
	            'content': structured_content,
	            'sources': source_references,
	            'sections': self._extract_sections(structured_content),
	        }

	    def _clean_content(self, content: str) -> str:
	        """Collapse blank-line runs and normalize headings and list markers."""
	        # Collapse three or more consecutive newlines into one blank line.
	        content = re.sub(r'\n{3,}', '\n\n', content)

	        # Promote a keyword to a heading only when it stands alone on its line
	        # (optionally already prefixed with '#', wrapped in '**', or followed by
	        # ':'). Anchoring at line start keeps sentences that merely end with a
	        # keyword intact and avoids doubling an existing '###' prefix.
	        for keyword in self.section_keywords:
	            pattern = (
	                rf'(?im)^[ \t]*(?:#+[ \t]*)?(?:\*\*)?{re.escape(keyword)}'
	                rf':?(?:\*\*)?:?\s*\n'
	            )
	            content = re.sub(
	                pattern,
	                lambda _match, kw=keyword: f'### {kw}\n\n',
	                content,
	            )

	        # Normalize '-' / '*' bullets and numbered items; only horizontal
	        # whitespace is consumed so a marker never absorbs the next line.
	        content = re.sub(r'(?m)^[\-\*][ \t]+', '• ', content)
	        content = re.sub(
	            r'(?m)^(\d+)\.[ \t]+',
	            lambda m: f"{int(m.group(1))}. ",
	            content,
	        )

	        return content

	    def _structure_content(self, content: str) -> str:
	        """Rebuild the text as a sequence of ``### Title`` sections.

	        Text without any heading is wrapped in a single default section. Text
	        that precedes the first heading is kept under its own heading instead
	        of having its first line mistaken for a title.
	        """
	        parts = re.split(r'(?m)^###[ \t]+', content)

	        # No heading found: wrap everything in the default section.
	        if len(parts) == 1:
	            return f"### {self.DEFAULT_SECTION_TITLE}\n\n" + content

	        preamble, *headed_parts = parts
	        blocks = []

	        if preamble.strip():
	            blocks.append(
	                f"### {self.PREAMBLE_SECTION_TITLE}\n\n{preamble.strip()}"
	            )

	        for part in headed_parts:
	            if not part.strip():
	                continue
	            # First line is the title; the remainder (possibly empty) is the body.
	            title, _, body = part.partition('\n')
	            title = title.strip() or self.DEFAULT_SECTION_TITLE
	            blocks.append(f"### {title}\n\n{body.strip()}".rstrip())

	        return "\n\n".join(blocks)

	    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	        """Normalize source metadata and sort it by relevance, highest first.

	        Missing or ``None`` values fall back to neutral defaults so that
	        sorting and context extraction cannot fail on incomplete metadata.
	        """
	        processed_sources = []
	        for source in sources or []:
	            text = source.get('content') or ''
	            processed_sources.append({
	                'document_name': source.get('document_name') or '',
	                'page_number': source.get('page_number'),
	                'chunk_index': source.get('chunk_index'),
	                'content': text,
	                'relevance_score': source.get('relevance_score') or 0,
	                'context': self._extract_context(
	                    text,
	                    source.get('start_index') or 0,
	                    source.get('end_index') or 0,
	                ),
	            })

	        return sorted(processed_sources,
	                      key=lambda item: item['relevance_score'],
	                      reverse=True)

	    def _extract_context(self,
	                         content: str,
	                         start_idx: int,
	                         end_idx: int,
	                         context_window: int = 100) -> str:
	        """Return the referenced span plus ``context_window`` characters per side.

	        An ellipsis is added on each side where the text was truncated.
	        Out-of-range or inverted indices are clamped into the content.
	        """
	        length = len(content)
	        start_idx = min(max(start_idx, 0), length)
	        end_idx = min(max(end_idx, start_idx), length)

	        start = max(0, start_idx - context_window)
	        end = min(length, end_idx + context_window)

	        context = content[start:end]
	        if start > 0:
	            context = f"...{context}"
	        if end < length:
	            context = f"{context}..."

	        return context

	    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
	        """Split structured Markdown into ``{'title', 'content'}`` dicts.

	        A new section starts at every line beginning with ``'### '``. Non-blank
	        text before the first heading is kept under the preamble title.
	        """
	        sections: List[Dict[str, str]] = []
	        current_title = None
	        current_lines: List[str] = []

	        for line in content.split('\n'):
	            if line.startswith('### '):
	                self._append_section(sections, current_title, current_lines)
	                # Strip only the heading prefix, not later '### ' occurrences.
	                current_title = line[len('### '):].strip()
	                current_lines = []
	            else:
	                current_lines.append(line)

	        self._append_section(sections, current_title, current_lines)
	        return sections

	    def _append_section(self,
	                        sections: List[Dict[str, str]],
	                        title: Optional[str],
	                        lines: List[str]) -> None:
	        """Append one section to ``sections``; skip an empty untitled preamble."""
	        # Trim surrounding blank lines only, preserving indentation in the body.
	        body = '\n'.join(lines).strip('\n')
	        if title is None:
	            if not body.strip():
	                return
	            title = self.PREAMBLE_SECTION_TITLE
	        sections.append({'title': title, 'content': body})

	def display_enhanced_response(response_content: str,
	                              sources: Optional[List[Dict[str, Any]]] = None):
	    """
	    Display enhanced response with source references and navigation.

	    The response is formatted with ``EnhancedResponseFormatter`` and rendered
	    in two Streamlit columns: response sections (each in an expanded expander)
	    on the left, and one collapsed expander per source on the right.

	    Args:
	        response_content (str): Raw response content
	        sources (List[Dict]): Source documents and metadata
	    """
	    formatter = EnhancedResponseFormatter()
	    formatted = formatter.format_response(response_content, sources)

	    # Two columns: main content (wider) and source references.
	    content_col, sources_col = st.columns([2, 1])

	    with content_col:
	        for section in formatted['sections']:
	            with st.expander(section['title'], expanded=True):
	                st.markdown(section['content'])

	    with sources_col:
	        st.markdown("### Source References")

	        if formatted['sources']:
	            for idx, source in enumerate(formatted['sources']):
	                with st.expander(f"📄 {source['document_name']}", expanded=False):
	                    # A None score would break the ':.2f' format spec.
	                    score = source['relevance_score'] or 0
	                    st.markdown(f"Relevance Score: {score:.2f}")
	                    # Compare against None so a valid page number 0 is shown.
	                    if source['page_number'] is not None:
	                        st.markdown(f"Page: {source['page_number']}")

	                    st.markdown("Context:")
	                    st.markdown(f"```\n{source['context']}\n```")

	                    # Index-based key keeps each button's widget ID unique.
	                    if st.button("Show in Document", key=f"show_doc_{idx}"):
	                        show_document_context(source)
	        else:
	            st.info("No source references available for this response.")

	def show_document_context(source: Dict[str, Any]):
	    """Render a source document as a heading followed by two tabs.

	    The first tab shows the extracted context in a fenced code block; the
	    second shows the full document text in a read-only text area.

	    Args:
	        source (Dict): Source entry; the keys ``'document_name'``,
	            ``'context'`` and ``'content'`` are read.
	    """
	    document_name = source['document_name']
	    st.markdown(f"### Document: {document_name}")

	    # One tab for the excerpt, one for the complete text.
	    tab_labels = ["Context View", "Full Document"]
	    context_tab, document_tab = st.tabs(tab_labels)

	    with context_tab:
	        st.markdown("### Relevant Context")
	        fenced_context = f"```\n{source['context']}\n```"
	        st.markdown(fenced_context)

	    with document_tab:
	        st.markdown("### Full Document Content")
	        full_text = source['content']
	        st.text_area("Content", value=full_text, height=400, disabled=True)