cryogenic22 committed on
Commit
a56e4f2
·
verified ·
1 Parent(s): 676d600

Update utils/response_formatter.py

Browse files
Files changed (1) hide show
  1. utils/response_formatter.py +157 -91
utils/response_formatter.py CHANGED
@@ -1,146 +1,212 @@
1
- # utils/response_formatter.py
 
 
2
 
3
- import re
4
- from typing import Dict, Optional
5
  import streamlit as st
 
 
6
  from datetime import datetime
 
7
 
8
class ResponseFormatter:
    """Turn raw AI response text into a consistently structured markdown string."""

    def __init__(self):
        # Words that are promoted to "### <keyword>" headings when they stand
        # alone on a line (optionally followed by a colon), case-insensitively.
        self.section_keywords = [
            "Summary",
            "Overview",
            "Background",
            "Solution",
            "Approach",
            "Benefits",
            "Experience",
            "Methodology",
            "Implementation",
            "Timeline",
            "Pricing",
            "Why Us",
            "Next Steps"
        ]

    def format_response(self, content: str, metadata: Optional[Dict] = None) -> str:
        """
        Format the AI response into a clean, structured format.

        Args:
            content (str): Raw response content
            metadata (Optional[Dict]): Additional metadata; a non-empty
                'sources' entry is appended as a "Sources" section

        Returns:
            str: Formatted response
        """
        # Clean up the content
        formatted = self._clean_content(content)

        # Add structural formatting
        formatted = self._add_structure(formatted)

        # Format lists and bullet points
        formatted = self._format_lists(formatted)

        # Add source references if available
        if metadata and 'sources' in metadata:
            formatted = self._add_sources(formatted, metadata['sources'])

        return formatted

    def _clean_content(self, content: str) -> str:
        """Collapse blank-line runs and normalize keyword lines into headings."""
        # Remove multiple newlines
        cleaned = re.sub(r'\n{3,}', '\n\n', content)

        # Ensure consistent heading formatting. The pattern is anchored to the
        # start of a line so a keyword that merely ends a sentence line is left
        # alone, and it absorbs any existing '#' markers so "### Summary" is
        # not turned into "### ### Summary".
        if self.section_keywords:
            canonical = {kw.lower(): kw for kw in self.section_keywords}
            alternation = '|'.join(re.escape(kw) for kw in self.section_keywords)
            heading_re = re.compile(
                rf'(?im)^[ \t]*(?:#+[ \t]*)?({alternation}):?\s*\n'
            )
            cleaned = heading_re.sub(
                lambda m: f"### {canonical.get(m.group(1).lower(), m.group(1))}\n\n",
                cleaned,
            )

        return cleaned

    def _add_structure(self, content: str) -> str:
        """Rebuild the content as a sequence of "### title" sections.

        Text that precedes the first heading (or the whole text when there is
        no heading at all) is placed under a default "### Response" heading
        instead of having its first line promoted to a heading.
        """
        sections = re.split(r'(?m)^###\s+', content)

        chunks = []

        # sections[0] is whatever came before the first "### " marker.
        preamble = sections[0].strip()
        if preamble:
            chunks.append(f"### Response\n\n{preamble}\n\n")

        for section in sections[1:]:
            if not section.strip():
                continue
            # First line is the title, the remainder is the section body.
            section_title, _, section_content = section.partition('\n')
            chunks.append(f"### {section_title.strip()}\n\n{section_content.strip()}\n\n")

        return "".join(chunks)

    def _format_lists(self, content: str) -> str:
        """Format lists and bullet points consistently."""
        # Format bullet points; only spaces/tabs are consumed after the marker
        # so a lone "-" line cannot swallow the following newline.
        content = re.sub(r'(?m)^[\-\*][ \t]+', '• ', content)

        # Format numbered lists ("01.   x" -> "1. x")
        content = re.sub(
            r'(?m)^(\d+)\.[ \t]+',
            lambda m: f"{int(m.group(1))}. ",
            content,
        )

        return content

    def _add_sources(self, content: str, sources: list) -> str:
        """Append a "Sources" section listing each source as a bullet."""
        if sources:
            content += "\n\n---\n\n### Sources\n\n"
            content += "".join(f"• {source}\n" for source in sources)

        return content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
def display_formatted_response(response_content: str,
                               metadata: Optional[Dict] = None,
                               key_prefix: Optional[str] = None):
    """
    Display a formatted response in the Streamlit interface.

    Args:
        response_content (str): Raw response content
        metadata (Optional[Dict]): Additional metadata like sources
        key_prefix (Optional[str]): Prefix for the widget keys. Pass a value
            that is unique per rendered message (e.g. a message id) when the
            same text may be displayed more than once in a single run.
            Defaults to a hash of the response content.
    """
    formatter = ResponseFormatter()
    formatted_content = formatter.format_response(response_content, metadata)

    # Display formatted content
    st.markdown(formatted_content)

    # Widget keys must be identical across reruns, otherwise Streamlit treats
    # the widget as new on every run and a button click is never observed.
    # A timestamp-based key therefore cannot work; derive a stable one instead.
    if key_prefix is None:
        key_prefix = f"{hash(response_content) & 0xFFFFFFFF:08x}"

    # Add a copy helper for each section
    sections = re.split(r'(?m)^###\s+', formatted_content)
    for idx, section in enumerate(sections):
        if not section.strip():
            continue

        title_line, _, body = section.partition('\n')
        section_title = title_line.strip()
        section_content = body.strip()

        with st.expander(f"Copy {section_title}", expanded=False):
            st.text_area(
                "Copy this content:",
                value=section_content,
                height=200,
                key=f"copy_{key_prefix}_{idx}"
            )
            if st.button(f"Copy {section_title}", key=f"btn_{key_prefix}_{idx}"):
                # Nothing here can write to the user's clipboard, so tell the
                # user how to copy rather than claiming it already happened.
                st.write("Select the text above and press Ctrl+C (Cmd+C on macOS) to copy it.")
 
 
 
 
 
 
 
 
134
 
135
- # Example usage in your chat interface:
136
- """
137
- def display_chat_message(message, is_user=False):
138
- if is_user:
139
- st.chat_message("user").write(message)
140
- else:
141
- with st.chat_message("assistant"):
142
- display_formatted_response(
143
- message.content,
144
- metadata=getattr(message, 'metadata', None)
145
- )
146
- """
 
 
 
 
 
 
1
+ """
2
+ Enhanced response formatter with source referencing and clean output structure.
3
+ """
4
 
 
 
5
  import streamlit as st
6
+ import re
7
+ from typing import Dict, List, Optional, Any
8
  from datetime import datetime
9
+ import json
10
 
11
class EnhancedResponseFormatter:
    """Format raw AI response text into titled sections plus source references."""

    def __init__(self):
        # Words that are promoted to "### <keyword>" headings when they stand
        # alone on a line (optionally followed by a colon), case-insensitively.
        self.section_keywords = [
            "Summary",
            "Key Points",
            "Requirements",
            "Solution",
            "Approach",
            "Benefits",
            "Experience",
            "Technical Details",
            "Implementation",
            "Timeline",
            "Pricing",
            "Why Us",
            "Next Steps",
            "Recommendations"
        ]

    def format_response(self,
                        content: str,
                        sources: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """
        Format the AI response with enhanced structure and source references.

        Args:
            content (str): Raw response content
            sources (List[Dict]): List of source documents with their metadata

        Returns:
            Dict with keys 'content' (structured markdown string), 'sources'
            (processed source dicts, most relevant first) and 'sections'
            (list of {'title', 'content'} dicts extracted from 'content')
        """
        # Clean and structure the content (None is treated as empty text)
        cleaned_content = self._clean_content(content or "")
        structured_content = self._structure_content(cleaned_content)

        # Process source references
        source_references = self._process_sources(sources) if sources else []

        return {
            'content': structured_content,
            'sources': source_references,
            'sections': self._extract_sections(structured_content)
        }

    def _clean_content(self, content: str) -> str:
        """Collapse blank-line runs, normalize keyword headings and list markers."""
        # Remove multiple newlines
        content = re.sub(r'\n{3,}', '\n\n', content)

        # Ensure consistent heading formatting. The pattern is anchored to the
        # start of a line so a keyword that merely ends a sentence line is left
        # alone, and it absorbs any existing '#' markers so "### Summary" is
        # not turned into "### ### Summary".
        if self.section_keywords:
            canonical = {kw.lower(): kw for kw in self.section_keywords}
            alternation = '|'.join(re.escape(kw) for kw in self.section_keywords)
            heading_re = re.compile(
                rf'(?im)^[ \t]*(?:#+[ \t]*)?({alternation}):?\s*\n'
            )
            content = heading_re.sub(
                lambda m: f"### {canonical.get(m.group(1).lower(), m.group(1))}\n\n",
                content,
            )

        # Format lists consistently; only spaces/tabs are consumed after the
        # marker so a lone "-" line cannot swallow the following newline.
        content = re.sub(r'(?m)^[\-\*][ \t]+', '• ', content)
        content = re.sub(
            r'(?m)^(\d+)\.[ \t]+',
            lambda m: f"{int(m.group(1))}. ",
            content,
        )

        return content

    def _structure_content(self, content: str) -> str:
        """Rebuild the content as a sequence of "### title" sections.

        Text without any heading is returned under a default "### Response"
        heading. Text that precedes the first heading is likewise placed under
        "### Response" rather than having its first line promoted to a title.
        """
        # Split into sections
        parts = re.split(r'(?m)^###\s+', content)

        # If no sections found, add default structure
        if len(parts) == 1:
            return "### Response\n\n" + content

        formatted_sections = []

        # parts[0] is whatever came before the first "### " marker.
        preamble = parts[0].strip()
        if preamble:
            formatted_sections.append(f"### Response\n\n{preamble}")

        # Process each section: first line is the title, the rest is the body.
        for part in parts[1:]:
            if not part.strip():
                continue
            title, _, body = part.partition('\n')
            formatted_sections.append(f"### {title.strip()}\n\n{body.strip()}")

        return "\n\n".join(formatted_sections) + "\n"

    @staticmethod
    def _to_score(value: Any) -> Any:
        """Return a sortable numeric relevance score (0.0 when missing/invalid)."""
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _process_sources(self, sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Normalize source dicts and sort them by relevance, highest first.

        Missing or None values for 'content', 'start_index', 'end_index' and
        'relevance_score' are replaced by '', 0, 0 and 0.0 so that slicing and
        sorting cannot fail. Entries that are not dicts are skipped.
        """
        processed_sources = []
        for source in sources:
            if not isinstance(source, dict):
                continue
            text = source.get('content') or ''
            processed_sources.append({
                'document_name': source.get('document_name', ''),
                'page_number': source.get('page_number'),
                'chunk_index': source.get('chunk_index'),
                'content': text,
                'relevance_score': self._to_score(source.get('relevance_score')),
                'context': self._extract_context(text,
                                                 source.get('start_index') or 0,
                                                 source.get('end_index') or 0)
            })

        return sorted(processed_sources,
                      key=lambda x: x['relevance_score'],
                      reverse=True)

    def _extract_context(self,
                         content: str,
                         start_idx: int,
                         end_idx: int,
                         context_window: int = 100) -> str:
        """Extract context around the referenced text.

        Returns content[start_idx - context_window : end_idx + context_window]
        clamped to the bounds of ``content``, with "..." added on each side
        that was truncated. Returns '' when the clamped range is empty.
        """
        length = len(content)
        # Clamp both ends into [0, length] and keep end >= start so that
        # out-of-range or inverted indices never produce a negative slice.
        start = min(length, max(0, start_idx - context_window))
        end = max(start, min(length, end_idx + context_window))

        context = content[start:end]
        if not context:
            return ''
        if start > 0:
            context = f"...{context}"
        if end < length:
            context = f"{context}..."

        return context

    @staticmethod
    def _append_section(sections: List[Dict[str, str]],
                        title: Optional[str],
                        lines: List[str]) -> None:
        """Append one section; untitled text is kept only when non-blank."""
        body = '\n'.join(lines).strip('\n')
        if title is not None:
            sections.append({'title': title, 'content': body})
        elif body.strip():
            # Text before the first heading would otherwise be lost.
            sections.append({'title': 'Response', 'content': body})

    def _extract_sections(self, content: str) -> List[Dict[str, str]]:
        """Extract sections for navigation.

        Splits ``content`` on lines starting with "### " and returns one
        {'title', 'content'} dict per heading, in document order. Non-blank
        text before the first heading is returned under the title "Response".
        """
        sections: List[Dict[str, str]] = []
        current_title: Optional[str] = None
        current_lines: List[str] = []

        for line in content.split('\n'):
            if line.startswith('### '):
                self._append_section(sections, current_title, current_lines)
                # Strip only the leading marker; "### " inside the title stays.
                current_title = line[4:].strip() or 'Response'
                current_lines = []
            else:
                current_lines.append(line)

        self._append_section(sections, current_title, current_lines)

        return sections
155
 
156
def display_enhanced_response(response_content: str,
                              sources: Optional[List[Dict[str, Any]]] = None,
                              key_prefix: Optional[str] = None):
    """
    Display enhanced response with source references and navigation.

    Args:
        response_content (str): Raw response content
        sources (List[Dict]): Source documents and metadata
        key_prefix (Optional[str]): Prefix for the widget keys. Pass a value
            that is unique per rendered message (e.g. a message id) when the
            same text may be displayed more than once in a single run.
            Defaults to a hash of the response content.
    """
    formatter = EnhancedResponseFormatter()
    formatted = formatter.format_response(response_content, sources)

    # Button keys must be unique within one script run. A bare index would
    # collide as soon as two responses are rendered (e.g. a chat history), so
    # the keys are namespaced per response.
    if key_prefix is None:
        key_prefix = f"{hash(response_content) & 0xFFFFFFFF:08x}"

    # Create two columns: main content and source references
    col1, col2 = st.columns([2, 1])

    with col1:
        # Display main content, one expander per section
        if formatted['sections']:
            for section in formatted['sections']:
                with st.expander(section['title'], expanded=True):
                    st.markdown(section['content'])
        elif formatted['content']:
            # No sections could be extracted: still show the text.
            st.markdown(formatted['content'])

    with col2:
        st.markdown("### Source References")

        if formatted['sources']:
            for idx, source in enumerate(formatted['sources']):
                with st.expander(f"📄 {source['document_name']}", expanded=False):
                    score = source['relevance_score']
                    # Only format numeric scores; ':.2f' raises on None/str.
                    if isinstance(score, (int, float)):
                        st.markdown(f"**Relevance Score:** {score:.2f}")
                    # Compare against None/'' so that page 0 is still shown.
                    if source['page_number'] not in (None, ''):
                        st.markdown(f"**Page:** {source['page_number']}")

                    st.markdown("**Context:**")
                    st.markdown(f"```\n{source['context']}\n```")

                    if st.button("Show in Document", key=f"show_doc_{key_prefix}_{idx}"):
                        show_document_context(source)
        else:
            st.info("No source references available for this response.")
195
 
196
def show_document_context(source: Dict[str, Any]):
    """Render a source's context and full text inline, in two tabs.

    The output is written at the current position in the Streamlit layout
    (it is not a modal dialog).

    Args:
        source (Dict[str, Any]): Source dict; the keys 'document_name',
            'context' and 'content' are read, each defaulting to '' when
            missing or None.
    """
    document_name = source.get('document_name') or ''
    context = source.get('context') or ''
    full_text = source.get('content') or ''

    st.markdown(f"### Document: {document_name}")

    # Create tabs for different views
    context_tab, document_tab = st.tabs(["Context View", "Full Document"])

    with context_tab:
        st.markdown("### Relevant Context")
        st.markdown(f"```\n{context}\n```")

    with document_tab:
        st.markdown("### Full Document Content")
        st.text_area("Content",
                     value=full_text,
                     height=400,
                     disabled=True)