JatsTheAIGen commited on
Commit
bb3909a
·
1 Parent(s): 624de5a

Major performance and UX improvements: caching, visual outputs, dynamic tokens, enhanced document processing

Browse files
Files changed (4) hide show
  1. agents.py +74 -16
  2. app.py +26 -14
  3. utils/__init__.py +79 -8
  4. utils/visual_output.py +262 -0
agents.py CHANGED
@@ -5,7 +5,8 @@ import logging
5
  from typing import Optional, Dict, Any, List, AsyncGenerator
6
  import time
7
 
8
- from utils import call_openai_chat, load_pdf_text_cached, load_pdf_text_chunked, get_document_metadata
 
9
  from config import Config
10
 
11
  logger = logging.getLogger(__name__)
@@ -33,9 +34,44 @@ class BaseAgent:
33
  # Core Analysis Agent
34
  # --------------------
35
  class AnalysisAgent(BaseAgent):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  async def handle(self, user_id: str, prompt: str, file_path: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
37
  start_time = time.time()
38
 
 
 
 
 
 
 
 
39
  if file_path:
40
  # Get document metadata
41
  metadata = get_document_metadata(file_path)
@@ -45,29 +81,46 @@ class AnalysisAgent(BaseAgent):
45
 
46
  # Check if document needs chunking
47
  if len(text) > Config.CHUNK_SIZE:
48
- return await self._handle_large_document(prompt, text, metadata)
49
  else:
50
  content = f"User prompt: {prompt}\n\nDocument text:\n{text}"
 
51
  else:
52
  content = f"User prompt: {prompt}"
53
  metadata = {}
 
54
 
55
- system = """You are AnalysisAgent: produce well-structured, readable insights and summaries.
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- FORMATTING REQUIREMENTS:
58
  - Use clear section headers with emojis (## 📋 Key Points, ## 🔍 Analysis, etc.)
59
- - Use bullet points and numbered lists for better readability
 
60
  - Include visual separators (---) between major sections
61
- - Use bold text for important concepts
62
- - Add technical details in organized subsections
63
- - Include actionable insights with clear next steps
64
 
65
  CONTENT REQUIREMENTS:
66
- - Adapt language and complexity to the target audience
67
- - Provide clear, actionable insights with examples
68
- - Use analogies for complex topics
69
- - Include quantitative details when available
70
- - Structure information hierarchically for easy scanning"""
 
71
 
72
  try:
73
  response = await call_openai_chat(
@@ -75,23 +128,28 @@ CONTENT REQUIREMENTS:
75
  messages=[{"role": "system", "content": system},
76
  {"role": "user", "content": content}],
77
  temperature=Config.OPENAI_TEMPERATURE,
78
- max_tokens=Config.OPENAI_MAX_TOKENS
79
  )
80
  except Exception as e:
81
  logger.exception("AnalysisAgent failed")
82
  response = f"Error during analysis: {str(e)}"
83
 
 
 
 
84
  self.tasks_completed += 1
85
 
86
  # Add processing metadata
87
  processing_time = time.time() - start_time
88
  result = {
89
- "analysis": response,
90
  "metadata": {
91
  "processing_time": round(processing_time, 2),
92
  "document_metadata": metadata,
93
  "agent": self.name,
94
- "tasks_completed": self.tasks_completed
 
 
95
  }
96
  }
97
 
 
5
  from typing import Optional, Dict, Any, List, AsyncGenerator
6
  import time
7
 
8
+ from utils import call_openai_chat, load_pdf_text_cached, load_pdf_text_chunked, get_document_metadata, get_cached_analysis, cache_analysis
9
+ from utils.visual_output import VisualOutputGenerator
10
  from config import Config
11
 
12
  logger = logging.getLogger(__name__)
 
34
  # Core Analysis Agent
35
  # --------------------
36
  class AnalysisAgent(BaseAgent):
37
def __init__(self, name: str, model: str, tasks_completed: int = 0):
    """Initialize the analysis agent and attach a visual output formatter.

    Args:
        name: Agent display name (forwarded to BaseAgent).
        model: LLM model identifier (forwarded to BaseAgent).
        tasks_completed: Starting value for the completed-task counter
            (forwarded to BaseAgent).
    """
    super().__init__(name, model, tasks_completed)
    # Used later to decorate raw LLM output with Markdown/HTML visual elements.
    self.visual_generator = VisualOutputGenerator()
41
def _calculate_dynamic_tokens(self, prompt: str, text_length: int) -> int:
    """Choose a max_tokens budget for the LLM call.

    The base budget (Config.OPENAI_MAX_TOKENS) is scaled by three
    independent multipliers:
      * prompt complexity: +0.3 per complexity keyword found in the prompt
      * document length:   up to 2x, growing linearly with text_length
      * document type:     +0.2 per document-type keyword in the prompt

    Args:
        prompt: The user's analysis prompt.
        text_length: Length, in characters, of the extracted document text.

    Returns:
        The computed token budget, hard-capped at 4000.
    """
    base_tokens = Config.OPENAI_MAX_TOKENS
    # Hoisted: lowercase the prompt once instead of once per keyword.
    prompt_lower = prompt.lower()

    # Richer prompts ("analyze", "comprehensive", ...) earn a larger budget.
    complexity_multiplier = 1.0
    for keyword in ('analyze', 'comprehensive', 'detailed', 'thorough', 'complete', 'extensive'):
        if keyword in prompt_lower:
            complexity_multiplier += 0.3

    # Longer documents need more room; capped at 2x for very long docs.
    length_multiplier = min(2.0, 1.0 + (text_length / 50000))

    # Dense document genres (legal, academic, ...) also earn a larger budget.
    doc_type_multiplier = 1.0
    for keyword in ('whitepaper', 'research', 'technical', 'financial', 'legal', 'academic'):
        if keyword in prompt_lower:
            doc_type_multiplier += 0.2

    final_tokens = int(base_tokens * complexity_multiplier * length_multiplier * doc_type_multiplier)
    return min(final_tokens, 4000)  # hard ceiling regardless of multipliers
65
  async def handle(self, user_id: str, prompt: str, file_path: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
66
  start_time = time.time()
67
 
68
+ # Check cache first
69
+ if file_path:
70
+ cached_result = get_cached_analysis(file_path, prompt)
71
+ if cached_result:
72
+ logger.info(f"Returning cached analysis for {file_path}")
73
+ return cached_result
74
+
75
  if file_path:
76
  # Get document metadata
77
  metadata = get_document_metadata(file_path)
 
81
 
82
  # Check if document needs chunking
83
  if len(text) > Config.CHUNK_SIZE:
84
+ result = await self._handle_large_document(prompt, text, metadata)
85
  else:
86
  content = f"User prompt: {prompt}\n\nDocument text:\n{text}"
87
+ result = await self._process_content(prompt, content, metadata, text)
88
  else:
89
  content = f"User prompt: {prompt}"
90
  metadata = {}
91
+ result = await self._process_content(prompt, content, metadata, "")
92
 
93
+ # Cache the result
94
+ if file_path:
95
+ cache_analysis(file_path, prompt, result)
96
+
97
+ return result
98
+
99
+ async def _process_content(self, prompt: str, content: str, metadata: Dict[str, Any], text: str) -> Dict[str, Any]:
100
+ """Process content with dynamic token allocation and visual formatting"""
101
+ start_time = time.time()
102
+
103
+ # Calculate dynamic tokens
104
+ max_tokens = self._calculate_dynamic_tokens(prompt, len(text))
105
+
106
+ system = """You are AnalysisAgent: produce crisp, visually appealing, and highly readable insights.
107
 
108
+ CRITICAL FORMATTING REQUIREMENTS:
109
  - Use clear section headers with emojis (## 📋 Key Points, ## 🔍 Analysis, etc.)
110
+ - Create concise bullet points (max 1 line each)
111
+ - Use tables for data comparison when appropriate
112
  - Include visual separators (---) between major sections
113
+ - Use **bold** for key concepts and numbers
114
+ - Keep sections short and scannable
115
+ - Prioritize actionable insights over lengthy explanations
116
 
117
  CONTENT REQUIREMENTS:
118
+ - Be concise and to the point
119
+ - Use simple language even for technical topics
120
+ - Include specific numbers, percentages, and metrics
121
+ - Provide clear next steps or recommendations
122
+ - Structure information for quick scanning
123
+ - Focus on what matters most to the user"""
124
 
125
  try:
126
  response = await call_openai_chat(
 
128
  messages=[{"role": "system", "content": system},
129
  {"role": "user", "content": content}],
130
  temperature=Config.OPENAI_TEMPERATURE,
131
+ max_tokens=max_tokens
132
  )
133
  except Exception as e:
134
  logger.exception("AnalysisAgent failed")
135
  response = f"Error during analysis: {str(e)}"
136
 
137
+ # Enhance with visual formatting
138
+ visual_response = self.visual_generator.format_analysis_with_visuals(response, metadata)
139
+
140
  self.tasks_completed += 1
141
 
142
  # Add processing metadata
143
  processing_time = time.time() - start_time
144
  result = {
145
+ "analysis": visual_response,
146
  "metadata": {
147
  "processing_time": round(processing_time, 2),
148
  "document_metadata": metadata,
149
  "agent": self.name,
150
+ "tasks_completed": self.tasks_completed,
151
+ "tokens_used": max_tokens,
152
+ "cached": False
153
  }
154
  }
155
 
app.py CHANGED
@@ -87,16 +87,30 @@ def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
87
  validate_file_size(file)
88
  path = save_uploaded_file(file, username)
89
 
90
- result = run_async(
91
- ORCHESTRATOR.handle_user_prompt,
92
- user_id=username,
93
- prompt=prompt,
94
- file_path=path,
95
- targets=["analysis"]
96
- )
97
- return result.get("analysis", "No analysis result."), None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  except Exception as e:
99
- return f"Error during analysis: {str(e)}", None, None
100
 
101
  def handle_batch_analysis(files, prompt, username="anonymous"):
102
  """Handle batch analysis of multiple PDFs"""
@@ -240,11 +254,9 @@ with gr.Blocks(title="PDF Analysis & Orchestrator", theme=gr.themes.Soft()) as d
240
  label="Analysis Result",
241
  show_copy_button=True
242
  )
243
- status_box = gr.Textbox(
244
- label="🔄 Status",
245
- value="Ready to analyze documents",
246
- interactive=False,
247
- info="Current processing status will appear here"
248
  )
249
 
250
  with gr.Column(scale=1):
 
87
  validate_file_size(file)
88
  path = save_uploaded_file(file, username)
89
 
90
+ # Check if this is a cached result
91
+ from utils import get_cached_analysis
92
+ cached_result = get_cached_analysis(path, prompt)
93
+
94
+ if cached_result:
95
+ status = "⚡ **Cached Result** - Instant response from previous analysis"
96
+ result = cached_result.get("analysis", "No analysis result.")
97
+ metadata = cached_result.get("metadata", {})
98
+ else:
99
+ status = "🔄 **Processing** - Analyzing document with AI..."
100
+ result = run_async(
101
+ ORCHESTRATOR.handle_user_prompt,
102
+ user_id=username,
103
+ prompt=prompt,
104
+ file_path=path,
105
+ targets=["analysis"]
106
+ )
107
+ result = result.get("analysis", "No analysis result.")
108
+ metadata = result.get("metadata", {}) if isinstance(result, dict) else {}
109
+ status = "✅ **Analysis Complete** - Fresh analysis generated"
110
+
111
+ return result, status, metadata
112
  except Exception as e:
113
+ return f"Error during analysis: {str(e)}", f"❌ **Error** - {str(e)}", None
114
 
115
  def handle_batch_analysis(files, prompt, username="anonymous"):
116
  """Handle batch analysis of multiple PDFs"""
 
254
  label="Analysis Result",
255
  show_copy_button=True
256
  )
257
+ status_box = gr.Markdown(
258
+ value="**🔄 Status:** Ready to analyze documents\n\n**💡 Tip:** Same document + same prompt = instant cached response!",
259
+ label="Status & Performance"
 
 
260
  )
261
 
262
  with gr.Column(scale=1):
utils/__init__.py CHANGED
@@ -48,12 +48,46 @@ async def call_openai_chat(model: str, messages: list, temperature=0.2, max_toke
48
  # PDF Utilities
49
  # ------------------------
50
  def load_pdf_text(path: str) -> str:
51
- """Extract text from PDF using pdfplumber"""
52
- text = []
53
  with pdfplumber.open(path) as pdf:
54
- for p in pdf.pages:
55
- text.append(p.extract_text() or "")
56
- return "\n\n".join(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def save_text_as_file(text: str, suffix=".txt") -> str:
59
  """Save text to a temporary file"""
@@ -111,15 +145,52 @@ def get_file_hash(file_path: str) -> str:
111
  return hashlib.md5(f.read()).hexdigest()
112
 
113
  # ------------------------
114
- # Caching System
115
  # ------------------------
116
  CACHE_DIR = Path(tempfile.gettempdir()) / "pdf_analysis_cache"
117
  CACHE_DIR.mkdir(exist_ok=True)
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  def get_cached_text(file_path: str) -> Optional[str]:
120
  """Retrieve cached PDF text if available"""
121
  file_hash = get_file_hash(file_path)
122
- cache_file = CACHE_DIR / f"{file_hash}.json"
123
 
124
  if cache_file.exists():
125
  try:
@@ -135,7 +206,7 @@ def get_cached_text(file_path: str) -> Optional[str]:
135
  def cache_text(file_path: str, text: str) -> None:
136
  """Cache PDF text for future use"""
137
  file_hash = get_file_hash(file_path)
138
- cache_file = CACHE_DIR / f"{file_hash}.json"
139
 
140
  try:
141
  cache_data = {
 
48
  # PDF Utilities
49
  # ------------------------
50
def load_pdf_text(path: str) -> str:
    """Extract comprehensive content from a PDF via pdfplumber.

    For every page this collects: the plain text layer, any tables
    (rendered as pipe-separated rows), image dimensions (metadata only,
    no pixel data), and basic page geometry. Pages are joined with a
    blank line between them.
    """
    pages_out = []
    with pdfplumber.open(path) as pdf:
        for page_num, page in enumerate(pdf.pages, 1):
            parts = []

            # Text layer — section is omitted entirely when the page has none.
            extracted = page.extract_text()
            if extracted:
                parts.append(f"=== PAGE {page_num} TEXT ===")
                parts.append(extracted)

            # Tables, each row rendered as "cell | cell | ...".
            page_tables = page.extract_tables()
            if page_tables:
                parts.append(f"\n=== PAGE {page_num} TABLES ===")
                for table_num, table in enumerate(page_tables, 1):
                    parts.append(f"\n--- TABLE {table_num} ---")
                    for row in table:
                        if not row:  # skip empty rows
                            continue
                        cells = [cell.strip() if cell else "" for cell in row]
                        parts.append(" | ".join(cells))

            # Images: dimensions only, pixel data is never extracted.
            if page.images:
                parts.append(f"\n=== PAGE {page_num} IMAGES ===")
                for img_num, img in enumerate(page.images, 1):
                    width = img.get('width', 'unknown')
                    height = img.get('height', 'unknown')
                    parts.append(f"Image {img_num}: {width}x{height} pixels")

            # Page geometry is always recorded, even for otherwise empty pages.
            parts.append(f"\n=== PAGE {page_num} METADATA ===")
            parts.append(f"Page size: {page.width}x{page.height}")
            parts.append(f"Rotation: {page.rotation}")

            if parts:
                pages_out.append("\n".join(parts))

    return "\n\n".join(pages_out)
 
92
  def save_text_as_file(text: str, suffix=".txt") -> str:
93
  """Save text to a temporary file"""
 
145
  return hashlib.md5(f.read()).hexdigest()
146
 
147
  # ------------------------
148
+ # Enhanced Caching System
149
  # ------------------------
150
  CACHE_DIR = Path(tempfile.gettempdir()) / "pdf_analysis_cache"
151
  CACHE_DIR.mkdir(exist_ok=True)
152
 
153
def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]:
    """Look up a previously cached analysis for a (document, prompt) pair.

    A hit requires the stored hashes to match the entry's key and the
    entry to be less than 24 hours old. Anything else — missing file,
    stale entry, or an unreadable/corrupt cache file — is treated as a
    miss and returns None.
    """
    file_hash = get_file_hash(file_path)
    prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
    entry_path = CACHE_DIR / f"{file_hash}_{prompt_hash}.json"

    if not entry_path.exists():
        return None

    try:
        with open(entry_path, 'r', encoding='utf-8') as fh:
            entry = json.load(fh)
        hashes_match = (entry.get('file_hash') == file_hash
                        and entry.get('prompt_hash') == prompt_hash)
        is_fresh = time.time() - entry.get('cached_at', 0) < 86400  # 24h TTL
        if hashes_match and is_fresh:
            return entry.get('analysis')
    except Exception:
        # Any cache problem is a miss, never an error — caching is best-effort.
        pass
    return None
171
+
172
def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> None:
    """Persist an analysis result keyed by (document hash, prompt hash).

    Lets identical (document, prompt) requests be answered instantly
    later. Failures are swallowed deliberately: caching is strictly
    best-effort and must never break the analysis flow.
    """
    file_hash = get_file_hash(file_path)
    prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
    entry_path = CACHE_DIR / f"{file_hash}_{prompt_hash}.json"

    payload = {
        'file_hash': file_hash,
        'prompt_hash': prompt_hash,
        'analysis': analysis,
        'cached_at': time.time(),
    }
    try:
        with open(entry_path, 'w', encoding='utf-8') as fh:
            json.dump(payload, fh, ensure_ascii=False)
    except Exception:
        pass  # Fail silently if caching fails
189
+
190
  def get_cached_text(file_path: str) -> Optional[str]:
191
  """Retrieve cached PDF text if available"""
192
  file_hash = get_file_hash(file_path)
193
+ cache_file = CACHE_DIR / f"{file_hash}_text.json"
194
 
195
  if cache_file.exists():
196
  try:
 
206
  def cache_text(file_path: str, text: str) -> None:
207
  """Cache PDF text for future use"""
208
  file_hash = get_file_hash(file_path)
209
+ cache_file = CACHE_DIR / f"{file_hash}_text.json"
210
 
211
  try:
212
  cache_data = {
utils/visual_output.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/visual_output.py - Visual output generation for PDF Analysis & Orchestrator
2
+ import json
3
+ import re
4
+ from typing import Dict, List, Any, Optional
5
+ from datetime import datetime
6
+
7
class VisualOutputGenerator:
    """Render analysis results as Markdown/HTML visual elements.

    Every ``create_*`` method returns a Markdown string (possibly with
    inline-styled HTML) suitable for a Markdown-rendering UI component.
    Output lines are built without leading indentation: the previous
    triple-quoted implementation emitted 4+ leading spaces per line,
    which Markdown renders as a code block, breaking headers and tables.
    """

    # (color, icon) per alert type; "info" is the fallback for unknown types.
    _ALERT_STYLES = {
        "info": ("#2196F3", "ℹ️"),
        "success": ("#4CAF50", "✅"),
        "warning": ("#FF9800", "⚠️"),
        "error": ("#F44336", "❌"),
    }

    def __init__(self):
        # Reserved for callers that want to accumulate generated elements.
        self.visual_elements = []

    def create_infographic(self, data: Dict[str, Any], title: str = "Analysis Summary") -> str:
        """Create an infographic-style summary card.

        ``data['metrics']`` (a mapping of label -> value), when present,
        becomes a row of highlight tiles inside the gradient card.
        """
        lines = [
            f"## 📊 {title}",
            "",
            '<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); '
            'padding: 20px; border-radius: 10px; color: white; margin: 10px 0;">',
        ]
        if 'metrics' in data:
            lines.append('<div style="display: flex; justify-content: space-around; margin: 20px 0;">')
            for metric, value in data['metrics'].items():
                lines.append(
                    '<div style="text-align: center; background: rgba(255,255,255,0.2); '
                    'padding: 15px; border-radius: 8px; margin: 5px;">'
                    f'<h3 style="margin: 0; font-size: 24px;">{value}</h3>'
                    f'<p style="margin: 5px 0 0 0; font-size: 14px;">{metric}</p>'
                    '</div>'
                )
            lines.append('</div>')
        lines.append('</div>')
        return "\n".join(lines)

    def create_data_table(self, data: List[Dict[str, Any]], title: str = "Data Table") -> str:
        """Render a list of homogeneous dicts as a Markdown table.

        Column order follows the first row's keys; missing keys in later
        rows render as empty cells. Returns "" when ``data`` is empty.
        """
        if not data:
            return ""
        headers = list(data[0].keys())
        lines = [
            f"## 📋 {title}",
            "",
            "| " + " | ".join(headers) + " |",
            "| " + " | ".join(["---"] * len(headers)) + " |",
        ]
        for row in data:
            lines.append("| " + " | ".join(str(row.get(h, '')) for h in headers) + " |")
        return "\n".join(lines)

    def create_progress_bar(self, value: float, max_value: float, label: str) -> str:
        """Render a labelled horizontal progress bar.

        Percentage is clamped to [0, 100]; a non-positive ``max_value``
        yields 0% instead of dividing by zero.
        """
        percentage = min(100, (value / max_value) * 100) if max_value > 0 else 0
        return "\n".join([
            '<div style="margin: 10px 0;">',
            f'<p style="margin: 5px 0; font-weight: bold;">{label}: {value:.1f}/{max_value:.1f} ({percentage:.1f}%)</p>',
            '<div style="background: #e0e0e0; border-radius: 10px; height: 20px; overflow: hidden;">',
            f'<div style="background: linear-gradient(90deg, #4CAF50, #8BC34A); height: 100%; width: {percentage}%; transition: width 0.3s ease;"></div>',
            '</div>',
            '</div>',
        ])

    def create_timeline(self, events: List[Dict[str, str]], title: str = "Timeline") -> str:
        """Render events (dicts with 'title', 'description', 'date') as a vertical timeline."""
        lines = [
            f"## ⏰ {title}",
            "",
            '<div style="position: relative; padding-left: 30px; margin: 20px 0;">',
        ]
        for event in events:
            lines.extend([
                '<div style="position: relative; margin-bottom: 20px;">',
                '<div style="position: absolute; left: -25px; top: 5px; width: 12px; height: 12px; '
                'background: #4CAF50; border-radius: 50%; border: 3px solid white; box-shadow: 0 0 0 3px #4CAF50;"></div>',
                '<div style="background: #f5f5f5; padding: 15px; border-radius: 8px; border-left: 4px solid #4CAF50;">',
                f'<h4 style="margin: 0 0 5px 0; color: #333;">{event.get("title", "Event")}</h4>',
                f'<p style="margin: 0; color: #666;">{event.get("description", "")}</p>',
                f'<small style="color: #999;">{event.get("date", "")}</small>',
                '</div>',
                '</div>',
            ])
        lines.append('</div>')
        return "\n".join(lines)

    def create_comparison_chart(self, data: Dict[str, float], title: str = "Comparison") -> str:
        """Render label -> value pairs as horizontal bars scaled to the maximum value.

        Returns "" for empty data. An all-zero series renders 0% bars
        (previously this raised ZeroDivisionError).
        """
        if not data:
            return ""
        # `or 1` guards the all-zero case: max() is 0 -> divide by 1 instead.
        max_value = max(data.values()) or 1

        lines = [f"## 📈 {title}", "", '<div style="margin: 20px 0;">']
        for label, value in data.items():
            percentage = (value / max_value) * 100
            lines.extend([
                '<div style="margin: 10px 0;">',
                '<div style="display: flex; justify-content: space-between; margin-bottom: 5px;">',
                f'<span style="font-weight: bold;">{label}</span>',
                f'<span style="color: #666;">{value:.1f}</span>',
                '</div>',
                '<div style="background: #e0e0e0; border-radius: 5px; height: 20px; overflow: hidden;">',
                f'<div style="background: linear-gradient(90deg, #2196F3, #21CBF3); height: 100%; width: {percentage}%; transition: width 0.3s ease;"></div>',
                '</div>',
                '</div>',
            ])
        lines.append('</div>')
        return "\n".join(lines)

    def create_key_points(self, points: List[str], title: str = "Key Points") -> str:
        """Render a list of short strings as numbered highlight cards; '' when empty."""
        if not points:
            return ""
        lines = [
            f"## 💡 {title}",
            "",
            '<div style="display: grid; gap: 15px; margin: 20px 0;">',
        ]
        for i, point in enumerate(points, 1):
            lines.append(
                '<div style="background: #f8f9fa; border-left: 4px solid #007bff; padding: 15px; '
                'border-radius: 0 8px 8px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">'
                '<div style="display: flex; align-items: flex-start;">'
                f'<span style="background: #007bff; color: white; border-radius: 50%; width: 24px; height: 24px; '
                f'display: flex; align-items: center; justify-content: center; font-weight: bold; '
                f'margin-right: 12px; flex-shrink: 0;">{i}</span>'
                f'<p style="margin: 0; line-height: 1.5;">{point}</p>'
                '</div>'
                '</div>'
            )
        lines.append('</div>')
        return "\n".join(lines)

    def create_alert_box(self, message: str, alert_type: str = "info") -> str:
        """Render a colored alert box; unknown ``alert_type`` falls back to "info"."""
        color, icon = self._ALERT_STYLES.get(alert_type, self._ALERT_STYLES["info"])
        return "\n".join([
            f'<div style="background: {color}15; border: 1px solid {color}; border-radius: 8px; '
            'padding: 15px; margin: 15px 0; display: flex; align-items: flex-start;">',
            f'<span style="font-size: 20px; margin-right: 10px;">{icon}</span>',
            f'<p style="margin: 0; color: {color}; font-weight: 500;">{message}</p>',
            '</div>',
        ])

    def create_metric_cards(self, metrics: Dict[str, Any], title: str = "Key Metrics") -> str:
        """Render label -> value pairs as a responsive grid of cards; '' when empty."""
        if not metrics:
            return ""
        lines = [
            f"## 📊 {title}",
            "",
            '<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); '
            'gap: 15px; margin: 20px 0;">',
        ]
        for metric, value in metrics.items():
            lines.append(
                '<div style="background: white; border: 1px solid #e0e0e0; border-radius: 8px; '
                'padding: 20px; text-align: center; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">'
                f'<h3 style="margin: 0 0 10px 0; color: #333; font-size: 28px;">{value}</h3>'
                f'<p style="margin: 0; color: #666; font-size: 14px; text-transform: uppercase; '
                f'letter-spacing: 0.5px;">{metric}</p>'
                '</div>'
            )
        lines.append('</div>')
        return "\n".join(lines)

    def format_analysis_with_visuals(self, analysis_text: str,
                                     document_metadata: Optional[Dict[str, Any]] = None) -> str:
        """Prefix the raw analysis text with auto-generated visual summaries.

        Adds, when derivable: a document-info card (from metadata), a
        key-points grid, and a metric-card grid, separated from the
        original text by a horizontal rule. If nothing is derivable the
        text is returned unchanged.
        """
        visual_elements = []

        if document_metadata:
            # `or 0` guards against explicit None values in the metadata dict.
            size_kb = (document_metadata.get('file_size') or 0) / 1024
            elapsed = document_metadata.get('processing_time') or 0
            visual_elements.append(self.create_metric_cards({
                "Pages": document_metadata.get('page_count', 'Unknown'),
                "File Size": f"{size_kb:.1f} KB",
                "Processing Time": f"{elapsed:.1f}s",
            }, "Document Information"))

        key_points = self._extract_key_points(analysis_text)
        if key_points:
            visual_elements.append(self.create_key_points(key_points))

        metrics = self._extract_metrics(analysis_text)
        if metrics:
            visual_elements.append(self.create_metric_cards(metrics, "Key Metrics"))

        if visual_elements:
            return "\n\n".join(visual_elements) + "\n\n---\n\n" + analysis_text
        return analysis_text

    def _extract_key_points(self, text: str) -> List[str]:
        """Pull up to 5 candidate key points out of free-form analysis text.

        Bulleted list items are collected first, then numbered items;
        only items longer than 10 characters qualify. Patterns are
        anchored to line starts so mid-sentence hyphens are not mistaken
        for bullets.
        """
        points: List[str] = []
        for pattern in (r'^\s*[-•*]\s+(.+?)$', r'^\s*\d+\.\s+(.+?)$'):
            for match in re.findall(pattern, text, re.MULTILINE):
                cleaned = match.strip()
                if len(cleaned) > 10:
                    points.append(cleaned)
        return points[:5]

    def _extract_metrics(self, text: str) -> Dict[str, str]:
        """Heuristically pull headline numbers out of analysis text."""
        metrics: Dict[str, str] = {}

        percentages = re.findall(r'(\d+(?:\.\d+)?%)', text)
        if percentages:
            # NOTE(review): the first percentage found is labelled "Success
            # Rate" by the original heuristic — it may be any metric.
            metrics["Success Rate"] = percentages[0]

        counted = re.findall(
            r'(\d+(?:,\d+)*(?:\.\d+)?)\s+(?:pages?|items?|points?|years?|months?)',
            text, re.IGNORECASE)
        if counted:
            metrics["Total Items"] = counted[0]

        return metrics