mozzic committed on
Commit
b17926a
·
verified ·
1 Parent(s): d345db6

Upload ui\app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. ui//app.py +1487 -0
ui//app.py ADDED
@@ -0,0 +1,1487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio UI for Context Thread Agent - Enterprise Edition
3
+ Professional document analysis with killer features
4
+ """
5
+
6
+ import gradio as gr
7
+ import json
8
+ import tempfile
9
+ import os
10
+ import html
11
+ from pathlib import Path
12
+ from typing import Tuple, List, Dict
13
+ from src.models import Cell, CellType
14
+ from datetime import datetime
15
+
16
+ from src.parser import NotebookParser
17
+ from src.dependencies import ContextThreadBuilder
18
+ from src.indexing import FAISSIndexer
19
+ from src.retrieval import RetrievalEngine, ContextBuilder
20
+ from src.reasoning import ContextualAnsweringSystem
21
+ from src.intent import ContextThreadEnricher
22
+ from src.groq_integration import GroqReasoningEngine
23
+ import pandas as pd
24
+
25
+
26
+ class NotebookAgentUI:
27
+ """Enterprise-grade Gradio UI for the Context Thread Agent."""
28
+
29
def __init__(self):
    """Start with no document loaded and try to create a Groq client.

    All per-document state begins as ``None`` (or empty). A failure while
    constructing the Groq client is reported on stdout and leaves
    ``groq_client`` set to ``None``.
    """
    # Everything that is simply "not there yet" until a file is loaded.
    for attribute in (
        "current_thread",
        "current_indexer",
        "current_engine",
        "answering_system",
        "groq_client",
        "keypoints_cache",
        "current_file_name",
        "data_profile",
        "current_file_path",
        "current_file_ext",
    ):
        setattr(self, attribute, None)

    self.conversation_history = []
    self.keypoints_generated = False

    # Initialize Groq client
    try:
        self.groq_client = GroqReasoningEngine()
    except Exception as exc:
        print(f"Warning: Groq not initialized: {exc}")
48
+
49
def load_notebook(self, notebook_file) -> Tuple[str, bool, str, str]:
    """Load an uploaded notebook or Excel workbook and build the Q&A pipeline.

    The upload is copied to a temporary file, parsed into cells, turned into
    a context thread, enriched with intents, indexed, and wired to the
    retrieval and answering systems. Conversation and keypoint caches are
    reset.

    Args:
        notebook_file: A filesystem path (``str`` or ``os.PathLike``) or a
            readable binary file-like object. ``None`` means nothing was
            uploaded.

    Returns:
        ``(status_message, success, preview_html, "")``. On any failure the
        status message describes the problem and ``success`` is ``False``.
    """
    if notebook_file is None:
        return "❌ No file provided", False, "", ""

    temp_path = None
    keep_temp_file = False
    try:
        if isinstance(notebook_file, (str, os.PathLike)):
            source_name = os.fspath(notebook_file)
            suffix = Path(source_name).suffix
            # read_bytes() closes the source file for us.
            payload = Path(source_name).read_bytes()
        else:
            # File-like upload: take the extension from its name when it has
            # one so Excel uploads are not mistaken for notebooks.
            source_name = str(getattr(notebook_file, "name", "") or "")
            suffix = Path(source_name).suffix or ".ipynb"
            payload = notebook_file.read()

        # Save uploaded file temporarily
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(payload)
            temp_path = tmp.name

        file_ext = Path(temp_path).suffix.lower()

        if file_ext == '.ipynb':
            cells = NotebookParser().parse_file(temp_path)['cells']
        elif file_ext in ('.xlsx', '.xls'):
            cells = self._excel_to_cells(temp_path)
        else:
            return "❌ Unsupported file type. Please upload .ipynb or .xlsx/.xls", False, "", ""

        # Build context thread
        builder = ContextThreadBuilder(
            notebook_name=Path(temp_path).stem,
            thread_id=f"thread_{id(self)}"
        )
        builder.add_cells(cells)
        self.current_thread = builder.build()

        # Enrich with intents
        enricher = ContextThreadEnricher(infer_intents=True)
        self.current_thread = enricher.enrich(self.current_thread)

        # Index
        self.current_indexer = FAISSIndexer()
        self.current_indexer.add_multiple(self.current_thread.units)

        # Setup retrieval and reasoning
        self.current_engine = RetrievalEngine(self.current_thread, self.current_indexer)
        self.answering_system = ContextualAnsweringSystem(self.current_engine)

        # Reset conversation
        self.conversation_history = []
        self.keypoints_generated = False
        self.keypoints_cache = None

        # Store file info for later use
        self.current_file_path = temp_path
        self.current_file_ext = file_ext
        # export_analysis() names its report after this.
        self.current_file_name = Path(source_name).stem or None

        # Get appropriate preview based on file type
        if file_ext in ('.xlsx', '.xls'):
            # The Excel copy stays on disk because current_file_path keeps
            # pointing at it after this call returns.
            keep_temp_file = True
            notebook_preview = self.get_excel_display(temp_path)
        else:
            notebook_preview = self.get_notebook_display()

        code_count = sum(1 for c in cells if c.cell_type == CellType.CODE)
        markdown_count = sum(1 for c in cells if c.cell_type == CellType.MARKDOWN)
        status_msg = "\n".join([
            "",
            "### βœ… File Loaded Successfully!",
            "",
            "**Document Statistics:**",
            f"- Total sections: {len(cells)}",
            f"- Code sections: {code_count}",
            f"- Documentation: {markdown_count}",
            "- Indexed & Ready: βœ“",
            "",
            "You can now:",
            "- πŸ” Browse the document in the viewer",
            "- πŸ”‘ Generate key insights (recommended)",
            "- ❓ Ask any questions about the content",
            "",
        ])

        return status_msg, True, notebook_preview, ""

    except Exception as e:
        return f"❌ Error loading file: {str(e)}", False, "", ""
    finally:
        # Remove the temporary copy on every path that does not hand it over
        # to current_file_path (unsupported type, errors, notebooks).
        if temp_path is not None and not keep_temp_file:
            Path(temp_path).unlink(missing_ok=True)
130
+
131
def generate_keypoints(self) -> str:
    """Return a key-points summary of the loaded document, produced by Groq.

    A previously generated summary is returned straight from the cache.
    Otherwise a text digest of the first 30 units (id, type, intent, up to
    500 characters of source and up to 200 characters of the first text
    output) is sent to the Groq client. Failures are reported as a message
    string rather than raised.
    """
    if not self.answering_system:
        return "❌ No document loaded."
    if self.keypoints_cache:
        return self.keypoints_cache

    try:
        digest = []
        for unit in self.current_thread.units[:30]:  # First 30 cells
            digest.append(f"### {unit.cell.cell_id} [{unit.cell.cell_type}]")

            has_intent = unit.intent and unit.intent != "[Pending intent inference]"
            if has_intent:
                digest.append(f"Intent: {unit.intent}")

            # Source may arrive as one string or as a list of line fragments.
            raw_source = unit.cell.source
            body = raw_source if isinstance(raw_source, str) else ''.join(raw_source)
            digest.append(body[:500])

            first_outputs = unit.cell.outputs[:1] if unit.cell.outputs else []
            for output in first_outputs:
                if 'text' not in output:
                    continue
                text = output['text']
                if isinstance(text, list):
                    text = '\n'.join(text)
                digest.append(f"Output: {text[:200]}")

            digest.append("---")

        context_text = "\n".join(digest)

        # Use Groq to generate keypoints
        if not self.groq_client:
            return "❌ Groq client not available. Please check your API key."

        result = self.groq_client.generate_keypoints(context_text, max_points=12)
        if not result["success"]:
            return f"❌ {result['keypoints']}"

        self.keypoints_cache = f"## πŸ”‘ Key Insights & Summary\n\n{result['keypoints']}"
        self.keypoints_generated = True
        return self.keypoints_cache

    except Exception as e:
        return f"❌ Error generating keypoints: {str(e)}"
173
+
174
def set_groq_key(self, api_key: str, enable: bool) -> str:
    """Enable Groq with ``api_key`` or disable it, returning a status message.

    Disabling drops the client and removes ``GROQ_API_KEY`` from the
    environment. Enabling builds a new client with the stripped key and then
    stores the key in the environment. Any exception clears the client and
    is reported in the returned message.
    """
    try:
        if enable:
            cleaned_key = api_key.strip() if api_key else ""
            if cleaned_key == "":
                return "❌ Please provide a valid Groq API key to enable Groq."

            # Build the client first so a rejected key never reaches os.environ.
            self.groq_client = GroqReasoningEngine(api_key=cleaned_key)
            os.environ["GROQ_API_KEY"] = cleaned_key
            return "βœ… Groq enabled successfully. Using Groq for reasoning."

        # Disable Groq usage
        self.groq_client = None
        os.environ.pop("GROQ_API_KEY", None)
        return "βœ… Groq disabled. The system will use fallback reasoning."
    except Exception as e:
        self.groq_client = None
        return f"❌ Could not initialize Groq: {str(e)}"
193
+
194
def get_notebook_display(self) -> str:
    """Render the loaded context thread as Google Colab-like styled HTML.

    The result contains a stylesheet, a header, four summary counters and
    one card per unit showing its type, intent, source and up to two text
    outputs (each truncated to 300 characters).

    All text taken from the document (intent, source, outputs) is
    HTML-escaped, because an uploaded notebook is untrusted input and would
    otherwise be injected into the page as markup.

    Returns:
        The HTML string, or ``"No document loaded."`` when nothing is loaded.
    """
    if not self.current_thread:
        return "No document loaded."

    units = self.current_thread.units

    # Collect fragments and join once instead of repeated string +=.
    parts = ["""
    <style>
    :root {
        --colab-primary: #f59b42;
        --colab-secondary: #e8eaed;
        --colab-text: #202124;
        --colab-border: #dadce0;
    }

    .colab-container {
        font-family: 'Roboto', 'Helvetica Neue', sans-serif;
        color: var(--colab-text);
        padding: 24px;
        background: white;
    }

    .colab-header {
        display: flex;
        align-items: center;
        gap: 12px;
        margin-bottom: 32px;
        padding: 16px;
        background: linear-gradient(135deg, #f59b42 0%, #f5a962 100%);
        border-radius: 8px;
        color: white;
    }

    .colab-header h1 {
        margin: 0;
        font-size: 28px;
        font-weight: 500;
    }

    .colab-header-subtitle {
        color: rgba(255,255,255,0.9);
        font-size: 14px;
        margin-top: 4px;
    }

    .colab-cell {
        background: white;
        border: 1px solid var(--colab-border);
        border-radius: 4px;
        margin: 16px 0;
        box-shadow: 0 1px 2px rgba(0,0,0,0.05);
        overflow: hidden;
    }

    .colab-cell-header {
        display: flex;
        align-items: center;
        gap: 12px;
        padding: 12px 16px;
        background: var(--colab-secondary);
        border-bottom: 1px solid var(--colab-border);
        font-size: 12px;
        font-weight: 500;
        color: #5f6368;
    }

    .colab-cell-number {
        color: #80868b;
        font-family: 'Courier New', monospace;
        font-weight: bold;
    }

    .colab-cell-type {
        display: inline-block;
        padding: 2px 8px;
        background: white;
        border: 1px solid var(--colab-border);
        border-radius: 2px;
        font-size: 11px;
        font-weight: 500;
    }

    .colab-cell-type.code {
        background: #f0f0f0;
        color: #1976d2;
    }

    .colab-cell-type.markdown {
        background: #f0f0f0;
        color: #d32f2f;
    }

    .colab-cell-intent {
        display: inline-block;
        padding: 3px 8px;
        background: #e3f2fd;
        color: #1976d2;
        border-radius: 2px;
        font-size: 11px;
        font-weight: 500;
        margin-left: auto;
    }

    .colab-code {
        background: #282c34;
        color: #abb2bf;
        padding: 16px;
        font-family: 'Courier New', 'Monaco', monospace;
        font-size: 13px;
        line-height: 1.6;
        overflow-x: auto;
        position: relative;
    }

    /* Ensure <pre> inside code blocks inherits visible color and preserves whitespace */
    .colab-code pre {
        color: #abb2bf !important;
        white-space: pre !important;
        margin: 0 !important;
        font-family: inherit !important;
        overflow-x: auto;
    }

    .colab-code-keyword { color: #c678dd; }
    .colab-code-string { color: #98c379; }
    .colab-code-number { color: #d19a66; }
    .colab-code-function { color: #61afef; }
    .colab-code-comment { color: #5c6370; font-style: italic; }

    .colab-markdown {
        padding: 16px;
        font-size: 14px;
        line-height: 1.7;
    }

    .colab-markdown h1 { font-size: 32px; font-weight: 500; margin: 24px 0 16px 0; }
    .colab-markdown h2 { font-size: 24px; font-weight: 500; margin: 20px 0 12px 0; }
    .colab-markdown h3 { font-size: 20px; font-weight: 500; margin: 16px 0 10px 0; }
    .colab-markdown p { margin: 12px 0; }
    .colab-markdown ul, .colab-markdown ol { margin: 12px 0; padding-left: 24px; }
    .colab-markdown code {
        background: #f5f5f5;
        padding: 2px 6px;
        border-radius: 3px;
        font-family: 'Courier New', monospace;
        font-size: 12px;
    }
    .colab-markdown pre {
        background: #f5f5f5;
        padding: 12px;
        border-radius: 4px;
        overflow-x: auto;
    }

    .colab-output {
        background: var(--colab-secondary);
        border-top: 1px solid var(--colab-border);
        padding: 12px 16px;
        font-family: 'Courier New', monospace;
        font-size: 12px;
        max-height: 400px;
        overflow-y: auto;
    }

    .colab-output-label {
        font-weight: 600;
        color: #5f6368;
        font-size: 11px;
        margin-bottom: 8px;
    }

    .colab-stats {
        display: flex;
        gap: 16px;
        margin-bottom: 24px;
        flex-wrap: wrap;
    }

    .colab-stat {
        flex: 1;
        min-width: 140px;
        background: white;
        border: 1px solid var(--colab-border);
        padding: 16px;
        border-radius: 4px;
        text-align: center;
    }

    .colab-stat-value {
        font-size: 24px;
        font-weight: 500;
        color: var(--colab-primary);
    }

    .colab-stat-label {
        font-size: 12px;
        color: #5f6368;
        margin-top: 8px;
    }
    </style>

    <div class="colab-container">
    <div class="colab-header">
    <div>
    <h1>πŸ““ Notebook Analysis</h1>
    <div class="colab-header-subtitle">Google Colab-style Professional Viewer</div>
    </div>
    </div>
    """]

    code_cells = sum(1 for u in units if u.cell.cell_type == CellType.CODE)
    markdown_cells = sum(1 for u in units if u.cell.cell_type == CellType.MARKDOWN)
    cells_with_output = sum(1 for u in units if u.cell.outputs)

    parts.append(f"""
    <div class="colab-stats">
    <div class="colab-stat">
    <div class="colab-stat-value">{len(units)}</div>
    <div class="colab-stat-label">Total Cells</div>
    </div>
    <div class="colab-stat">
    <div class="colab-stat-value">{code_cells}</div>
    <div class="colab-stat-label">Code Cells</div>
    </div>
    <div class="colab-stat">
    <div class="colab-stat-value">{markdown_cells}</div>
    <div class="colab-stat-label">Documentation</div>
    </div>
    <div class="colab-stat">
    <div class="colab-stat-value">{cells_with_output}</div>
    <div class="colab-stat-label">With Output</div>
    </div>
    </div>
    """)

    for i, unit in enumerate(units, 1):
        is_code = unit.cell.cell_type == CellType.CODE
        cell_type_str = "CODE" if is_code else "MARKDOWN"
        cell_type_class = "code" if is_code else "markdown"

        parts.append(f"""
    <div class="colab-cell">
    <div class="colab-cell-header">
    <span class="colab-cell-number">[{i}]</span>
    <span class="colab-cell-type {cell_type_class}">{cell_type_str}</span>
    """)

        if unit.intent and unit.intent != "[Pending intent inference]":
            intent_label = html.escape(str(unit.intent))
            parts.append(f'    <span class="colab-cell-intent">{intent_label}</span>\n')

        parts.append("    </div>\n")

        # Source may be a single string or a list of line fragments.
        raw_source = unit.cell.source
        source_text = raw_source if isinstance(raw_source, str) else ''.join(raw_source)
        escaped_source = html.escape(source_text)

        if is_code:
            # Single quotes around the font name: nested double quotes would
            # terminate the style attribute early.
            parts.append(
                '    <div class="colab-code"><pre style="margin: 0; color: #abb2bf; '
                "white-space: pre; overflow-x: auto; font-family: 'Courier New', monospace;\">"
                f'{escaped_source}</pre></div>\n'
            )
        else:
            parts.append(f'    <div class="colab-markdown">{escaped_source}</div>\n')

        if unit.cell.outputs:
            parts.append('    <div class="colab-output">\n')
            parts.append('    <div class="colab-output-label">Output</div>\n')
            for output in unit.cell.outputs[:2]:
                if 'text' in output:
                    raw_out = output['text']
                elif 'data' in output and 'text/plain' in output['data']:
                    raw_out = output['data']['text/plain']
                else:
                    continue
                if isinstance(raw_out, list):
                    raw_out = '\n'.join(raw_out)
                # Truncate before escaping so an entity is never cut in half.
                output_text = html.escape(str(raw_out)[:300])
                parts.append(f'    <pre>{output_text}</pre>\n')
            parts.append('    </div>\n')

        parts.append("    </div>\n")

    parts.append("""
    </div>
    """)

    return "".join(parts)
482
+
483
def ask_question(self, query: str, conversation_display: List) -> Tuple[List, str]:
    """Answer ``query`` and return the updated chat transcript.

    Casual messages (as judged by ``_is_casual_conversation``) are answered
    by Groq when a client exists, otherwise by a canned reply. Everything
    else goes through the document answering system, with citations and a
    confidence line appended. The internal history is rebuilt from the
    displayed transcript on every call.

    Returns:
        ``(transcript, "")`` - the transcript as role/content dicts and an
        empty string. A blank query returns the transcript it was given.
    """
    if not self.answering_system:
        transcript = self._ensure_message_format(conversation_display)
        transcript.append({"role": "user", "content": query})
        transcript.append({
            "role": "assistant",
            "content": "❌ No document loaded. Please upload a document first.",
        })
        return transcript, ""

    if not query or query.strip() == "":
        return conversation_display, ""

    try:
        # Convert incoming display to role/content format
        transcript = self._ensure_message_format(conversation_display)

        # Sync internal conversation history with display, then add the
        # new user message.
        self.conversation_history = [
            entry for entry in transcript
            if isinstance(entry, dict) and "role" in entry and "content" in entry
        ]
        self.conversation_history.append({"role": "user", "content": query})

        # Casual greeting/small talk needs no document context.
        if self._is_casual_conversation(query):
            if self.groq_client:
                # Use Groq for natural conversation without document analysis
                try:
                    answer_text = self.groq_client.reason(
                        query=query,
                        context="User is having a casual conversation.",
                        conversation_history=self.conversation_history
                    )
                except Exception:
                    answer_text = self._get_fallback_greeting(query)
            else:
                # Fallback friendly response without Groq
                answer_text = self._get_fallback_greeting(query)
        else:
            # Document-based Q&A
            response = self.answering_system.answer_question(
                query,
                top_k=8,
                conversation_history=self.conversation_history
            )
            answer_text = response.answer

            # Add citations if available
            if response.citations:
                answer_text += "\n\n**πŸ“š References:**\n"
                for number, citation in enumerate(response.citations, 1):
                    entry = f"\n{number}. `{citation.cell_id}` [{citation.cell_type}]"
                    if citation.intent:
                        entry += f" - *{citation.intent}*"
                    answer_text += entry

            # Add confidence
            answer_text += f"\n\n*Confidence: {response.confidence:.0%}*"
            if response.has_hallucination_risk:
                answer_text += " ⚠️ *Verify information*"

        # Record the reply in both the internal history and the display.
        reply = {"role": "assistant", "content": answer_text}
        self.conversation_history.append({"role": "assistant", "content": answer_text})
        transcript.append({"role": "user", "content": query})
        transcript.append(reply)
        return transcript, ""

    except Exception as e:
        transcript = self._ensure_message_format(conversation_display)
        transcript.append({"role": "user", "content": query})
        transcript.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
        return transcript, ""
560
+
561
def _is_casual_conversation(self, query: str) -> bool:
    """Detect if query is casual conversation (greeting, small talk) vs document Q&A.

    A query counts as casual when it starts with a greeting, contains a
    small-talk phrase, or is a question of fewer than four words.

    Greetings and phrases are matched on word boundaries. Plain prefix and
    substring checks would misroute real document questions such as
    "histogram of sales" (starts with "hi") or "support level" (contains
    "sup") to the small-talk path.
    """
    import re  # local import: only this helper needs it

    query_lower = query.lower().strip()

    def alternation(phrases):
        # Regex alternation of literal phrases, longest first so that a
        # longer phrase is preferred over a shorter one it starts with.
        ordered = sorted(phrases, key=len, reverse=True)
        return "|".join(re.escape(phrase) for phrase in ordered)

    # Greetings
    greetings = ['hi', 'hello', 'hey', 'howdy', 'greetings', 'good morning', 'good afternoon', 'good evening']
    if re.match(rf"(?:{alternation(greetings)})\b", query_lower):
        return True

    # Small talk / general questions
    small_talk = [
        "how are you", "how are u", "how's it going", "what's up", "sup",
        "how do i use", "how do i get started", "what can you do", "what are you",
        "who are you", "tell me about yourself", "introduce yourself",
        "thanks", "thank you", "great", "awesome", "nice", "cool",
        "lol", "haha", "ha ha"
    ]
    if re.search(rf"\b(?:{alternation(small_talk)})\b", query_lower):
        return True

    # Questions that don't reference the document
    if query.startswith("?") or query.endswith("?"):
        if len(query.split()) < 4:  # Short questions likely casual
            return True

    return False
587
+
588
def _get_fallback_greeting(self, query: str) -> str:
    """Generate a friendly fallback response for casual conversation.

    The reply is chosen by the first matching category: greeting,
    "how are you", "what can you do", thanks/praise, then a generic prompt.
    Keywords are matched as whole words, so that "this" or "anything" is
    not mistaken for the greeting "hi".
    """
    import re  # local import: only this helper needs it

    query_lower = query.lower().strip()

    def mentions(phrases):
        # True when any phrase occurs in the query on word boundaries.
        pattern = "|".join(re.escape(phrase) for phrase in phrases)
        return re.search(rf"\b(?:{pattern})\b", query_lower) is not None

    if mentions(['hi', 'hello', 'hey', 'greetings']):
        return "πŸ‘‹ Hey there! I'm ready to analyze your documents. Upload a notebook or Excel file to get started, and I can answer questions, generate summaries, and provide insights!"
    if mentions(['how are you', "how's it going", "what's up"]):
        return "😊 I'm doing great, thanks for asking! Ready to dive into your documents. What would you like to know?"
    if mentions(['what can you do', 'who are you', 'tell me about']):
        return "πŸ€– I'm an AI assistant specialized in analyzing Jupyter notebooks and Excel files. I can:\n- Summarize key findings\n- Answer questions about your data\n- Generate insights and keypoints\n- Provide data profiles and statistics\n\nUpload a file to get started!"
    if mentions(['thanks', 'thank you', 'great', 'awesome']):
        return "πŸ˜„ You're welcome! Happy to help. What else would you like to know about your document?"
    return "πŸ‘‹ I'm here to help! Upload a document and ask me anything about it. What would you like to explore?"
602
+
603
def _ensure_message_format(self, conversation_display: List) -> List[Dict]:
    """Normalise a chat transcript to a list of role/content dicts.

    Dict entries that already have ``role`` and ``content`` are kept as they
    are. Legacy ``[user_text, assistant_text]`` pairs (list or tuple, at
    least two items) expand into a user message followed by an assistant
    message. Anything else is dropped. An empty or missing transcript gives
    an empty list.
    """
    messages = []
    for entry in conversation_display or []:
        if isinstance(entry, dict):
            # Already in the target format (only if both keys are present).
            if "role" in entry and "content" in entry:
                messages.append(entry)
            continue
        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            user_text, assistant_text = entry[0], entry[1]
            messages.extend([
                {"role": "user", "content": str(user_text)},
                {"role": "assistant", "content": str(assistant_text)},
            ])
    return messages
619
+
620
+ # ==================== KILLER FEATURES ====================
621
+
622
def generate_data_profile(self) -> str:
    """Generate comprehensive data profiling and statistics as HTML.

    The profile has four sections: key metrics (cell counts, total lines,
    average lines per cell), rule-based quality insights, a histogram of the
    first word of each inferred intent, and up to ten imported module names
    found in code cells.

    Document text (intents, module names) is HTML-escaped. An empty thread
    produces a profile with zero counts rather than raising.

    Returns:
        The HTML string, or an error message when no document is loaded.
    """
    if not self.current_thread:
        return "❌ No document loaded."

    pending = "[Pending intent inference]"
    units = self.current_thread.units

    def source_of(unit):
        # Cell source may be one string or a list of line fragments.
        raw = unit.cell.source
        return raw if isinstance(raw, str) else ''.join(raw)

    parts = ["""
    <style>
    .profile-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 20px;
        border-radius: 8px;
        margin: 12px 0;
    }
    .metric {
        display: inline-block;
        background: rgba(255,255,255,0.2);
        padding: 12px 16px;
        border-radius: 6px;
        margin: 6px;
        font-weight: 500;
    }
    .code-quality {
        background: #f0f9ff;
        border-left: 4px solid #0284c7;
        padding: 16px;
        margin: 12px 0;
        border-radius: 6px;
    }
    .insight-box {
        background: #fef3c7;
        border-left: 4px solid #f59e0b;
        padding: 16px;
        margin: 12px 0;
        border-radius: 6px;
    }
    </style>

    <div class="profile-card">
    <h2>πŸ“Š Document Profile & Analytics</h2>
    <p>Comprehensive analysis of your notebook</p>
    </div>
    """]

    # Calculate metrics
    total_cells = len(units)
    code_cells = sum(1 for u in units if u.cell.cell_type == CellType.CODE)
    markdown_cells = total_cells - code_cells
    cells_with_output = sum(1 for u in units if u.cell.outputs)
    cells_with_intent = sum(1 for u in units if u.intent and u.intent != pending)

    total_lines = sum(len(source_of(u).split('\n')) for u in units)
    # Lines are counted over every cell, so average over every cell too.
    avg_cell_size = total_lines // max(total_cells, 1)

    parts.append(f"""
    <div class="code-quality">
    <h3>πŸ“ˆ Key Metrics</h3>
    <div>
    <div class="metric">Total Cells: <strong>{total_cells}</strong></div>
    <div class="metric">Code Cells: <strong>{code_cells}</strong></div>
    <div class="metric">Documentation: <strong>{markdown_cells}</strong></div>
    <div class="metric">Cells with Output: <strong>{cells_with_output}</strong></div>
    <div class="metric">Total Lines: <strong>{total_lines}</strong></div>
    <div class="metric">Avg Cell Size: <strong>{avg_cell_size} lines</strong></div>
    </div>
    </div>

    <div class="insight-box">
    <h3>πŸ’‘ Code Quality Insights</h3>
    """)

    # Quality analysis
    insights = []

    if cells_with_output / max(code_cells, 1) > 0.8:
        insights.append("βœ… <strong>Excellent output coverage:</strong> Most cells produce outputs")
    # Guard the ratio: an empty thread has total_cells == 0.
    if total_cells and cells_with_intent / total_cells > 0.7:
        insights.append("βœ… <strong>Well-structured workflow:</strong> Clear intent in most cells")
    if code_cells < markdown_cells:
        insights.append("βœ… <strong>Well documented:</strong> Good documentation-to-code ratio")
    if total_lines > 500:
        insights.append("⚠️ <strong>Large notebook:</strong> Consider breaking into smaller modules")
    if avg_cell_size > 30:
        insights.append("⚠️ <strong>Large cells:</strong> Some cells could be smaller for clarity")

    if not insights:
        insights.append("ℹ️ Standard notebook structure detected")

    parts.extend(f"<p>{insight}</p>\n" for insight in insights)

    parts.append("""
    </div>

    <div class="insight-box">
    <h3>πŸ” Intent Distribution</h3>
    """)

    intent_counts = {}
    for unit in units:
        if unit.intent and unit.intent != pending:
            words = unit.intent.split()
            if not words:  # whitespace-only intent
                continue
            intent_counts[words[0]] = intent_counts.get(words[0], 0) + 1

    for intent, count in sorted(intent_counts.items(), key=lambda x: x[1], reverse=True):
        parts.append(f"<p>β€’ <strong>{html.escape(intent)}:</strong> {count} cells</p>\n")

    parts.append("""
    </div>

    <div class="insight-box">
    <h3>πŸ“¦ Dependencies & Imports</h3>
    """)

    imports = set()
    for unit in units:
        if unit.cell.cell_type != CellType.CODE:
            continue
        for line in source_of(unit).split('\n'):
            tokens = line.split('#', 1)[0].split()
            if len(tokens) < 2:
                continue
            if tokens[0] == 'from':
                # "from pkg.mod import name" -> "pkg.mod"
                imports.add(tokens[1])
            elif tokens[0] == 'import':
                # "import a, b as c" -> "a", "b"
                statement = line.split('#', 1)[0].strip()[len('import'):]
                for clause in statement.split(','):
                    names = clause.split()
                    if names:
                        imports.add(names[0])

    if imports:
        for imp in sorted(imports)[:10]:
            parts.append(f"<p>β€’ <code>{html.escape(imp)}</code></p>\n")
    else:
        parts.append("<p>No imports detected</p>\n")

    parts.append("""
    </div>
    """)

    return "".join(parts)
759
+
760
def export_analysis(self) -> str:
    """Write a Markdown analysis report to the working directory.

    The report contains the cached key insights (or a placeholder), cell
    counts, and the first 100 characters of every user question asked so
    far. The file is named ``analysis_<document>_<timestamp>.md``.

    The file is written as UTF-8 explicitly: the report contains emoji, and
    the platform default encoding cannot always represent them.

    Returns:
        A confirmation message naming the file, or an error message when no
        document is loaded.
    """
    if not self.current_thread:
        return "❌ No document loaded."

    # One timestamp for both the file name and the report header.
    now = datetime.now()
    filename = f"analysis_{self.current_file_name or 'notebook'}_{now.strftime('%Y%m%d_%H%M%S')}.md"

    units = self.current_thread.units
    code_count = sum(1 for u in units if u.cell.cell_type == CellType.CODE)
    markdown_count = sum(1 for u in units if u.cell.cell_type == CellType.MARKDOWN)

    # Create markdown report
    report_lines = [
        "# Document Analysis Report",
        f"Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "## Executive Summary",
        f"{self.keypoints_cache or 'Key insights would be generated here.'}",
        "",
        "## Key Metrics",
        f"- Total Cells: {len(units)}",
        f"- Code Cells: {code_count}",
        f"- Documentation Cells: {markdown_count}",
        "",
        "## Questions Asked",
        "",
    ]
    report = "\n".join(report_lines)

    questions = [
        f"\n- {str(msg.get('content', ''))[:100]}"
        for msg in self.conversation_history
        if msg.get("role") == "user"
    ]
    report += "".join(questions)

    # Save to file
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(report)

    return f"βœ… Report exported to `{filename}`"
792
+
793
def advanced_search(self, search_term: str) -> str:
    """Case-insensitive substring search across the source of every cell.

    Args:
        search_term: Text to look for.

    Returns:
        An HTML fragment listing up to ten matching cells (1-based cell
        number, type, intent and the first 150 characters of source), a
        "no results" message, or an error message when nothing is loaded or
        the term is empty. The search term and all document text are
        HTML-escaped, so code containing ``<`` or ``&`` is shown literally
        instead of being interpreted as markup.
    """
    if not self.current_thread or not search_term:
        return "❌ No document loaded or search term empty."

    needle = search_term.lower()
    results = []

    for i, unit in enumerate(self.current_thread.units, 1):
        raw_source = unit.cell.source
        source_text = raw_source if isinstance(raw_source, str) else ''.join(raw_source)
        if needle in source_text.lower():
            results.append({
                "cell": i,
                "type": unit.cell.cell_type,
                "intent": unit.intent,
                "snippet": source_text[:150]
            })

    safe_term = html.escape(search_term)
    if not results:
        return f"No results found for '{safe_term}'"

    parts = [f"<h3>πŸ” Found {len(results)} matches for '{safe_term}'</h3>\n"]

    for r in results[:10]:
        # Works for a plain string, a str-based Enum and a plain Enum alike.
        type_label = html.escape(str(getattr(r['type'], "value", r['type'])).upper())
        # A missing intent is shown as nothing rather than the text "None".
        intent_label = html.escape(str(r['intent'])) if r['intent'] else ""
        snippet = html.escape(r['snippet'])
        parts.append(f"""
    <div style="background: #f0f4f8; padding: 12px; margin: 8px 0; border-radius: 6px; border-left: 4px solid #0284c7;">
    <strong>Cell {r['cell']}</strong> [{type_label}] {intent_label}<br/>
    <code style="font-size: 0.85em;">{snippet}...</code>
    </div>
    """)

    return "".join(parts)
825
+
826
def get_recommendations(self) -> str:
    """Build a list of heuristic recommendations for the loaded document.

    The heuristics are simple counts over ``self.current_thread.units``:

    * more than 20 code cells        -> suggest modularising
    * no markdown cells              -> suggest adding documentation
    * more than 50 cells overall     -> suggest splitting the notebook
    * any cell longer than 1000 characters -> suggest breaking it down
    * more than 30% of code cells without outputs -> suggest re-running

    Returns:
        An HTML fragment (style block, banner, one numbered item per
        recommendation), or a plain message when no document is loaded.
    """
    from textwrap import dedent

    if not self.current_thread:
        return "❌ No document loaded."

    units = self.current_thread.units

    # Single pass over the units instead of one generator per metric.
    code_cells = markdown_cells = large_cells = cells_without_output = 0
    for unit in units:
        cell = unit.cell
        if cell.cell_type == CellType.CODE:
            code_cells += 1
            if not cell.outputs:
                cells_without_output += 1
        elif cell.cell_type == CellType.MARKDOWN:
            markdown_cells += 1

        # A source may be a string or a list of lines; measure characters in
        # both cases (len() of a list would count lines instead).
        source = cell.source
        source_text = source if isinstance(source, str) else ''.join(source)
        if len(source_text) > 1000:
            large_cells += 1

    recs = []
    if code_cells > 20:
        recs.append("🔄 Consider modularizing code into separate files/functions")
    if markdown_cells == 0:
        recs.append("📝 Add documentation cells for better clarity")
    if len(units) > 50:
        recs.append("📚 This notebook is large - consider splitting into multiple notebooks")
    if large_cells > 0:
        recs.append(f"✂️ {large_cells} cells are very large - consider breaking them down")
    if cells_without_output > code_cells * 0.3:
        recs.append("⚠️ Many code cells don't have outputs - ensure cells are executable")
    if not recs:
        recs.append("✅ Notebook follows best practices!")

    # Dedented so the markup starts at column 0; the result is displayed in a
    # Markdown component, where deeply indented lines could become code blocks.
    header = dedent("""
        <style>
        .rec-card {
            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
            color: white;
            padding: 20px;
            border-radius: 8px;
            margin: 12px 0;
        }
        .rec-item {
            background: rgba(0,0,0,0.2);
            padding: 12px;
            margin: 8px 0;
            border-radius: 6px;
        }
        </style>

        <div class="rec-card">
        <h2>⭐ AI-Powered Recommendations</h2>
        </div>
        """)

    items = "".join(
        f'<div class="rec-item">{i}. {rec}</div>\n'
        for i, rec in enumerate(recs, 1)
    )
    return header + items
881
+
882
def _excel_to_cells(self, excel_path: str) -> List[Cell]:
    """Convert an Excel workbook into a list of notebook-like cells.

    One markdown overview cell is produced for the workbook, then for every
    sheet: a markdown header (dimensions), a markdown column/dtype listing,
    a code cell whose output is the first 10 rows, and - when the sheet has
    at least one numeric column - a code cell whose output is
    ``DataFrame.describe()``.

    Args:
        excel_path: Path of the workbook to read.

    Returns:
        The generated cells in workbook order.
    """
    from src.models import Cell, CellType

    cells = []

    # The context manager closes the workbook handle even if parsing a sheet
    # raises; the original left the file open.
    with pd.ExcelFile(excel_path) as xl:
        sheet_names = xl.sheet_names

        # Add overview cell
        cells.append(Cell(
            cell_id="excel_overview",
            cell_type=CellType.MARKDOWN,
            source=f"# Excel Document Analysis\n\nSheets: {', '.join(sheet_names)}\nTotal Sheets: {len(sheet_names)}",
            outputs=[]
        ))

        for sheet_name in sheet_names:
            df = xl.parse(sheet_name)

            # Sheet header
            cells.append(Cell(
                cell_id=f"sheet_{sheet_name}_header",
                cell_type=CellType.MARKDOWN,
                source=f"## Sheet: {sheet_name}\n\n**Dimensions:** {df.shape[0]} rows × {df.shape[1]} columns",
                outputs=[]
            ))

            # Column info
            col_info = "\n".join(f"- {col}: {dtype}" for col, dtype in df.dtypes.items())
            cells.append(Cell(
                cell_id=f"sheet_{sheet_name}_columns",
                cell_type=CellType.MARKDOWN,
                source=f"### Columns\n{col_info}",
                outputs=[]
            ))

            # Data preview: the "source" is illustrative text only; the real
            # content is the pre-rendered output.
            cells.append(Cell(
                cell_id=f"data_{sheet_name}_preview",
                cell_type=CellType.CODE,
                source=f"# Preview of {sheet_name}\ndf_{sheet_name}.head(10)",
                outputs=[{"data": {"text/plain": df.head(10).to_string()}}]
            ))

            # Statistics (only meaningful when numeric columns exist)
            if df.select_dtypes(include=['number']).shape[1] > 0:
                stats = df.describe().to_string()
                cells.append(Cell(
                    cell_id=f"stats_{sheet_name}",
                    cell_type=CellType.CODE,
                    source=f"# Statistics for {sheet_name}\ndf_{sheet_name}.describe()",
                    outputs=[{"data": {"text/plain": stats}}]
                ))

    return cells
936
+
937
def get_excel_display(self, excel_path: str) -> str:
    """Render the first sheet of a workbook as an Excel-styled HTML view.

    The view contains a style block, a header, four summary tiles (rows,
    columns, in-memory size, missing values), a dtype summary line, one tab
    label per sheet (only the first sheet's data is rendered) and a table
    with at most the first 100 rows.

    Args:
        excel_path: Path of the workbook to read.

    Returns:
        The HTML fragment, or a plain message when the workbook has no
        sheets.
    """
    from html import escape

    # Close the workbook as soon as the data has been read.
    with pd.ExcelFile(excel_path) as xl:
        sheet_names = xl.sheet_names
        if not sheet_names:
            return "No sheets found in Excel file."
        primary_sheet = sheet_names[0]
        df = xl.parse(primary_sheet)

    def _safe(value) -> str:
        # Sheet names, headers and cell values come from the uploaded file
        # and must not be able to inject markup into the page.
        return escape(str(value), quote=False)

    # Collected in a list and joined once; repeated += on a growing string is
    # quadratic in the worst case.
    parts = ["""
    <style>
    .excel-container {
    font-family: 'Calibri', 'Arial', sans-serif;
    padding: 16px;
    background: white;
    }

    .excel-header {
    display: flex;
    align-items: center;
    gap: 12px;
    margin-bottom: 24px;
    padding: 12px 16px;
    background: linear-gradient(135deg, #2d7f38 0%, #4caf50 100%);
    border-radius: 4px;
    color: white;
    }

    .excel-header h1 {
    margin: 0;
    font-size: 24px;
    font-weight: 500;
    }

    .excel-header-subtitle {
    color: rgba(255,255,255,0.95);
    font-size: 12px;
    margin-top: 2px;
    }

    .excel-toolbar {
    display: flex;
    gap: 8px;
    padding: 12px 0;
    border-bottom: 1px solid #e0e0e0;
    margin-bottom: 16px;
    overflow-x: auto;
    }

    .excel-tab {
    padding: 8px 16px;
    background: white;
    border: 1px solid #d0d0d0;
    border-bottom: none;
    border-radius: 4px 4px 0 0;
    cursor: pointer;
    font-weight: 500;
    color: #666;
    font-size: 13px;
    white-space: nowrap;
    }

    .excel-tab.active {
    background: white;
    color: #2d7f38;
    border-color: #2d7f38;
    border-bottom: 2px solid white;
    margin-bottom: -1px;
    }

    .excel-grid-wrapper {
    overflow-x: auto;
    border: 1px solid #d0d0d0;
    border-radius: 4px;
    background: white;
    }

    table.excel-grid {
    width: 100%;
    border-collapse: collapse;
    font-size: 13px;
    }

    .excel-grid th {
    background: #f3f3f3;
    border: 1px solid #d0d0d0;
    padding: 8px 12px;
    text-align: left;
    font-weight: 600;
    color: #333;
    position: sticky;
    top: 0;
    z-index: 10;
    min-width: 80px;
    }

    .excel-grid td {
    border: 1px solid #e0e0e0;
    padding: 8px 12px;
    color: #333;
    background: white;
    }

    .excel-grid tr:nth-child(even) td {
    background: #f9f9f9;
    }

    .excel-grid tr:hover td {
    background: #e8f5e9;
    }

    .excel-row-header {
    background: #f3f3f3;
    border: 1px solid #d0d0d0;
    padding: 8px 12px;
    font-weight: 600;
    color: #666;
    text-align: center;
    width: 40px;
    min-width: 40px;
    }

    .excel-stats {
    display: flex;
    gap: 16px;
    margin-bottom: 24px;
    flex-wrap: wrap;
    }

    .excel-stat {
    flex: 1;
    min-width: 120px;
    background: #f9f9f9;
    border: 1px solid #d0d0d0;
    padding: 12px;
    border-radius: 4px;
    text-align: center;
    }

    .excel-stat-value {
    font-size: 20px;
    font-weight: 600;
    color: #2d7f38;
    }

    .excel-stat-label {
    font-size: 12px;
    color: #666;
    margin-top: 6px;
    }

    .excel-data-info {
    background: #f0f7f0;
    border-left: 4px solid #2d7f38;
    padding: 12px;
    margin-bottom: 16px;
    border-radius: 4px;
    font-size: 13px;
    }

    .excel-data-info strong {
    color: #2d7f38;
    }
    </style>

    <div class="excel-container">
    <div class="excel-header">
    <div>
    <h1>📊 Excel Data Viewer</h1>
    <div class="excel-header-subtitle">Microsoft Excel-style Professional Spreadsheet</div>
    </div>
    </div>
    """]
    # NOTE: the table selector above was `.excel-grid table`, which never
    # matched because the <table> itself carries the `excel-grid` class.

    row_count = len(df)
    column_count = len(df.columns)
    dtype_summary = ', '.join(map(str, df.dtypes.unique()))

    parts.append(f"""
    <div class="excel-stats">
    <div class="excel-stat">
    <div class="excel-stat-value">{row_count}</div>
    <div class="excel-stat-label">Rows</div>
    </div>
    <div class="excel-stat">
    <div class="excel-stat-value">{column_count}</div>
    <div class="excel-stat-label">Columns</div>
    </div>
    <div class="excel-stat">
    <div class="excel-stat-value">{df.memory_usage(deep=True).sum() / 1024:.1f} KB</div>
    <div class="excel-stat-label">Size</div>
    </div>
    <div class="excel-stat">
    <div class="excel-stat-value">{df.isnull().sum().sum()}</div>
    <div class="excel-stat-label">Missing</div>
    </div>
    </div>

    <div class="excel-data-info">
    <strong>📋 Data Summary:</strong> {row_count} rows × {column_count} columns | Dtypes: {_safe(dtype_summary)}
    </div>

    <div class="excel-toolbar">
    <div class="excel-tab active">{_safe(primary_sheet)}</div>
    """)

    # Remaining sheets are listed as inactive tabs; their data is not rendered.
    parts.extend(
        f'    <div class="excel-tab">{_safe(sheet)}</div>\n'
        for sheet in sheet_names[1:]
    )

    parts.append("""    </div>

    <div class="excel-grid-wrapper">
    <table class="excel-grid">
    <thead>
    <tr>
    <th class="excel-row-header"></th>
    """)

    parts.extend(f"    <th>{_safe(col)}</th>\n" for col in df.columns)

    parts.append("""    </tr>
    </thead>
    <tbody>
    """)

    # Rows are numbered by position so the display does not depend on the
    # index being a zero-based integer range.
    for row_number, (_, row) in enumerate(df.head(100).iterrows(), 1):
        parts.append(f"    <tr>\n    <td class='excel-row-header'>{row_number}</td>\n")
        for value in row:
            if pd.isna(value):
                parts.append("    <td style='color: #ccc;'>—</td>\n")
                continue
            if isinstance(value, float):
                formatted_value = f"{value:,.2f}"
            elif isinstance(value, int):
                formatted_value = str(value)
            else:
                # Truncate first, then escape, so an entity is never cut.
                formatted_value = _safe(str(value)[:50])
            parts.append(f"    <td>{formatted_value}</td>\n")
        parts.append("    </tr>\n")

    if row_count > 100:
        parts.append(f"""    <tr>
    <td colspan="{column_count + 1}" style="text-align: center; color: #999; padding: 12px;">
    ... and {row_count - 100} more rows
    </td>
    </tr>
    """)

    parts.append("""    </tbody>
    </table>
    </div>

    </div>
    """)

    return "".join(parts)
1191
+
1192
+
1193
def _read_groq_key_from_env_file(env_path):
    """Return the first non-empty GROQ_API_KEY value in a dotenv file, or None.

    Accepts an optional ``export `` prefix and strips one pair of matching
    surrounding quotes, so ``GROQ_API_KEY="gsk_..."`` yields ``gsk_...``.
    """
    if not env_path.exists():
        return None

    for raw_line in env_path.read_text(encoding='utf-8').splitlines():
        line = raw_line.strip()
        if line.startswith('export '):
            line = line[len('export '):].lstrip()
        if not line.startswith('GROQ_API_KEY='):
            continue
        value = line.split('=', 1)[1].strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1].strip()
        if value:
            return value
    return None


def create_gradio_app():
    """Create and return the enhanced Gradio interface.

    Builds a ``NotebookAgentUI``, tries (best effort) to enable Groq from the
    ``GROQ_API_KEY`` environment variable or the project's ``.env`` file,
    lays out the landing section and the analysis workspace (hidden until a
    document is uploaded) and wires every button to the agent.

    Returns:
        The configured ``gr.Blocks`` application; the caller launches it.
    """
    import logging

    log = logging.getLogger(__name__)
    agent = NotebookAgentUI()

    # Auto-initialize Groq if key present in environment but client wasn't created earlier.
    # This stays best-effort (the UI offers manual key entry), but failures are
    # logged instead of being silently discarded.  Only the exception type is
    # logged so that no key material can end up in the log.
    try:
        if not agent.groq_client:
            groq_key = os.getenv("GROQ_API_KEY")
            # Fallback: read .env directly if load_dotenv didn't pick it up
            if not groq_key:
                groq_key = _read_groq_key_from_env_file(Path(__file__).parent.parent / '.env')
            if groq_key:
                agent.set_groq_key(groq_key, True)
    except Exception as exc:
        log.warning("Groq auto-initialization skipped (%s)", type(exc).__name__)

    # Custom CSS for better styling
    custom_css = """
    .main-header {
    text-align: center;
    padding: 2rem;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 2rem;
    }
    .feature-box {
    padding: 1rem;
    border: 2px solid #e0e0e0;
    border-radius: 8px;
    margin: 0.5rem 0;
    }
    .upload-section {
    text-align: center;
    padding: 2rem;
    border: 3px dashed #667eea;
    border-radius: 10px;
    background: #f8f9ff;
    }
    """

    # NOTE(review): the nesting of the layout below was reconstructed from
    # statement order; confirm which container each widget belongs to.
    with gr.Blocks(title="Context Thread Agent", theme=gr.themes.Soft(), css=custom_css) as demo:
        gr.HTML("""
        <div class="main-header">
        <h1>🧵 Context Thread Agent</h1>
        <p style="font-size: 1.2rem; margin-top: 1rem;">
        AI-Powered Document Analysis & Q&A System
        </p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("""
                ## 🎯 What is Context Thread Agent?

                Context Thread Agent is an **intelligent document analysis platform** that helps you understand and extract insights from complex Jupyter notebooks and Excel spreadsheets. Using advanced AI (powered by **Groq LLM**), it provides:

                ### 🚀 Major Use Cases:

                - **📊 Data Analysis Review**: Understand complex analytical workflows instantly
                - **🔍 Code Audit**: Verify assumptions and logic in data science notebooks
                - **📈 Excel Report Analysis**: Extract insights from large spreadsheets
                - **🤖 Automated Documentation**: Generate summaries and key findings
                - **💡 Knowledge Extraction**: Ask questions about methodology and results
                - **🔗 Dependency Tracking**: Understand how different parts connect
                - **✅ Quality Assurance**: Validate calculations and transformations

                ### ✨ Key Features:
                - ✓ **100% Grounded Answers** - No hallucinations, only facts from your document
                - ✓ **Citation-Based** - Every answer references specific cells
                - ✓ **Context-Aware** - Understands relationships between code sections
                - ✓ **Conversation Memory** - Maintains context across questions
                - ✓ **Key Insights Generation** - AI-powered summary of main points
                - ✓ **Fast & Free** - Powered by Groq's lightning-fast inference
                """)

            with gr.Column(scale=1):
                gr.HTML("""
                <div class="upload-section">
                <h3>📤 Quick Start</h3>
                <p>Upload your document and start exploring</p>
                </div>
                """)

                file_input = gr.File(
                    label="Upload Your Document",
                    file_types=[".ipynb", ".xlsx", ".xls"],
                    type="filepath",
                    elem_classes="upload-input"
                )
                upload_btn = gr.Button(
                    "📤 Upload & Analyze",
                    variant="primary",
                    size="lg",
                    scale=2
                )

                upload_status = gr.Markdown("### 📋 Status\n\nReady to upload...")

                # Groq status - show only status if enabled, otherwise show input
                if agent.groq_client:
                    groq_status = gr.Markdown("### 🚀 Groq Configuration\n\n✅ **Groq is enabled and ready!**\n\nYour Groq API key has been loaded from environment. Advanced reasoning will be used for analysis.")
                    # Hidden inputs for compatibility
                    groq_key_input = gr.Textbox(visible=False)
                    groq_toggle = gr.Checkbox(visible=False)
                    set_groq_btn = gr.Button(visible=False)
                else:
                    # Show input if Groq not enabled
                    groq_key_input = gr.Textbox(
                        label="Groq API Key",
                        placeholder="Paste your Groq key (gsk_...)",
                        type="password"
                    )
                    groq_toggle = gr.Checkbox(label="Use Groq for reasoning", value=False)
                    set_groq_btn = gr.Button("Set Groq Key", variant="secondary")
                    groq_status = gr.Markdown("⚠️ **Groq not configured.** Add your key and click 'Set Groq Key' to enable advanced reasoning.")

                # The button is wired in both cases; when Groq is already
                # enabled the controls are hidden, so the handler cannot fire.
                set_groq_btn.click(agent.set_groq_key, inputs=[groq_key_input, groq_toggle], outputs=[groq_status])

        gr.Markdown("---")

        # Main interface (hidden until upload)
        with gr.Column(visible=False) as main_interface:
            gr.Markdown("## 💼 Analysis Workspace")

            with gr.Row():
                # Left side: Document viewer
                with gr.Column(scale=1):
                    gr.Markdown("### 📓 Document Viewer")

                    with gr.Tabs():
                        with gr.Tab("📄 Content"):
                            notebook_display = gr.HTML(
                                value="",
                                label="Document Content",
                                elem_classes="notebook-viewer"
                            )

                        with gr.Tab("🔑 Key Points"):
                            keypoints_btn = gr.Button(
                                "🔄 Generate Key Insights",
                                variant="secondary",
                                size="lg"
                            )
                            gr.Markdown("*This may take 10-30 seconds for comprehensive analysis...*")
                            keypoints_display = gr.Markdown(
                                value="",
                                label="Key Insights"
                            )

                        with gr.Tab("📊 Analytics"):
                            analytics_btn = gr.Button("📊 Generate Profile", variant="secondary", size="lg")
                            analytics_display = gr.Markdown(value="", label="Analytics")

                        with gr.Tab("⭐ Recommendations"):
                            rec_btn = gr.Button("💡 Get Recommendations", variant="secondary", size="lg")
                            rec_display = gr.Markdown(value="", label="Recommendations")

                        with gr.Tab("🔍 Advanced Search"):
                            search_input = gr.Textbox(
                                label="Search Term",
                                placeholder="Search in all cells...",
                                lines=1
                            )
                            search_btn = gr.Button("🔎 Search", variant="secondary")
                            search_display = gr.Markdown(value="", label="Search Results")

                        with gr.Tab("📥 Export"):
                            export_btn = gr.Button("📥 Export Analysis Report", variant="secondary", size="lg")
                            export_display = gr.Markdown(value="", label="Export Status")

                # Right side: Q&A Interface
                with gr.Column(scale=1):
                    gr.Markdown("### 💬 Ask Questions")

                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=500,
                        elem_classes="chat-box"
                    )

                    with gr.Row():
                        query_input = gr.Textbox(
                            label="Your Question",
                            placeholder="e.g., 'What are the main findings?' or 'Why was Q4 data removed?'",
                            lines=2,
                            scale=4
                        )
                        ask_btn = gr.Button("🤖 Ask", variant="primary", scale=1)

                    gr.Markdown("""
                    **💡 Example Questions:**
                    - What is this document about?
                    - What are the key findings?
                    - Why was [specific data] removed?
                    - How was [metric] calculated?
                    - What patterns were found?
                    - Are there any data quality issues?
                    """)

        # Event handlers
        def on_upload(file):
            # load_notebook returns a plain bool for visibility; Gradio needs
            # it wrapped in an update for the container.
            status, show_interface, notebook_content, keypoints = agent.load_notebook(file)
            return (
                status,
                gr.update(visible=show_interface),
                notebook_content,
                keypoints
            )

        upload_btn.click(
            fn=on_upload,
            inputs=[file_input],
            outputs=[upload_status, main_interface, notebook_display, keypoints_display]
        )

        # Keypoints generation with loading state: the first step shows a
        # placeholder immediately, the chained step replaces it with the result.
        def generate_with_loading():
            return "⏳ **Analyzing document and generating insights...**\n\nThis may take 10-30 seconds depending on document complexity."

        keypoints_btn.click(
            fn=generate_with_loading,
            inputs=[],
            outputs=[keypoints_display]
        ).then(
            fn=agent.generate_keypoints,
            inputs=[],
            outputs=[keypoints_display]
        )

        # Analytics tab
        analytics_btn.click(
            fn=agent.generate_data_profile,
            inputs=[],
            outputs=[analytics_display]
        )

        # Recommendations tab
        rec_btn.click(
            fn=agent.get_recommendations,
            inputs=[],
            outputs=[rec_display]
        )

        # Advanced search
        search_btn.click(
            fn=agent.advanced_search,
            inputs=[search_input],
            outputs=[search_display]
        )

        # Export
        export_btn.click(
            fn=agent.export_analysis,
            inputs=[],
            outputs=[export_display]
        )

        # Q&A interaction: the button and the Enter key share one handler.
        ask_btn.click(
            fn=agent.ask_question,
            inputs=[query_input, chatbot],
            outputs=[chatbot, query_input]
        )

        query_input.submit(
            fn=agent.ask_question,
            inputs=[query_input, chatbot],
            outputs=[chatbot, query_input]
        )

    return demo
1479
+
1480
+
1481
if __name__ == "__main__":
    # Script entry point: build the interface and serve it on all network
    # interfaces at port 7860, with a share link requested from Gradio.
    launch_settings = dict(server_name="0.0.0.0", server_port=7860, share=True)
    demo = create_gradio_app()
    demo.launch(**launch_settings)