Spaces:

mozzic
/

context-thread-agent

Sleeping

File size: 54,838 Bytes

b17926a

"""

Gradio UI for Context Thread Agent - Enterprise Edition

Professional document analysis with killer features

"""

import gradio as gr
import json
import tempfile
import os
import html
from pathlib import Path
from typing import Tuple, List, Dict
from src.models import Cell, CellType
from datetime import datetime

from src.parser import NotebookParser
from src.dependencies import ContextThreadBuilder
from src.indexing import FAISSIndexer
from src.retrieval import RetrievalEngine, ContextBuilder
from src.reasoning import ContextualAnsweringSystem
from src.intent import ContextThreadEnricher
from src.groq_integration import GroqReasoningEngine
import pandas as pd


class NotebookAgentUI:
    """Enterprise-grade Gradio UI for the Context Thread Agent."""
    
    def __init__(self):
        self.current_thread = None
        self.current_indexer = None
        self.current_engine = None
        self.answering_system = None
        self.conversation_history = []
        self.groq_client = None
        self.keypoints_generated = False
        self.keypoints_cache = None
        self.current_file_name = None
        self.data_profile = None
        self.current_file_path = None
        self.current_file_ext = None
        
        # Initialize Groq client
        try:
            self.groq_client = GroqReasoningEngine()
        except Exception as e:
            print(f"Warning: Groq not initialized: {e}")
    
    def load_notebook(self, notebook_file) -> Tuple[str, bool, str, str]:
        """Load and index a notebook or Excel file."""
        try:
            if notebook_file is None:
                return "❌ No file provided", False, "", ""
            
            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(suffix=Path(notebook_file).suffix if isinstance(notebook_file, str) else ".ipynb", delete=False) as f:
                if isinstance(notebook_file, str):
                    f.write(open(notebook_file, 'rb').read())
                else:
                    f.write(notebook_file.read())
                temp_path = f.name
            
            file_ext = Path(temp_path).suffix.lower()
            
            if file_ext == '.ipynb':
                parser = NotebookParser()
                result = parser.parse_file(temp_path)
                cells = result['cells']
            elif file_ext in ['.xlsx', '.xls']:
                cells = self._excel_to_cells(temp_path)
            else:
                return "❌ Unsupported file type. Please upload .ipynb or .xlsx/.xls", False, "", ""
            
            # Build context thread
            builder = ContextThreadBuilder(
                notebook_name=Path(temp_path).stem,
                thread_id=f"thread_{id(self)}"
            )
            builder.add_cells(cells)
            self.current_thread = builder.build()
            
            # Enrich with intents
            enricher = ContextThreadEnricher(infer_intents=True)
            self.current_thread = enricher.enrich(self.current_thread)
            
            # Index
            self.current_indexer = FAISSIndexer()
            self.current_indexer.add_multiple(self.current_thread.units)
            
            # Setup retrieval and reasoning
            self.current_engine = RetrievalEngine(self.current_thread, self.current_indexer)
            self.answering_system = ContextualAnsweringSystem(self.current_engine)
            
            # Reset conversation
            self.conversation_history = []
            self.keypoints_generated = False
            self.keypoints_cache = None
            
            # Store file info for later use
            self.current_file_path = temp_path
            self.current_file_ext = file_ext
            
            # Get appropriate preview based on file type
            if file_ext in ['.xlsx', '.xls']:
                notebook_preview = self.get_excel_display(temp_path)
            else:
                notebook_preview = self.get_notebook_display()
                # Cleanup for non-Excel files
                Path(temp_path).unlink()
            
            status_msg = f"""

### ✅ File Loaded Successfully!



**Document Statistics:**

- Total sections: {len(cells)}

- Code sections: {sum(1 for c in cells if c.cell_type == CellType.CODE)}

- Documentation: {sum(1 for c in cells if c.cell_type == CellType.MARKDOWN)}

- Indexed & Ready: ✓



You can now:

- 🔍 Browse the document in the viewer

- 🔑 Generate key insights (recommended)

- ❓ Ask any questions about the content

"""
            
            return status_msg, True, notebook_preview, ""
        
        except Exception as e:
            return f"❌ Error loading file: {str(e)}", False, "", ""
    
    def generate_keypoints(self) -> str:
        """Generate key points summary using Groq."""
        if not self.answering_system:
            return "❌ No document loaded."
        
        if self.keypoints_cache:
            return self.keypoints_cache
        
        try:
            # Get comprehensive context
            all_context = []
            for unit in self.current_thread.units[:30]:  # First 30 cells
                all_context.append(f"### {unit.cell.cell_id} [{unit.cell.cell_type}]")
                if unit.intent and unit.intent != "[Pending intent inference]":
                    all_context.append(f"Intent: {unit.intent}")
                source_text = unit.cell.source if isinstance(unit.cell.source, str) else ''.join(unit.cell.source)
                all_context.append(source_text[:500])
                if unit.cell.outputs:
                    for output in unit.cell.outputs[:1]:
                        if 'text' in output:
                            raw_out = output['text']
                            if isinstance(raw_out, list):
                                raw_out = '\n'.join(raw_out)
                            all_context.append(f"Output: {raw_out[:200]}")
                all_context.append("---")
            
            context_text = "\n".join(all_context)
            
            # Use Groq to generate keypoints
            if self.groq_client:
                result = self.groq_client.generate_keypoints(context_text, max_points=12)
                if result["success"]:
                    self.keypoints_cache = f"## 🔑 Key Insights & Summary\n\n{result['keypoints']}"
                    self.keypoints_generated = True
                    return self.keypoints_cache
                else:
                    return f"❌ {result['keypoints']}"
            else:
                return "❌ Groq client not available. Please check your API key."
        
        except Exception as e:
            return f"❌ Error generating keypoints: {str(e)}"
    
    def set_groq_key(self, api_key: str, enable: bool) -> str:
        """Set or clear the Groq API key and reinitialize the Groq client at runtime."""
        try:
            if not enable:
                # Disable Groq usage
                self.groq_client = None
                os.environ.pop("GROQ_API_KEY", None)
                return "✅ Groq disabled. The system will use fallback reasoning."
            
            if not api_key or api_key.strip() == "":
                return "❌ Please provide a valid Groq API key to enable Groq."
            
            # Try to initialize Groq with the provided key
            self.groq_client = GroqReasoningEngine(api_key=api_key.strip())
            os.environ["GROQ_API_KEY"] = api_key.strip()
            return "✅ Groq enabled successfully. Using Groq for reasoning."
        except Exception as e:
            self.groq_client = None
            return f"❌ Could not initialize Groq: {str(e)}"
    
    def get_notebook_display(self) -> str:
        """Get Google Colab-like styled notebook content."""
        if not self.current_thread:
            return "No document loaded."
        
        display = """

<style>

:root {

    --colab-primary: #f59b42;

    --colab-secondary: #e8eaed;

    --colab-text: #202124;

    --colab-border: #dadce0;

}



.colab-container {

    font-family: 'Roboto', 'Helvetica Neue', sans-serif;

    color: var(--colab-text);

    padding: 24px;

    background: white;

}



.colab-header {

    display: flex;

    align-items: center;

    gap: 12px;

    margin-bottom: 32px;

    padding: 16px;

    background: linear-gradient(135deg, #f59b42 0%, #f5a962 100%);

    border-radius: 8px;

    color: white;

}



.colab-header h1 {

    margin: 0;

    font-size: 28px;

    font-weight: 500;

}



.colab-header-subtitle {

    color: rgba(255,255,255,0.9);

    font-size: 14px;

    margin-top: 4px;

}



.colab-cell {

    background: white;

    border: 1px solid var(--colab-border);

    border-radius: 4px;

    margin: 16px 0;

    box-shadow: 0 1px 2px rgba(0,0,0,0.05);

    overflow: hidden;

}



.colab-cell-header {

    display: flex;

    align-items: center;

    gap: 12px;

    padding: 12px 16px;

    background: var(--colab-secondary);

    border-bottom: 1px solid var(--colab-border);

    font-size: 12px;

    font-weight: 500;

    color: #5f6368;

}



.colab-cell-number {

    color: #80868b;

    font-family: 'Courier New', monospace;

    font-weight: bold;

}



.colab-cell-type {

    display: inline-block;

    padding: 2px 8px;

    background: white;

    border: 1px solid var(--colab-border);

    border-radius: 2px;

    font-size: 11px;

    font-weight: 500;

}



.colab-cell-type.code {

    background: #f0f0f0;

    color: #1976d2;

}



.colab-cell-type.markdown {

    background: #f0f0f0;

    color: #d32f2f;

}



.colab-cell-intent {

    display: inline-block;

    padding: 3px 8px;

    background: #e3f2fd;

    color: #1976d2;

    border-radius: 2px;

    font-size: 11px;

    font-weight: 500;

    margin-left: auto;

}



.colab-code {

    background: #282c34;

    color: #abb2bf;

    padding: 16px;

    font-family: 'Courier New', 'Monaco', monospace;

    font-size: 13px;

    line-height: 1.6;

    overflow-x: auto;

    position: relative;

}



/* Ensure <pre> inside code blocks inherits visible color and preserves whitespace */

.colab-code pre {

    color: #abb2bf !important;

    white-space: pre !important;

    margin: 0 !important;

    font-family: inherit !important;

    overflow-x: auto;

}



.colab-code-keyword { color: #c678dd; }

.colab-code-string { color: #98c379; }

.colab-code-number { color: #d19a66; }

.colab-code-function { color: #61afef; }

.colab-code-comment { color: #5c6370; font-style: italic; }



.colab-markdown {

    padding: 16px;

    font-size: 14px;

    line-height: 1.7;

}



.colab-markdown h1 { font-size: 32px; font-weight: 500; margin: 24px 0 16px 0; }

.colab-markdown h2 { font-size: 24px; font-weight: 500; margin: 20px 0 12px 0; }

.colab-markdown h3 { font-size: 20px; font-weight: 500; margin: 16px 0 10px 0; }

.colab-markdown p { margin: 12px 0; }

.colab-markdown ul, .colab-markdown ol { margin: 12px 0; padding-left: 24px; }

.colab-markdown code { 

    background: #f5f5f5; 

    padding: 2px 6px; 

    border-radius: 3px;

    font-family: 'Courier New', monospace;

    font-size: 12px;

}

.colab-markdown pre { 

    background: #f5f5f5; 

    padding: 12px; 

    border-radius: 4px;

    overflow-x: auto;

}



.colab-output {

    background: var(--colab-secondary);

    border-top: 1px solid var(--colab-border);

    padding: 12px 16px;

    font-family: 'Courier New', monospace;

    font-size: 12px;

    max-height: 400px;

    overflow-y: auto;

}



.colab-output-label {

    font-weight: 600;

    color: #5f6368;

    font-size: 11px;

    margin-bottom: 8px;

}



.colab-stats {

    display: flex;

    gap: 16px;

    margin-bottom: 24px;

    flex-wrap: wrap;

}



.colab-stat {

    flex: 1;

    min-width: 140px;

    background: white;

    border: 1px solid var(--colab-border);

    padding: 16px;

    border-radius: 4px;

    text-align: center;

}



.colab-stat-value {

    font-size: 24px;

    font-weight: 500;

    color: var(--colab-primary);

}



.colab-stat-label {

    font-size: 12px;

    color: #5f6368;

    margin-top: 8px;

}

</style>



<div class="colab-container">

    <div class="colab-header">

        <div>

            <h1>📓 Notebook Analysis</h1>

            <div class="colab-header-subtitle">Google Colab-style Professional Viewer</div>

        </div>

    </div>

"""
        
        code_cells = sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.CODE)
        markdown_cells = sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.MARKDOWN)
        cells_with_output = sum(1 for u in self.current_thread.units if u.cell.outputs)
        
        display += f"""

    <div class="colab-stats">

        <div class="colab-stat">

            <div class="colab-stat-value">{len(self.current_thread.units)}</div>

            <div class="colab-stat-label">Total Cells</div>

        </div>

        <div class="colab-stat">

            <div class="colab-stat-value">{code_cells}</div>

            <div class="colab-stat-label">Code Cells</div>

        </div>

        <div class="colab-stat">

            <div class="colab-stat-value">{markdown_cells}</div>

            <div class="colab-stat-label">Documentation</div>

        </div>

        <div class="colab-stat">

            <div class="colab-stat-value">{cells_with_output}</div>

            <div class="colab-stat-label">With Output</div>

        </div>

    </div>

"""
        
        for i, unit in enumerate(self.current_thread.units, 1):
            cell_type_str = "CODE" if unit.cell.cell_type == CellType.CODE else "MARKDOWN"
            cell_type_class = "code" if unit.cell.cell_type == CellType.CODE else "markdown"
            
            display += f"""

    <div class="colab-cell">

        <div class="colab-cell-header">

            <span class="colab-cell-number">[{i}]</span>

            <span class="colab-cell-type {cell_type_class}">{cell_type_str}</span>

"""
            
            if unit.intent and unit.intent != "[Pending intent inference]":
                display += f'            <span class="colab-cell-intent">{unit.intent}</span>\n'
            
            display += """        </div>

"""
            
            if unit.cell.cell_type == CellType.CODE:
                # Escape HTML special characters and preserve whitespace
                # Handle source as either string or list
                source_text = unit.cell.source if isinstance(unit.cell.source, str) else ''.join(unit.cell.source)
                code = html.escape(source_text)
                display += f'        <div class="colab-code"><pre style="margin: 0; color: #abb2bf; white-space: pre; overflow-x: auto; font-family: \"Courier New\", monospace;">{code}</pre></div>\n'
            else:
                # Handle source as either string or list
                source_text = unit.cell.source if isinstance(unit.cell.source, str) else ''.join(unit.cell.source)
                display += f'        <div class="colab-markdown">{source_text}</div>\n'
            
            if unit.cell.outputs:
                display += '        <div class="colab-output">\n'
                display += '            <div class="colab-output-label">Output</div>\n'
                for output in unit.cell.outputs[:2]:
                    if 'text' in output:
                        raw_out = output['text']
                        if isinstance(raw_out, list):
                            raw_out = '\n'.join(raw_out)
                        output_text = html.escape(str(raw_out)[:300])
                        display += f'            <pre>{output_text}</pre>\n'
                    elif 'data' in output and 'text/plain' in output['data']:
                        raw_out = output['data']['text/plain']
                        if isinstance(raw_out, list):
                            raw_out = '\n'.join(raw_out)
                        output_text = html.escape(str(raw_out)[:300])
                        display += f'            <pre>{output_text}</pre>\n'
                display += '        </div>\n'
            
            display += """    </div>

"""
        
        display += """

</div>

"""
        
        return display
    
    def ask_question(self, query: str, conversation_display: List) -> Tuple[List, str]:
        """Answer a question about the notebook with conversation history."""
        if not self.answering_system:
            error_msg = "❌ No document loaded. Please upload a document first."
            formatted_display = self._ensure_message_format(conversation_display)
            formatted_display.append({"role": "user", "content": query})
            formatted_display.append({"role": "assistant", "content": error_msg})
            return formatted_display, ""
        
        if not query or query.strip() == "":
            return conversation_display, ""
        
        try:
            # Convert incoming display to role/content format
            formatted_display = self._ensure_message_format(conversation_display)
            
            # Sync internal conversation history with display
            self.conversation_history = []
            for msg in formatted_display:
                if isinstance(msg, dict) and "role" in msg and "content" in msg:
                    self.conversation_history.append(msg)

            # Add the new user message to internal history
            self.conversation_history.append({"role": "user", "content": query})

            # Check if this is a casual greeting/small talk (no document context needed)
            is_casual = self._is_casual_conversation(query)
            
            if is_casual and self.groq_client:
                # Use Groq for natural conversation without document analysis
                try:
                    answer_text = self.groq_client.reason(
                        query=query,
                        context="User is having a casual conversation.",
                        conversation_history=self.conversation_history
                    )
                except Exception:
                    answer_text = self._get_fallback_greeting(query)
            elif is_casual:
                # Fallback friendly response without Groq
                answer_text = self._get_fallback_greeting(query)
            else:
                # Document-based Q&A
                response = self.answering_system.answer_question(
                    query,
                    top_k=8,
                    conversation_history=self.conversation_history
                )

                # Format answer
                answer_text = response.answer

                # Add citations if available
                if response.citations:
                    answer_text += "\n\n**📚 References:**\n"
                    for i, citation in enumerate(response.citations, 1):
                        answer_text += f"\n{i}. `{citation.cell_id}` [{citation.cell_type}]"
                        if citation.intent:
                            answer_text += f" - *{citation.intent}*"

                # Add confidence
                answer_text += f"\n\n*Confidence: {response.confidence:.0%}*"
                if response.has_hallucination_risk:
                    answer_text += " ⚠️ *Verify information*"

            # Add to both conversation history and display
            self.conversation_history.append({"role": "assistant", "content": answer_text})
            formatted_display.append({"role": "user", "content": query})
            formatted_display.append({"role": "assistant", "content": answer_text})

            return formatted_display, ""

        except Exception as e:
            formatted_display = self._ensure_message_format(conversation_display)
            formatted_display.append({"role": "user", "content": query})
            formatted_display.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
            return formatted_display, ""
    
    def _is_casual_conversation(self, query: str) -> bool:
        """Detect if query is casual conversation (greeting, small talk) vs document Q&A."""
        query_lower = query.lower().strip()
        
        # Greetings
        greetings = ['hi', 'hello', 'hey', 'howdy', 'greetings', 'good morning', 'good afternoon', 'good evening']
        if any(query_lower.startswith(g) for g in greetings):
            return True
        
        # Small talk / general questions
        small_talk = [
            "how are you", "how are u", "how's it going", "what's up", "sup",
            "how do i use", "how do i get started", "what can you do", "what are you",
            "who are you", "tell me about yourself", "introduce yourself",
            "thanks", "thank you", "great", "awesome", "nice", "cool",
            "lol", "haha", "ha ha"
        ]
        if any(small_talk_phrase in query_lower for small_talk_phrase in small_talk):
            return True
        
        # Questions that don't reference the document
        if query.startswith("?") or query.endswith("?"):
            if len(query.split()) < 4:  # Short questions likely casual
                return True
        
        return False
    
    def _get_fallback_greeting(self, query: str) -> str:
        """Generate a friendly fallback response for casual conversation."""
        query_lower = query.lower().strip()
        
        if any(q in query_lower for q in ['hi', 'hello', 'hey', 'greetings']):
            return "👋 Hey there! I'm ready to analyze your documents. Upload a notebook or Excel file to get started, and I can answer questions, generate summaries, and provide insights!"
        elif any(q in query_lower for q in ['how are you', "how's it going", "what's up"]):
            return "😊 I'm doing great, thanks for asking! Ready to dive into your documents. What would you like to know?"
        elif any(q in query_lower for q in ['what can you do', 'who are you', 'tell me about']):
            return "🤖 I'm an AI assistant specialized in analyzing Jupyter notebooks and Excel files. I can:\n- Summarize key findings\n- Answer questions about your data\n- Generate insights and keypoints\n- Provide data profiles and statistics\n\nUpload a file to get started!"
        elif any(q in query_lower for q in ['thanks', 'thank you', 'great', 'awesome']):
            return "😄 You're welcome! Happy to help. What else would you like to know about your document?"
        else:
            return "👋 I'm here to help! Upload a document and ask me anything about it. What would you like to explore?"
    
    def _ensure_message_format(self, conversation_display: List) -> List[Dict]:
        """Convert conversation display to Gradio ChatMessage format (role/content dicts)."""
        if not conversation_display:
            return []
        
        result = []
        for item in conversation_display:
            # Already in dict format
            if isinstance(item, dict) and "role" in item and "content" in item:
                result.append(item)
            # Old format: [user_text, assistant_text] tuple/list
            elif isinstance(item, (list, tuple)) and len(item) >= 2:
                result.append({"role": "user", "content": str(item[0])})
                result.append({"role": "assistant", "content": str(item[1])})
        
        return result
    
    # ==================== KILLER FEATURES ====================
    
    def generate_data_profile(self) -> str:
        """Generate comprehensive data profiling and statistics."""
        if not self.current_thread:
            return "❌ No document loaded."
        
        profile = """

<style>

.profile-card {

    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

    color: white;

    padding: 20px;

    border-radius: 8px;

    margin: 12px 0;

}

.metric {

    display: inline-block;

    background: rgba(255,255,255,0.2);

    padding: 12px 16px;

    border-radius: 6px;

    margin: 6px;

    font-weight: 500;

}

.code-quality {

    background: #f0f9ff;

    border-left: 4px solid #0284c7;

    padding: 16px;

    margin: 12px 0;

    border-radius: 6px;

}

.insight-box {

    background: #fef3c7;

    border-left: 4px solid #f59e0b;

    padding: 16px;

    margin: 12px 0;

    border-radius: 6px;

}

</style>



<div class="profile-card">

    <h2>📊 Document Profile & Analytics</h2>

    <p>Comprehensive analysis of your notebook</p>

</div>

"""
        
        # Calculate metrics
        total_cells = len(self.current_thread.units)
        code_cells = sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.CODE)
        markdown_cells = total_cells - code_cells
        cells_with_output = sum(1 for u in self.current_thread.units if u.cell.outputs)
        cells_with_intent = sum(1 for u in self.current_thread.units if u.intent and u.intent != "[Pending intent inference]")
        
        total_lines = sum(len(u.cell.source.split('\n')) for u in self.current_thread.units)
        avg_cell_size = total_lines // max(code_cells, 1)
        
        profile += f"""

<div class="code-quality">

    <h3>📈 Key Metrics</h3>

    <div>

        <div class="metric">Total Cells: <strong>{total_cells}</strong></div>

        <div class="metric">Code Cells: <strong>{code_cells}</strong></div>

        <div class="metric">Documentation: <strong>{markdown_cells}</strong></div>

        <div class="metric">Cells with Output: <strong>{cells_with_output}</strong></div>

        <div class="metric">Total Lines: <strong>{total_lines}</strong></div>

        <div class="metric">Avg Cell Size: <strong>{avg_cell_size} lines</strong></div>

    </div>

</div>



<div class="insight-box">

    <h3>💡 Code Quality Insights</h3>

"""
        
        # Quality analysis
        insights = []
        
        if cells_with_output / max(code_cells, 1) > 0.8:
            insights.append("✅ <strong>Excellent output coverage:</strong> Most cells produce outputs")
        if cells_with_intent / total_cells > 0.7:
            insights.append("✅ <strong>Well-structured workflow:</strong> Clear intent in most cells")
        if code_cells < markdown_cells:
            insights.append("✅ <strong>Well documented:</strong> Good documentation-to-code ratio")
        if total_lines > 500:
            insights.append("⚠️ <strong>Large notebook:</strong> Consider breaking into smaller modules")
        if avg_cell_size > 30:
            insights.append("⚠️ <strong>Large cells:</strong> Some cells could be smaller for clarity")
        
        if not insights:
            insights.append("ℹ️ Standard notebook structure detected")
        
        for insight in insights:
            profile += f"<p>{insight}</p>\n"
        
        profile += """

</div>



<div class="insight-box">

    <h3>🔍 Intent Distribution</h3>

"""
        
        intent_counts = {}
        for unit in self.current_thread.units:
            if unit.intent and unit.intent != "[Pending intent inference]":
                intent = unit.intent.split()[0]  # Get first word of intent
                intent_counts[intent] = intent_counts.get(intent, 0) + 1
        
        for intent, count in sorted(intent_counts.items(), key=lambda x: x[1], reverse=True):
            profile += f"<p>• <strong>{intent}:</strong> {count} cells</p>\n"
        
        profile += """

</div>



<div class="insight-box">

    <h3>📦 Dependencies & Imports</h3>

"""
        
        imports = set()
        for unit in self.current_thread.units:
            if unit.cell.cell_type == CellType.CODE:
                source = unit.cell.source if isinstance(unit.cell.source, str) else ''.join(unit.cell.source)
                if 'import ' in source:
                    for line in source.split('\n'):
                        if line.strip().startswith(('import ', 'from ')):
                            # Extract module name
                            module = line.split('import')[0].replace('from', '').strip()
                            if module:
                                imports.add(module)
        
        if imports:
            for imp in sorted(imports)[:10]:
                profile += f"<p>• <code>{imp}</code></p>\n"
        else:
            profile += "<p>No imports detected</p>\n"
        
        profile += """

</div>

"""
        
        return profile
    
    def export_analysis(self) -> str:
        """Export analysis results."""
        if not self.current_thread:
            return "❌ No document loaded."
        
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"analysis_{self.current_file_name or 'notebook'}_{timestamp}.md"
        
        # Create markdown report
        report = f"""# Document Analysis Report

Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}



## Executive Summary

{self.keypoints_cache or "Key insights would be generated here."}



## Key Metrics

- Total Cells: {len(self.current_thread.units)}

- Code Cells: {sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.CODE)}

- Documentation Cells: {sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.MARKDOWN)}



## Questions Asked

"""
        
        for msg in self.conversation_history:
            if msg["role"] == "user":
                report += f"\n- {msg['content'][:100]}"
        
        # Save to file
        with open(filename, 'w') as f:
            f.write(report)
        
        return f"✅ Report exported to `{filename}`"
    
    def advanced_search(self, search_term: str) -> str:
        """Advanced search across all cells."""
        if not self.current_thread or not search_term:
            return "❌ No document loaded or search term empty."
        
        results = []
        search_lower = search_term.lower()
        
        for i, unit in enumerate(self.current_thread.units, 1):
            source_text = unit.cell.source if isinstance(unit.cell.source, str) else ''.join(unit.cell.source)
            if search_lower in source_text.lower():
                results.append({
                    "cell": i,
                    "type": unit.cell.cell_type,
                    "intent": unit.intent,
                    "snippet": source_text[:150]
                })
        
        if not results:
            return f"No results found for '{search_term}'"
        
        output = f"<h3>🔍 Found {len(results)} matches for '{search_term}'</h3>\n"
        
        for r in results[:10]:
            output += f"""

<div style="background: #f0f4f8; padding: 12px; margin: 8px 0; border-radius: 6px; border-left: 4px solid #0284c7;">

<strong>Cell {r['cell']}</strong> [{r['type'].upper()}] {r['intent']}<br/>

<code style="font-size: 0.85em;">{r['snippet']}...</code>

</div>

"""
        
        return output
    
    def get_recommendations(self) -> str:
        """Generate smart recommendations."""
        if not self.current_thread:
            return "❌ No document loaded."
        
        recommendations = """

<style>

.rec-card {

    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);

    color: white;

    padding: 20px;

    border-radius: 8px;

    margin: 12px 0;

}

.rec-item {

    background: rgba(0,0,0,0.2);

    padding: 12px;

    margin: 8px 0;

    border-radius: 6px;

}

</style>



<div class="rec-card">

    <h2>⭐ AI-Powered Recommendations</h2>

</div>

"""
        
        recs = []
        
        code_cells = sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.CODE)
        markdown_cells = sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.MARKDOWN)
        
        if code_cells > 20:
            recs.append("🔄 Consider modularizing code into separate files/functions")
        if markdown_cells == 0:
            recs.append("📝 Add documentation cells for better clarity")
        if len(self.current_thread.units) > 50:
            recs.append("📚 This notebook is large - consider splitting into multiple notebooks")
        
        # Check for common issues
        large_cells = sum(1 for u in self.current_thread.units if len(u.cell.source) > 1000)
        if large_cells > 0:
            recs.append(f"✂️ {large_cells} cells are very large - consider breaking them down")
        
        cells_without_output = sum(1 for u in self.current_thread.units if u.cell.cell_type == CellType.CODE and not u.cell.outputs)
        if cells_without_output > code_cells * 0.3:
            recs.append("⚠️ Many code cells don't have outputs - ensure cells are executable")
        
        if not recs:
            recs.append("✅ Notebook follows best practices!")
        
        for i, rec in enumerate(recs, 1):
            recommendations += f'<div class="rec-item">{i}. {rec}</div>\n'
        
        return recommendations
    
    def _excel_to_cells(self, excel_path: str) -> List[Cell]:
        """Convert Excel file to notebook-like cells."""
        from src.models import Cell, CellType
        
        cells = []
        xl = pd.ExcelFile(excel_path)
        
        # Add overview cell
        cells.append(Cell(
            cell_id="excel_overview",
            cell_type=CellType.MARKDOWN,
            source=f"# Excel Document Analysis\n\nSheets: {', '.join(xl.sheet_names)}\nTotal Sheets: {len(xl.sheet_names)}",
            outputs=[]
        ))
        
        for sheet_name in xl.sheet_names:
            df = xl.parse(sheet_name)
            
            # Sheet header
            cells.append(Cell(
                cell_id=f"sheet_{sheet_name}_header",
                cell_type=CellType.MARKDOWN,
                source=f"## Sheet: {sheet_name}\n\n**Dimensions:** {df.shape[0]} rows × {df.shape[1]} columns",
                outputs=[]
            ))
            
            # Column info
            col_info = "\n".join([f"- {col}: {dtype}" for col, dtype in df.dtypes.items()])
            cells.append(Cell(
                cell_id=f"sheet_{sheet_name}_columns",
                cell_type=CellType.MARKDOWN,
                source=f"### Columns\n{col_info}",
                outputs=[]
            ))
            
            # Data preview
            cells.append(Cell(
                cell_id=f"data_{sheet_name}_preview",
                cell_type=CellType.CODE,
                source=f"# Preview of {sheet_name}\ndf_{sheet_name}.head(10)",
                outputs=[{"data": {"text/plain": df.head(10).to_string()}}]
            ))
            
            # Statistics
            if df.select_dtypes(include=['number']).shape[1] > 0:
                stats = df.describe().to_string()
                cells.append(Cell(
                    cell_id=f"stats_{sheet_name}",
                    cell_type=CellType.CODE,
                    source=f"# Statistics for {sheet_name}\ndf_{sheet_name}.describe()",
                    outputs=[{"data": {"text/plain": stats}}]
                ))
        
        return cells
    
    def get_excel_display(self, excel_path: str) -> str:
        """Get Microsoft Excel-like styled spreadsheet content."""
        xl = pd.ExcelFile(excel_path)
        sheet_names = xl.sheet_names
        
        if not sheet_names:
            return "No sheets found in Excel file."
        
        primary_sheet = sheet_names[0]
        df = xl.parse(primary_sheet)
        
        display = """

<style>

.excel-container {

    font-family: 'Calibri', 'Arial', sans-serif;

    padding: 16px;

    background: white;

}



.excel-header {

    display: flex;

    align-items: center;

    gap: 12px;

    margin-bottom: 24px;

    padding: 12px 16px;

    background: linear-gradient(135deg, #2d7f38 0%, #4caf50 100%);

    border-radius: 4px;

    color: white;

}



.excel-header h1 {

    margin: 0;

    font-size: 24px;

    font-weight: 500;

}



.excel-header-subtitle {

    color: rgba(255,255,255,0.95);

    font-size: 12px;

    margin-top: 2px;

}



.excel-toolbar {

    display: flex;

    gap: 8px;

    padding: 12px 0;

    border-bottom: 1px solid #e0e0e0;

    margin-bottom: 16px;

    overflow-x: auto;

}



.excel-tab {

    padding: 8px 16px;

    background: white;

    border: 1px solid #d0d0d0;

    border-bottom: none;

    border-radius: 4px 4px 0 0;

    cursor: pointer;

    font-weight: 500;

    color: #666;

    font-size: 13px;

    white-space: nowrap;

}



.excel-tab.active {

    background: white;

    color: #2d7f38;

    border-color: #2d7f38;

    border-bottom: 2px solid white;

    margin-bottom: -1px;

}



.excel-grid-wrapper {

    overflow-x: auto;

    border: 1px solid #d0d0d0;

    border-radius: 4px;

    background: white;

}



.excel-grid table {

    width: 100%;

    border-collapse: collapse;

    font-size: 13px;

}



.excel-grid th {

    background: #f3f3f3;

    border: 1px solid #d0d0d0;

    padding: 8px 12px;

    text-align: left;

    font-weight: 600;

    color: #333;

    position: sticky;

    top: 0;

    z-index: 10;

    min-width: 80px;

}



.excel-grid td {

    border: 1px solid #e0e0e0;

    padding: 8px 12px;

    color: #333;

    background: white;

}



.excel-grid tr:nth-child(even) td {

    background: #f9f9f9;

}



.excel-grid tr:hover td {

    background: #e8f5e9;

}



.excel-row-header {

    background: #f3f3f3;

    border: 1px solid #d0d0d0;

    padding: 8px 12px;

    font-weight: 600;

    color: #666;

    text-align: center;

    width: 40px;

    min-width: 40px;

}



.excel-stats {

    display: flex;

    gap: 16px;

    margin-bottom: 24px;

    flex-wrap: wrap;

}



.excel-stat {

    flex: 1;

    min-width: 120px;

    background: #f9f9f9;

    border: 1px solid #d0d0d0;

    padding: 12px;

    border-radius: 4px;

    text-align: center;

}



.excel-stat-value {

    font-size: 20px;

    font-weight: 600;

    color: #2d7f38;

}



.excel-stat-label {

    font-size: 12px;

    color: #666;

    margin-top: 6px;

}



.excel-data-info {

    background: #f0f7f0;

    border-left: 4px solid #2d7f38;

    padding: 12px;

    margin-bottom: 16px;

    border-radius: 4px;

    font-size: 13px;

}



.excel-data-info strong {

    color: #2d7f38;

}

</style>



<div class="excel-container">

    <div class="excel-header">

        <div>

            <h1>📊 Excel Data Viewer</h1>

            <div class="excel-header-subtitle">Microsoft Excel-style Professional Spreadsheet</div>

        </div>

    </div>

"""
        
        display += f"""

    <div class="excel-stats">

        <div class="excel-stat">

            <div class="excel-stat-value">{len(df)}</div>

            <div class="excel-stat-label">Rows</div>

        </div>

        <div class="excel-stat">

            <div class="excel-stat-value">{len(df.columns)}</div>

            <div class="excel-stat-label">Columns</div>

        </div>

        <div class="excel-stat">

            <div class="excel-stat-value">{df.memory_usage(deep=True).sum() / 1024:.1f} KB</div>

            <div class="excel-stat-label">Size</div>

        </div>

        <div class="excel-stat">

            <div class="excel-stat-value">{df.isnull().sum().sum()}</div>

            <div class="excel-stat-label">Missing</div>

        </div>

    </div>



    <div class="excel-data-info">

        <strong>📋 Data Summary:</strong> {len(df)} rows × {len(df.columns)} columns | Dtypes: {', '.join(map(str, df.dtypes.unique()))}

    </div>



    <div class="excel-toolbar">

        <div class="excel-tab active">{primary_sheet}</div>

"""
        
        for sheet in sheet_names[1:]:
            display += f'        <div class="excel-tab">{sheet}</div>\n'
        
        display += """    </div>



    <div class="excel-grid-wrapper">

        <table class="excel-grid">

            <thead>

                <tr>

                    <th class="excel-row-header"></th>

"""
        
        for col in df.columns:
            display += f"                    <th>{col}</th>\n"
        
        display += """                </tr>

            </thead>

            <tbody>

"""
        
        for idx, row in df.head(100).iterrows():
            display += f"                <tr>\n                    <td class='excel-row-header'>{idx + 1}</td>\n"
            for col in df.columns:
                value = row[col]
                if pd.isna(value):
                    display += "                    <td style='color: #ccc;'>—</td>\n"
                else:
                    if isinstance(value, (int, float)):
                        formatted_value = f"{value:,.2f}" if isinstance(value, float) else str(value)
                    else:
                        formatted_value = str(value)[:50]
                    display += f"                    <td>{formatted_value}</td>\n"
            display += "                </tr>\n"
        
        if len(df) > 100:
            display += f"""                <tr>

                    <td colspan="{len(df.columns) + 1}" style="text-align: center; color: #999; padding: 12px;">

                        ... and {len(df) - 100} more rows

                    </td>

                </tr>

"""
        
        display += """            </tbody>

        </table>

    </div>



</div>

"""
        
        return display


def create_gradio_app():
    """Create and return the enhanced Gradio interface."""
    agent = NotebookAgentUI()

    # Auto-initialize Groq if key present in environment but client wasn't created earlier
    try:
        if not agent.groq_client:
            groq_key = os.getenv("GROQ_API_KEY")
            # Fallback: read .env directly if load_dotenv didn't pick it up
            if not groq_key:
                env_path = Path(__file__).parent.parent / '.env'
                if env_path.exists():
                    content = env_path.read_text(encoding='utf-8')
                    for line in content.splitlines():
                        line = line.strip()
                        if line.startswith('GROQ_API_KEY=') and not line.startswith('#'):
                            groq_key = line.split('=', 1)[1].strip()
                            if groq_key:
                                break

            if groq_key:
                try:
                    agent.set_groq_key(groq_key, True)
                except Exception:
                    pass
    except Exception:
        pass
    
    # Custom CSS for better styling
    custom_css = """

    .main-header {

        text-align: center;

        padding: 2rem;

        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

        color: white;

        border-radius: 10px;

        margin-bottom: 2rem;

    }

    .feature-box {

        padding: 1rem;

        border: 2px solid #e0e0e0;

        border-radius: 8px;

        margin: 0.5rem 0;

    }

    .upload-section {

        text-align: center;

        padding: 2rem;

        border: 3px dashed #667eea;

        border-radius: 10px;

        background: #f8f9ff;

    }

    """
    
    with gr.Blocks(title="Context Thread Agent", theme=gr.themes.Soft(), css=custom_css) as demo:
        gr.HTML("""

        <div class="main-header">

            <h1>🧵 Context Thread Agent</h1>

            <p style="font-size: 1.2rem; margin-top: 1rem;">

                AI-Powered Document Analysis & Q&A System

            </p>

        </div>

        """)
        
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("""

                ## 🎯 What is Context Thread Agent?

                

                Context Thread Agent is an **intelligent document analysis platform** that helps you understand and extract insights from complex Jupyter notebooks and Excel spreadsheets. Using advanced AI (powered by **Groq LLM**), it provides:

                

                ### 🚀 Major Use Cases:

                

                - **📊 Data Analysis Review**: Understand complex analytical workflows instantly

                - **🔍 Code Audit**: Verify assumptions and logic in data science notebooks

                - **📈 Excel Report Analysis**: Extract insights from large spreadsheets

                - **🤖 Automated Documentation**: Generate summaries and key findings

                - **💡 Knowledge Extraction**: Ask questions about methodology and results

                - **🔗 Dependency Tracking**: Understand how different parts connect

                - **✅ Quality Assurance**: Validate calculations and transformations

                

                ### ✨ Key Features:

                - ✓ **100% Grounded Answers** - No hallucinations, only facts from your document

                - ✓ **Citation-Based** - Every answer references specific cells

                - ✓ **Context-Aware** - Understands relationships between code sections

                - ✓ **Conversation Memory** - Maintains context across questions

                - ✓ **Key Insights Generation** - AI-powered summary of main points

                - ✓ **Fast & Free** - Powered by Groq's lightning-fast inference

                """)
            
            with gr.Column(scale=1):
                gr.HTML("""

                <div class="upload-section">

                    <h3>📤 Quick Start</h3>

                    <p>Upload your document and start exploring</p>

                </div>

                """)
                
                file_input = gr.File(
                    label="Upload Your Document",
                    file_types=[".ipynb", ".xlsx", ".xls"],
                    type="filepath",
                    elem_classes="upload-input"
                )
                upload_btn = gr.Button(
                    "📤 Upload & Analyze", 
                    variant="primary", 
                    size="lg",
                    scale=2
                )
                
                upload_status = gr.Markdown("### 📋 Status\n\nReady to upload...")
                
                # Groq status - show only status if enabled, otherwise show input
                if agent.groq_client:
                    groq_status = gr.Markdown("### 🚀 Groq Configuration\n\n✅ **Groq is enabled and ready!**\n\nYour Groq API key has been loaded from environment. Advanced reasoning will be used for analysis.")
                    # Hidden inputs for compatibility
                    groq_key_input = gr.Textbox(visible=False)
                    groq_toggle = gr.Checkbox(visible=False)
                    set_groq_btn = gr.Button(visible=False)
                else:
                    # Show input if Groq not enabled
                    groq_key_input = gr.Textbox(
                        label="Groq API Key",
                        placeholder="Paste your Groq key (gsk_...)",
                        type="password"
                    )
                    groq_toggle = gr.Checkbox(label="Use Groq for reasoning", value=False)
                    set_groq_btn = gr.Button("Set Groq Key", variant="secondary")
                    groq_status = gr.Markdown("⚠️ **Groq not configured.** Add your key and click 'Set Groq Key' to enable advanced reasoning.")
                    
                    # Wire the set key button only if inputs are visible
                    set_groq_btn.click(agent.set_groq_key, inputs=[groq_key_input, groq_toggle], outputs=[groq_status])
        
        gr.Markdown("---")
        
        # Main interface (hidden until upload)
        with gr.Column(visible=False) as main_interface:
            gr.Markdown("## 💼 Analysis Workspace")
            
            with gr.Row():
                # Left side: Document viewer
                with gr.Column(scale=1):
                    gr.Markdown("### 📓 Document Viewer")
                    
                    with gr.Tabs():
                        with gr.Tab("📄 Content"):
                            notebook_display = gr.HTML(
                                value="",
                                label="Document Content",
                                elem_classes="notebook-viewer"
                            )
                        
                        with gr.Tab("🔑 Key Points"):
                            keypoints_btn = gr.Button(
                                "🔄 Generate Key Insights", 
                                variant="secondary",
                                size="lg"
                            )
                            gr.Markdown("*This may take 10-30 seconds for comprehensive analysis...*")
                            keypoints_display = gr.Markdown(
                                value="",
                                label="Key Insights"
                            )
                        
                        with gr.Tab("📊 Analytics"):
                            analytics_btn = gr.Button("📊 Generate Profile", variant="secondary", size="lg")
                            analytics_display = gr.Markdown(value="", label="Analytics")
                        
                        with gr.Tab("⭐ Recommendations"):
                            rec_btn = gr.Button("💡 Get Recommendations", variant="secondary", size="lg")
                            rec_display = gr.Markdown(value="", label="Recommendations")
                        
                        with gr.Tab("🔍 Advanced Search"):
                            search_input = gr.Textbox(
                                label="Search Term",
                                placeholder="Search in all cells...",
                                lines=1
                            )
                            search_btn = gr.Button("🔎 Search", variant="secondary")
                            search_display = gr.Markdown(value="", label="Search Results")
                        
                        with gr.Tab("📥 Export"):
                            export_btn = gr.Button("📥 Export Analysis Report", variant="secondary", size="lg")
                            export_display = gr.Markdown(value="", label="Export Status")
                
                # Right side: Q&A Interface
                with gr.Column(scale=1):
                    gr.Markdown("### 💬 Ask Questions")
                    
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=500,
                        elem_classes="chat-box"
                    )
                    
                    with gr.Row():
                        query_input = gr.Textbox(
                            label="Your Question",
                            placeholder="e.g., 'What are the main findings?' or 'Why was Q4 data removed?'",
                            lines=2,
                            scale=4
                        )
                        ask_btn = gr.Button("🤖 Ask", variant="primary", scale=1)
                    
                    gr.Markdown("""

                    **💡 Example Questions:**

                    - What is this document about?

                    - What are the key findings?

                    - Why was [specific data] removed?

                    - How was [metric] calculated?

                    - What patterns were found?

                    - Are there any data quality issues?

                    """)
        
        # Event handlers
        def on_upload(file):
            status, show_interface, notebook_content, keypoints = agent.load_notebook(file)
            return (
                status,
                gr.update(visible=show_interface),
                notebook_content,
                keypoints
            )
        
        upload_btn.click(
            fn=on_upload,
            inputs=[file_input],
            outputs=[upload_status, main_interface, notebook_display, keypoints_display]
        )
        
        # Keypoints generation with loading state
        def generate_with_loading():
            return "⏳ **Analyzing document and generating insights...**\n\nThis may take 10-30 seconds depending on document complexity."
        
        keypoints_btn.click(
            fn=generate_with_loading,
            inputs=[],
            outputs=[keypoints_display]
        ).then(
            fn=agent.generate_keypoints,
            inputs=[],
            outputs=[keypoints_display]
        )
        
        # Analytics tab
        analytics_btn.click(
            fn=agent.generate_data_profile,
            inputs=[],
            outputs=[analytics_display]
        )
        
        # Recommendations tab
        rec_btn.click(
            fn=agent.get_recommendations,
            inputs=[],
            outputs=[rec_display]
        )
        
        # Advanced search
        search_btn.click(
            fn=agent.advanced_search,
            inputs=[search_input],
            outputs=[search_display]
        )
        
        # Export
        export_btn.click(
            fn=agent.export_analysis,
            inputs=[],
            outputs=[export_display]
        )
        
        # Q&A interaction
        ask_btn.click(
            fn=agent.ask_question,
            inputs=[query_input, chatbot],
            outputs=[chatbot, query_input]
        )
        
        query_input.submit(
            fn=agent.ask_question,
            inputs=[query_input, chatbot],
            outputs=[chatbot, query_input]
        )
    
    return demo


if __name__ == "__main__":
    demo = create_gradio_app()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )