# Spaces: chinmayjha / context-ai (Sleeping)
# File size: 11,313 Bytes
# b27eb78

import html
import json
import re
from typing import Any, Dict, List, Tuple

import gradio as gr
from smolagents import ToolCallingAgent


class CustomGradioUI:
    """Custom Gradio UI that renders agent responses with source attribution.

    The UI runs a query through the wrapped agent, splits the stringified
    result into an answer, a list of cited sources and a list of tool names,
    and renders each part as HTML. All text that originates from the agent
    (or from an exception) is HTML-escaped before being embedded in markup.
    """

    # Tool-usage markers looked for in the stringified agent result.
    _TOOL_PATTERNS = (
        re.compile(r'🛠️ Used tool (\w+)'),
        re.compile(r"Calling tool:\s*'([^']+)'"),
    )

    # {"answer": "..."} where the value may contain backslash escapes
    # (including escaped double quotes).
    _JSON_ANSWER_RE = re.compile(
        r'\{\s*"answer"\s*:\s*"((?:[^"\\]|\\.)+)"\s*\}', re.DOTALL
    )

    # "Final answer: ..." up to the first blank line or the end of the text.
    _FINAL_ANSWER_RE = re.compile(
        r'Final answer:\s*(.+?)(?=\n\n|\Z)', re.DOTALL
    )

    # Citation formats, each capturing (title, date). The unquoted variants
    # exclude quotes and parentheses from the title so they neither re-match
    # the quoted form nor run past the end of the parenthetical.
    _SOURCE_PATTERNS = (
        re.compile(r'\(Document:\s*"([^"]+)",\s*([^)]+)\)'),    # (Document: "Title", Date)
        re.compile(r'\(Document:\s*([^",()]+),\s*([^)]+)\)'),   # (Document: Title, Date)
        re.compile(r'\(Document\s+(\d+),\s*([^)]+)\)'),         # (Document 1, Date)
        re.compile(r'\(from\s+"([^"]+)"\s+on\s+([^)]+)\)'),     # (from "Title" on Date)
        re.compile(r'\(from\s+([^"()]+)\s+on\s+([^)]+)\)'),     # (from Title on Date)
    )

    # The annotation is quoted so evaluating the class body does not require
    # the agent type to be resolvable.
    def __init__(self, agent: "ToolCallingAgent"):
        """Store the agent and build the Gradio interface.

        Args:
            agent: Object whose ``run(query)`` method produces the response.
        """
        self.agent = agent
        self.setup_ui()

    def setup_ui(self):
        """Build the Gradio Blocks layout and wire the event handlers."""
        with gr.Blocks(
            title="Second Brain AI Assistant",
            theme=gr.themes.Soft(),
            css="""
            .source-card {
                border: 1px solid #e0e0e0;
                border-radius: 8px;
                padding: 12px;
                margin: 8px 0;
                background-color: #f8f9fa;
            }
            .source-title {
                font-weight: bold;
                color: #2c3e50;
                margin-bottom: 4px;
            }
            .source-date {
                font-size: 0.9em;
                color: #6c757d;
                margin-bottom: 8px;
            }
            .answer-section {
                background-color: #ffffff;
                border: 1px solid #dee2e6;
                border-radius: 8px;
                padding: 16px;
                margin-bottom: 16px;
            }
            .tool-usage {
                background-color: #e3f2fd;
                border-left: 4px solid #2196f3;
                padding: 8px 12px;
                margin: 8px 0;
                border-radius: 4px;
                font-size: 0.9em;
            }
            """
        ) as self.interface:

            gr.Markdown("# 🧠 Second Brain AI Assistant")
            gr.Markdown("Ask questions about your documents and get AI-powered insights with source attribution.")

            with gr.Row():
                with gr.Column(scale=4):
                    self.query_input = gr.Textbox(
                        label="Ask a question",
                        placeholder="What pricing objections were raised in the meetings?",
                        lines=2
                    )
                with gr.Column(scale=1):
                    self.submit_btn = gr.Button("Ask", variant="primary", size="lg")

            with gr.Row():
                with gr.Column():
                    self.answer_output = gr.HTML(label="Answer")
                    self.sources_output = gr.HTML(label="Sources")
                    self.tools_output = gr.HTML(label="Tools Used")

            with gr.Accordion("🔍 Debug: Raw Response", open=False):
                self.debug_output = gr.Textbox(
                    label="Raw Agent Response",
                    lines=10,
                    max_lines=20,
                    interactive=False
                )

            # Clicking the button and pressing Enter in the textbox run the
            # same handler with the same inputs and outputs.
            outputs = [
                self.answer_output,
                self.sources_output,
                self.tools_output,
                self.debug_output,
            ]
            for register in (self.submit_btn.click, self.query_input.submit):
                register(
                    fn=self.process_query,
                    inputs=[self.query_input],
                    outputs=outputs,
                )

    def process_query(self, query: str) -> Tuple[str, str, str, str]:
        """Run the agent on *query* and return the four UI outputs.

        Args:
            query: Text entered by the user. ``None``, empty or
                whitespace-only input short-circuits to four empty strings.

        Returns:
            ``(answer_html, sources_html, tools_html, debug_text)``. If
            anything raises, the first element is an HTML error box with the
            escaped exception message and the last is the raw message.
        """
        if not query or not query.strip():
            return "", "", "", ""

        try:
            # Run the agent
            result = self.agent.run(query)

            # Parse the result
            answer, sources, tools_used = self.parse_agent_response(result)

            # Debug information
            print(f"DEBUG - Raw result: {str(result)[:200]}...")
            print(f"DEBUG - Parsed answer: {answer[:100]}...")
            print(f"DEBUG - Sources found: {len(sources)}")
            print(f"DEBUG - Tools found: {tools_used}")

            # Format outputs
            answer_html = self.format_answer(answer)
            sources_html = self.format_sources(sources)
            tools_html = self.format_tools(tools_used)
            debug_text = str(result)

            return answer_html, sources_html, tools_html, debug_text

        except Exception as e:
            # UI boundary: surface the failure instead of crashing the app.
            # The message goes into an HTML component, so escape it.
            error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {html.escape(str(e))}</div>"
            return error_msg, "", "", str(e)

    @staticmethod
    def _decode_json_string(raw: str) -> str:
        """Decode the body of a JSON string literal (without its quotes)."""
        try:
            # strict=False tolerates literal newlines/tabs inside the value.
            return json.loads(f'"{raw}"', strict=False)
        except ValueError:
            # Not valid JSON escaping: fall back to the two common escapes.
            return raw.replace('\\n', '\n').replace('\\"', '"')

    def _extract_answer(self, result: Any, result_str: str) -> str:
        """Pick the answer text out of the agent result."""
        # A dict result carrying a string "answer" needs no text parsing.
        if isinstance(result, dict) and isinstance(result.get("answer"), str):
            return result["answer"]

        # Pattern 1: JSON object with an "answer" key anywhere in the text.
        json_match = self._JSON_ANSWER_RE.search(result_str)
        if json_match:
            return self._decode_json_string(json_match.group(1))

        # Pattern 2: "Final answer:" followed by content.
        final_match = self._FINAL_ANSWER_RE.search(result_str)
        if final_match:
            answer = final_match.group(1).strip()
            nested = self._JSON_ANSWER_RE.search(answer)
            return self._decode_json_string(nested.group(1)) if nested else answer

        # Pattern 3: nothing recognizable, use the whole text.
        return result_str

    def _extract_sources(self, answer: str) -> List[Dict]:
        """Collect unique ``{"title", "date"}`` citations found in *answer*."""
        unique: Dict[Tuple[str, str], None] = {}
        for pattern in self._SOURCE_PATTERNS:
            for doc_title, doc_date in pattern.findall(answer):
                clean_title = doc_title.strip().strip('"')
                # Handle numbered documents (Document 1, Document 2, etc.)
                if clean_title.isdigit():
                    clean_title = f"Document {clean_title}"
                # dict keys keep first-seen order and drop duplicates.
                unique.setdefault((clean_title, doc_date.strip()), None)
        return [{"title": title, "date": date} for title, date in unique]

    def parse_agent_response(self, result: Any) -> Tuple[str, List[Dict], List[str]]:
        """Split an agent result into answer, sources and tools used.

        Args:
            result: Value returned by the agent; it is stringified for
                pattern matching. A dict with a string ``"answer"`` entry is
                used directly as the answer.

        Returns:
            ``(answer, sources, tools_used)`` where ``sources`` is a list of
            ``{"title": ..., "date": ...}`` dicts without duplicates and
            ``tools_used`` lists each tool name once, in first-seen order.
        """
        result_str = str(result)

        tool_names: List[str] = []
        for pattern in self._TOOL_PATTERNS:
            tool_names.extend(pattern.findall(result_str))
        # dict.fromkeys de-duplicates while keeping a deterministic order
        # (a set would reorder names between runs).
        tools_used = list(dict.fromkeys(tool_names))

        answer = self._extract_answer(result, result_str)
        sources = self._extract_sources(answer)
        return answer, sources, tools_used

    def format_answer(self, answer: str) -> str:
        """Render the answer text as an HTML fragment.

        ``(Document: ...)`` citations are removed, the text is HTML-escaped,
        and a small Markdown subset is converted: ``**bold**``, numbered
        lists, ``•``/``-`` bullets and line breaks.

        Args:
            answer: Plain-text answer; may be empty.

        Returns:
            An ``answer-section`` div, or a placeholder div when there is
            nothing to show.
        """
        placeholder = "<div class='answer-section'><p>No answer provided.</p></div>"
        if not answer:
            return placeholder

        # Remove source references from the answer text for cleaner display
        text = re.sub(r'[ \t]*\(Document:[^)]+\)', '', answer)

        # Escape before adding our own tags so agent text cannot inject markup.
        text = html.escape(text, quote=False)

        # Tidy whitespace but keep newlines: the list and line-break rules
        # below depend on them.
        text = text.replace('\r\n', '\n').replace('\r', '\n')
        text = re.sub(r'[^\S\n]+', ' ', text)
        text = re.sub(r' ?\n ?', '\n', text).strip()
        if not text:
            return placeholder

        # Bold is handled while newlines still exist so it cannot span lines.
        text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)

        # A leading newline lets a list item on the first line match too.
        text = '\n' + text
        text = re.sub(r'\n\s*(\d+)\.\s+', r'<br><br><strong>\1.</strong> ', text)  # Numbered lists
        text = re.sub(r'\n\s*•\s*', '<br>• ', text)  # Bullet points
        text = re.sub(r'\n\s*-\s+', '<br>• ', text)  # Dash points

        # Format line breaks
        text = text.replace('\n', '<br>')

        # Clean up multiple line breaks and the ones introduced at the start
        text = re.sub(r'(?:<br>){3,}', '<br><br>', text)
        text = re.sub(r'^(?:<br>)+', '', text)

        return f"""
        <div class='answer-section'>
            <h3>📝 Answer</h3>
            <div style='line-height: 1.6; font-size: 16px;'>{text}</div>
        </div>
        """

    def format_sources(self, sources: List[Dict]) -> str:
        """Render the cited sources as numbered HTML cards.

        Args:
            sources: Dicts with ``"title"`` and ``"date"`` entries.

        Returns:
            An HTML fragment; a "No sources found." message when empty.
        """
        if not sources:
            return "<div><h3>📚 Sources</h3><p>No sources found.</p></div>"

        cards = []
        for i, source in enumerate(sources, 1):
            title = html.escape(str(source['title']))
            date = html.escape(str(source['date']))
            cards.append(f"""
            <div class='source-card'>
                <div class='source-title'>{i}. {title}</div>
                <div class='source-date'>📅 {date}</div>
            </div>
            """)

        return "<div><h3>📚 Sources</h3>" + "".join(cards) + "</div>"

    def format_tools(self, tools_used: List[str]) -> str:
        """Render the tool names as HTML badges.

        Args:
            tools_used: Tool identifiers; underscores become spaces and each
                word is capitalized for display.

        Returns:
            An HTML fragment; a "No tools used." message when empty.
        """
        if not tools_used:
            return "<div><h3>🛠️ Tools Used</h3><p>No tools used.</p></div>"

        badges = []
        for tool in tools_used:
            label = html.escape(tool.replace('_', ' ').title())
            badges.append(f"""
            <div class='tool-usage'>
                🔧 {label}
            </div>
            """)

        return "<div><h3>🛠️ Tools Used</h3>" + "".join(badges) + "</div>"

    def launch(self, **kwargs):
        """Launch the Gradio interface, forwarding *kwargs* to ``launch``."""
        return self.interface.launch(**kwargs)