# Spaces: kardwalker / Search_Agent
# Status shown on the hosting page: Runtime error
# File size: 15,692 bytes

import gradio as gr
import asyncio
import json
import time
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from typing import Dict, List, Optional, Tuple
import nest_asyncio

# Apply nest_asyncio for compatibility with Gradio
nest_asyncio.apply()

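# The search-agent building blocks used below -- create_search_workflow,
# AgentState and QueryIntent -- are not defined in this file. Import them here
# (or define them above) before this point; otherwise the first use of any of
# them raises NameError.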

class GradioSearchInterface:
    """Adapter between the search-agent workflow and the Gradio UI.

    The instance owns the compiled workflow, an in-memory list of past
    searches and a small dict of running performance metrics. All public
    methods return plain values (strings, DataFrames, Plotly figures or
    ``None``) that the Gradio components are bound to.
    """

    def __init__(self):
        self.search_workflow = create_search_workflow()
        # One dict per completed search, in chronological order.
        self.search_history = []
        self.performance_metrics = self._fresh_metrics()

    @staticmethod
    def _fresh_metrics() -> Dict[str, float]:
        """Return a zeroed metrics dict (used on construction and on clear)."""
        return {
            'queries': 0,
            'avg_processing_time': 0,
            'avg_confidence': 0,
            'total_results': 0
        }

    @staticmethod
    def _resolve_intent(intent_override: Optional[str]):
        """Map a UI intent label to a ``QueryIntent`` member.

        Returns ``None`` (auto-detect) for an empty label or "Auto-detect".
        The label is matched case-insensitively against both the member name
        and the member value, because the dropdown shows ``value.title()``
        while older callers may pass the member name.

        Raises:
            ValueError: if the label matches no ``QueryIntent`` member.
        """
        if not intent_override or intent_override == "Auto-detect":
            return None
        wanted = intent_override.strip().casefold()
        for member in QueryIntent:
            if wanted in (member.name.casefold(), str(member.value).casefold()):
                return member
        raise ValueError(f"Unknown query intent: {intent_override!r}")

    def _record_search(self, query: str, final_state, processing_time: float) -> None:
        """Fold one completed search into the running metrics and the history."""
        metrics = self.performance_metrics
        confidence = final_state['confidence_score']
        results_count = len(final_state['search_results'])

        metrics['queries'] += 1
        count = metrics['queries']
        # Running means: previous mean re-weighted by (count - 1) plus the new sample.
        metrics['avg_processing_time'] = (
            (metrics['avg_processing_time'] * (count - 1) + processing_time) / count
        )
        metrics['avg_confidence'] = (
            (metrics['avg_confidence'] * (count - 1) + confidence) / count
        )
        metrics['total_results'] += results_count

        intent = final_state['intent']
        self.search_history.append({
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'intent': intent.value if intent else 'unknown',
            'processing_time': processing_time,
            'confidence': confidence,
            'results_count': results_count,
            'answer': final_state['answer']
        })

    @staticmethod
    def _format_summary(final_state, processing_time: float) -> str:
        """Render the Markdown summary shown next to the answer."""
        intent = final_state['intent']
        intent_label = intent.value if intent else 'Unknown'
        return f"""
## Search Summary
- **Query Intent**: {intent_label}
- **Expanded Queries**: {len(final_state['expanded_queries'])} queries generated
- **Total Results Found**: {len(final_state['search_results'])} results
- **Top Results Analyzed**: {len(final_state['ranked_results'])} results
- **Verified Facts**: {len(final_state['verified_facts'])} facts
- **Processing Time**: {processing_time:.2f} seconds
- **Confidence Score**: {final_state['confidence_score']:.2%}
"""

    @staticmethod
    def _build_results_table(ranked_results) -> pd.DataFrame:
        """Build the table of the top 10 ranked results (empty frame if none)."""
        rows = []
        for rank, result in enumerate(ranked_results[:10], 1):
            title = result['title']
            rows.append({
                'Rank': rank,
                # Long titles are cut to 100 characters to keep the table readable.
                'Title': title[:100] + '...' if len(title) > 100 else title,
                'Source': result['source'].title(),
                'Authority Score': f"{result.get('authority_score', 0):.2f}",
                'Relevance Score': f"{result.get('relevance_score', 0):.2f}",
                'Composite Score': f"{result.get('composite_score', 0):.2f}",
                'URL': result['url']
            })
        return pd.DataFrame(rows)

    @staticmethod
    def _format_facts(verified_facts) -> str:
        """Render up to five verified facts as a numbered Markdown list."""
        if not verified_facts:
            return ""
        entries = [
            f"{i}. **{fact['fact']}** (Confidence: {fact.get('confidence', 0):.1%})\n\n"
            for i, fact in enumerate(verified_facts[:5], 1)
        ]
        return "## Verified Facts\n\n" + "".join(entries)

    async def process_search_async(
        self, query: str, intent_override: Optional[str] = None
    ) -> Tuple[str, str, pd.DataFrame, str, str]:
        """Run one query through the workflow and format the outcome for the UI.

        Args:
            query: Raw text from the search box; ``None`` and whitespace-only
                input are treated as an empty query.
            intent_override: Intent label from the dropdown, or
                ``None``/"Auto-detect" to leave the intent unset.

        Returns:
            ``(answer, summary_markdown, results_table, facts_markdown, errors)``.
            The third element is always a DataFrame so it can be bound to a
            ``gr.DataFrame`` on every path. On failure the error message is
            returned as both the answer and the error text.
        """
        if not query or not query.strip():
            return "Please enter a search query.", "", pd.DataFrame(), "", ""

        try:
            # Built inside the try so a bad intent label is reported through
            # the UI instead of escaping as an unhandled exception.
            initial_state = AgentState(
                query=query.strip(),
                intent=self._resolve_intent(intent_override),
                expanded_queries=[],
                search_results=[],
                semantic_index=None,
                ranked_results=[],
                verified_facts=[],
                answer="",
                confidence_score=0.0,
                error_log=[],
                cache_hits=0,
                processing_time=0.0,
                user_context={},
                iteration=0
            )

            start_time = time.time()
            final_state = await self.search_workflow.ainvoke(initial_state)
            processing_time = time.time() - start_time

            self._record_search(query, final_state, processing_time)

            error_entries = final_state['error_log']
            errors = "\n".join(error_entries) if error_entries else "No errors occurred."

            return (
                final_state['answer'],
                self._format_summary(final_state, processing_time),
                self._build_results_table(final_state['ranked_results']),
                self._format_facts(final_state['verified_facts']),
                errors,
            )

        except Exception as e:
            # UI boundary: surface the failure as text rather than crash the handler.
            error_msg = f"Error processing search: {str(e)}"
            return error_msg, "", pd.DataFrame(), "", error_msg

    def process_search(
        self, query: str, intent_override: str = "Auto-detect"
    ) -> Tuple[str, str, pd.DataFrame, str, str]:
        """Synchronous wrapper: run ``process_search_async`` on a fresh event loop."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(self.process_search_async(query, intent_override))
        finally:
            loop.close()

    def get_search_history(self) -> pd.DataFrame:
        """Return the search history as a DataFrame (empty frame if no searches).

        The stored answer text is omitted; timestamps are parsed to datetimes.
        """
        if not self.search_history:
            return pd.DataFrame()

        df = pd.DataFrame(self.search_history)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df[['timestamp', 'query', 'intent', 'processing_time', 'confidence', 'results_count']]

    def get_performance_chart(self):
        """Return a Plotly line chart of processing time per search, or ``None`` if no history."""
        if not self.search_history:
            return None

        df = pd.DataFrame(self.search_history)
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=df['timestamp'],
            y=df['processing_time'],
            mode='lines+markers',
            name='Processing Time (s)',
            line=dict(color='blue')
        ))

        fig.update_layout(
            title='Search Performance Over Time',
            xaxis_title='Time',
            yaxis_title='Processing Time (seconds)',
            hovermode='x unified'
        )

        return fig

    def get_confidence_distribution(self):
        """Return a Plotly histogram of confidence scores, or ``None`` if no history."""
        if not self.search_history:
            return None

        df = pd.DataFrame(self.search_history)

        fig = px.histogram(
            df,
            x='confidence',
            nbins=20,
            title='Confidence Score Distribution',
            labels={'confidence': 'Confidence Score', 'count': 'Frequency'}
        )

        return fig

    def clear_history(self):
        """Reset history and metrics.

        Returns:
            ``(message, empty_history_frame, None, None)``; the two ``None``
            values are the cleared values for the two chart outputs.
        """
        self.search_history = []
        self.performance_metrics = self._fresh_metrics()
        return "Search history cleared!", pd.DataFrame(), None, None

# Module-level interface instance used by the event handlers defined inside
# create_gradio_app(). It is constructed at import time, so
# GradioSearchInterface.__init__ (which calls create_search_workflow()) runs
# as soon as this module is loaded.
search_interface = GradioSearchInterface()

# Create the Gradio interface
def create_gradio_app():
    """Assemble the four-tab Gradio Blocks UI and return it without launching.

    Tabs: Search (inputs and quick stats), Results (answer, summary, facts and
    result table), Analytics (two charts and the history table) and System
    (static info, clear button, error log). Every handler delegates to the
    module-level ``search_interface``.
    """
    # Dropdown entries: the auto-detect sentinel followed by one title-cased
    # label per QueryIntent member.
    intent_choices = ["Auto-detect", *(intent.value.title() for intent in QueryIntent)]

    with gr.Blocks(
        title="Advanced Multi-Source Search Agent",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .main-header {
            text-align: center;
            color: #2563eb;
            margin-bottom: 20px;
        }
        """
    ) as app:

        gr.Markdown(
            """
            # 🔍 Advanced Multi-Source Search Agent
            
            This intelligent search agent combines multiple search engines, semantic analysis, and fact verification 
            to provide comprehensive and reliable answers to your queries.
            
            **Features:**
            - Multi-source search (Google, DuckDuckGo)
            - Intent classification and query expansion
            - Semantic ranking and fact verification
            - Real-time performance analytics
            """,
            elem_classes=["main-header"]
        )

        # --- Search tab: query inputs on the left, running stats on the right
        with gr.Tab("🔍 Search"):
            with gr.Row():
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Search Query",
                        placeholder="Enter your search query here...",
                        lines=2
                    )

                    intent_dropdown = gr.Dropdown(
                        choices=intent_choices,
                        value="Auto-detect",
                        label="Query Intent (Optional)",
                        info="Override automatic intent detection"
                    )

                    search_btn = gr.Button("🔍 Search", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### Quick Stats")
                    stats_display = gr.Markdown("No searches yet.")

        # --- Results tab: answer, then summary/facts side by side, then table
        with gr.Tab("📋 Results"):
            with gr.Row():
                with gr.Column():
                    answer_output = gr.Markdown(label="Answer")

            with gr.Row():
                with gr.Column():
                    summary_output = gr.Markdown(label="Search Summary")

                with gr.Column():
                    facts_output = gr.Markdown(label="Verified Facts")

            with gr.Row():
                results_table = gr.DataFrame(
                    label="Top Search Results",
                    interactive=False,
                    wrap=True
                )

        # --- Analytics tab: two charts above the history table
        with gr.Tab("📊 Analytics"):
            with gr.Row():
                with gr.Column():
                    performance_chart = gr.Plot(label="Performance Over Time")

                with gr.Column():
                    confidence_chart = gr.Plot(label="Confidence Distribution")

            with gr.Row():
                history_table = gr.DataFrame(
                    label="Search History",
                    interactive=False
                )

        # --- System tab: static description plus maintenance controls
        with gr.Tab("⚙️ System"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### System Information")
                    system_info = gr.Markdown(
                        """
                        **Search Sources:** Google, DuckDuckGo
                        **Embedding Model:** all-MiniLM-L6-v2
                        **LLM:** GPT-4o-mini (Azure)
                        **Semantic Search:** FAISS
                        **Caching:** Redis (if available)
                        """
                    )

                with gr.Column():
                    gr.Markdown("### Controls")
                    clear_btn = gr.Button("🗑️ Clear History", variant="secondary")

                    error_log = gr.Textbox(
                        label="Error Log",
                        lines=5,
                        interactive=False
                    )

        # ---- Handlers -----------------------------------------------------
        def update_stats():
            """Render the current running metrics as Markdown for the stats panel."""
            metrics = search_interface.performance_metrics
            return f"""
            **Total Queries:** {metrics['queries']}
            **Avg Processing Time:** {metrics['avg_processing_time']:.2f}s
            **Avg Confidence:** {metrics['avg_confidence']:.1%}
            **Total Results:** {metrics['total_results']}
            """

        def search_and_update(query, intent):
            """Run a search, then refresh stats, history and both charts.

            The returned tuple is ordered to match ``search_outputs`` below.
            """
            answer, summary, results_df, facts, errors = search_interface.process_search(query, intent)
            return (
                answer,
                summary,
                results_df,
                facts,
                errors,
                update_stats(),
                search_interface.get_search_history(),
                search_interface.get_performance_chart(),
                search_interface.get_confidence_distribution(),
            )

        def clear_and_update():
            """Clear history/metrics and return the cleared values plus fresh stats."""
            cleared = search_interface.clear_history()
            return (*cleared, update_stats())

        def load_initial_view():
            """Values for the history table and stats panel when the page loads."""
            return search_interface.get_search_history(), update_stats()

        # ---- Wiring -------------------------------------------------------
        search_inputs = [query_input, intent_dropdown]
        search_outputs = [
            answer_output,
            summary_output,
            results_table,
            facts_output,
            error_log,
            stats_display,
            history_table,
            performance_chart,
            confidence_chart
        ]

        # The button click and Enter in the textbox trigger the same search.
        for register in (search_btn.click, query_input.submit):
            register(
                fn=search_and_update,
                inputs=search_inputs,
                outputs=search_outputs
            )

        clear_btn.click(
            fn=clear_and_update,
            outputs=[error_log, history_table, performance_chart, confidence_chart, stats_display]
        )

        app.load(
            fn=load_initial_view,
            outputs=[history_table, stats_display]
        )

    return app

# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    launch_settings = {
        "server_name": "0.0.0.0",  # listen on all interfaces (external access)
        "server_port": 7860,       # default Gradio port
        "share": False,            # True would create a public link
        "debug": True,             # debug mode
        "show_error": True,        # show detailed errors
        "favicon_path": None,      # optional custom favicon
        "auth": None,              # optional ("username", "password")
    }

    app = create_gradio_app()
    app.launch(**launch_settings)