# Search_Agent / app.py
# (Hugging Face Space scrape residue preserved as comments:
#  author "kardwalker", commit message "Update app.py", commit 882be04 verified)
import gradio as gr
import asyncio
import json
import time
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from typing import Dict, List, Optional, Tuple
import nest_asyncio
# Apply nest_asyncio for compatibility with Gradio
nest_asyncio.apply()
# Import your existing search agent classes and functions
# (Assuming all the previous code is imported or defined above)
class GradioSearchInterface:
    """Gradio front-end around the project's search workflow.

    Wraps the compiled workflow (``create_search_workflow``, defined
    elsewhere in this project) with per-session search history and
    running performance metrics so the Results and Analytics tabs can be
    refreshed after every query.
    """

    def __init__(self) -> None:
        # Compiled workflow graph; exposes `ainvoke(state)`.
        self.search_workflow = create_search_workflow()
        # One record per completed search (see `_record_search`).
        self.search_history: List[Dict] = []
        # Running aggregates over the whole session.
        self.performance_metrics: Dict[str, float] = {
            'queries': 0,
            'avg_processing_time': 0,
            'avg_confidence': 0,
            'total_results': 0
        }

    @staticmethod
    def _resolve_intent(intent_override: Optional[str]):
        """Map the UI dropdown label back to a QueryIntent member.

        Returns None for a missing label or the "Auto-detect" sentinel.
        The dropdown is populated with ``intent.value.title()``, so the
        label is lower-cased and looked up by *value* first. (The
        previous code did ``QueryIntent[label]`` — a lookup by member
        *name* — which raises KeyError for title-cased labels such as
        "Factual" unless member names happen to match exactly.)

        Raises:
            KeyError: if the label matches neither a value nor a name.
        """
        if not intent_override or intent_override == "Auto-detect":
            return None
        try:
            # By value: inverse of the `value.title()` used to build choices.
            return QueryIntent(intent_override.lower())
        except ValueError:
            pass
        try:
            # By exact member name (original behavior, kept as fallback).
            return QueryIntent[intent_override]
        except KeyError:
            # By conventional upper-cased member name (e.g. FACTUAL).
            return QueryIntent[intent_override.upper()]

    def _record_search(self, query: str, final_state: Dict, processing_time: float) -> None:
        """Fold one finished search into the running metrics and history."""
        metrics = self.performance_metrics
        metrics['queries'] += 1
        n = metrics['queries']
        # Incremental mean: new_avg = old_avg + (x - old_avg) / n
        metrics['avg_processing_time'] += (processing_time - metrics['avg_processing_time']) / n
        metrics['avg_confidence'] += (final_state['confidence_score'] - metrics['avg_confidence']) / n
        metrics['total_results'] += len(final_state['search_results'])
        self.search_history.append({
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'intent': final_state['intent'].value if final_state['intent'] else 'unknown',
            'processing_time': processing_time,
            'confidence': final_state['confidence_score'],
            'results_count': len(final_state['search_results']),
            'answer': final_state['answer']
        })

    @staticmethod
    def _results_dataframe(ranked_results: List[Dict]) -> pd.DataFrame:
        """Top-10 ranked results as a display table (empty frame if none)."""
        rows = []
        for rank, result in enumerate(ranked_results[:10], 1):
            title = result['title']
            if len(title) > 100:
                title = title[:100] + '...'
            rows.append({
                'Rank': rank,
                'Title': title,
                'Source': result['source'].title(),
                'Authority Score': f"{result.get('authority_score', 0):.2f}",
                'Relevance Score': f"{result.get('relevance_score', 0):.2f}",
                'Composite Score': f"{result.get('composite_score', 0):.2f}",
                'URL': result['url']
            })
        return pd.DataFrame(rows) if rows else pd.DataFrame()

    @staticmethod
    def _facts_markdown(verified_facts: List[Dict]) -> str:
        """Markdown list of up to five verified facts ('' when none)."""
        if not verified_facts:
            return ""
        facts_text = "## Verified Facts\n\n"
        for i, fact in enumerate(verified_facts[:5], 1):
            confidence = fact.get('confidence', 0)
            facts_text += f"{i}. **{fact['fact']}** (Confidence: {confidence:.1%})\n\n"
        return facts_text

    async def process_search_async(self, query: str, intent_override: str = None) -> Tuple[str, str, pd.DataFrame, str, str]:
        """Run the full workflow for `query` and return UI-ready pieces.

        Returns:
            (answer_markdown, summary_markdown, results_dataframe,
            facts_markdown, error_text). On exception the first and last
            slots both carry the error message and the table is empty.
        """
        if not query.strip():
            return "Please enter a search query.", "", "", "", ""

        # Fresh per-query workflow state (AgentState is project-defined).
        initial_state = AgentState(
            query=query.strip(),
            intent=self._resolve_intent(intent_override),
            expanded_queries=[],
            search_results=[],
            semantic_index=None,
            ranked_results=[],
            verified_facts=[],
            answer="",
            confidence_score=0.0,
            error_log=[],
            cache_hits=0,
            processing_time=0.0,
            user_context={},
            iteration=0
        )

        start_time = time.time()
        try:
            final_state = await self.search_workflow.ainvoke(initial_state)
            processing_time = time.time() - start_time
            self._record_search(query, final_state, processing_time)

            answer = final_state['answer']
            summary = f"""
## Search Summary
- **Query Intent**: {final_state['intent'].value if final_state['intent'] else 'Unknown'}
- **Expanded Queries**: {len(final_state['expanded_queries'])} queries generated
- **Total Results Found**: {len(final_state['search_results'])} results
- **Top Results Analyzed**: {len(final_state['ranked_results'])} results
- **Verified Facts**: {len(final_state['verified_facts'])} facts
- **Processing Time**: {processing_time:.2f} seconds
- **Confidence Score**: {final_state['confidence_score']:.2%}
"""
            results_table = self._results_dataframe(final_state['ranked_results'])
            facts_text = self._facts_markdown(final_state['verified_facts'])
            errors = "\n".join(final_state['error_log']) if final_state['error_log'] else "No errors occurred."
            return answer, summary, results_table, facts_text, errors
        except Exception as e:
            error_msg = f"Error processing search: {str(e)}"
            return error_msg, "", pd.DataFrame(), "", error_msg

    def process_search(self, query: str, intent_override: str = "Auto-detect") -> Tuple[str, str, pd.DataFrame, str, str]:
        """Blocking wrapper used by Gradio callbacks.

        Drives the coroutine on a fresh event loop; nest_asyncio (applied
        at module import) makes this safe inside Gradio's running loop.
        """
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(self.process_search_async(query, intent_override))
        finally:
            loop.close()

    def get_search_history(self) -> pd.DataFrame:
        """Search history as a DataFrame (empty frame when no searches)."""
        if not self.search_history:
            return pd.DataFrame()
        df = pd.DataFrame(self.search_history)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        # Drop the free-text 'answer' column for the compact history view.
        return df[['timestamp', 'query', 'intent', 'processing_time', 'confidence', 'results_count']]

    def get_performance_chart(self):
        """Plotly line chart of processing time per search, or None if empty."""
        if not self.search_history:
            return None
        df = pd.DataFrame(self.search_history)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=df['timestamp'],
            y=df['processing_time'],
            mode='lines+markers',
            name='Processing Time (s)',
            line=dict(color='blue')
        ))
        fig.update_layout(
            title='Search Performance Over Time',
            xaxis_title='Time',
            yaxis_title='Processing Time (seconds)',
            hovermode='x unified'
        )
        return fig

    def get_confidence_distribution(self):
        """Plotly histogram of confidence scores, or None if no history."""
        if not self.search_history:
            return None
        df = pd.DataFrame(self.search_history)
        fig = px.histogram(
            df,
            x='confidence',
            nbins=20,
            title='Confidence Score Distribution',
            labels={'confidence': 'Confidence Score', 'count': 'Frequency'}
        )
        return fig

    def clear_history(self):
        """Reset history and metrics; return fresh values for the UI slots."""
        self.search_history = []
        self.performance_metrics = {
            'queries': 0,
            'avg_processing_time': 0,
            'avg_confidence': 0,
            'total_results': 0
        }
        return "Search history cleared!", pd.DataFrame(), None, None
# Module-level singleton shared by every Gradio callback in create_gradio_app().
search_interface = GradioSearchInterface()
# Create the Gradio interface
def create_gradio_app():
    """Build the Gradio Blocks UI around the module-level `search_interface`.

    Layout: four tabs — Search (query input + quick stats), Results
    (answer, summary, verified facts, ranked-results table), Analytics
    (performance/confidence charts + history table), and System (static
    info + clear-history control). Returns the un-launched `gr.Blocks`
    app; the caller is responsible for `.launch()`.
    """
    with gr.Blocks(
        title="Advanced Multi-Source Search Agent",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .main-header {
            text-align: center;
            color: #2563eb;
            margin-bottom: 20px;
        }
        """
    ) as app:
        gr.Markdown(
            """
            # 🔍 Advanced Multi-Source Search Agent
            This intelligent search agent combines multiple search engines, semantic analysis, and fact verification
            to provide comprehensive and reliable answers to your queries.
            **Features:**
            - Multi-source search (Google, DuckDuckGo)
            - Intent classification and query expansion
            - Semantic ranking and fact verification
            - Real-time performance analytics
            """,
            elem_classes=["main-header"]
        )
        with gr.Tab("🔍 Search"):
            with gr.Row():
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Search Query",
                        placeholder="Enter your search query here...",
                        lines=2
                    )
                    # Labels come from QueryIntent values; "Auto-detect" lets
                    # the workflow classify the intent itself.
                    intent_dropdown = gr.Dropdown(
                        choices=["Auto-detect"] + [intent.value.title() for intent in QueryIntent],
                        value="Auto-detect",
                        label="Query Intent (Optional)",
                        info="Override automatic intent detection"
                    )
                    search_btn = gr.Button("🔍 Search", variant="primary", size="lg")
                with gr.Column(scale=1):
                    gr.Markdown("### Quick Stats")
                    stats_display = gr.Markdown("No searches yet.")
        with gr.Tab("📋 Results"):
            with gr.Row():
                with gr.Column():
                    answer_output = gr.Markdown(label="Answer")
            with gr.Row():
                with gr.Column():
                    summary_output = gr.Markdown(label="Search Summary")
                with gr.Column():
                    facts_output = gr.Markdown(label="Verified Facts")
            with gr.Row():
                results_table = gr.DataFrame(
                    label="Top Search Results",
                    interactive=False,
                    wrap=True
                )
        with gr.Tab("📊 Analytics"):
            with gr.Row():
                with gr.Column():
                    performance_chart = gr.Plot(label="Performance Over Time")
                with gr.Column():
                    confidence_chart = gr.Plot(label="Confidence Distribution")
            with gr.Row():
                history_table = gr.DataFrame(
                    label="Search History",
                    interactive=False
                )
        with gr.Tab("⚙️ System"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### System Information")
                    # NOTE(review): static description; kept in a variable even
                    # though it is never referenced again.
                    system_info = gr.Markdown(
                        """
                        **Search Sources:** Google, DuckDuckGo
                        **Embedding Model:** all-MiniLM-L6-v2
                        **LLM:** GPT-4o-mini (Azure)
                        **Semantic Search:** FAISS
                        **Caching:** Redis (if available)
                        """
                    )
                with gr.Column():
                    gr.Markdown("### Controls")
                    clear_btn = gr.Button("🗑️ Clear History", variant="secondary")
                    error_log = gr.Textbox(
                        label="Error Log",
                        lines=5,
                        interactive=False
                    )
        # Event handlers
        def update_stats():
            # Render the running aggregates kept on the shared interface object.
            metrics = search_interface.performance_metrics
            return f"""
            **Total Queries:** {metrics['queries']}
            **Avg Processing Time:** {metrics['avg_processing_time']:.2f}s
            **Avg Confidence:** {metrics['avg_confidence']:.1%}
            **Total Results:** {metrics['total_results']}
            """
        def search_and_update(query, intent):
            # Perform search (blocking wrapper around the async workflow).
            answer, summary, results_df, facts, errors = search_interface.process_search(query, intent)
            # Update stats
            stats = update_stats()
            # Update history and charts
            history_df = search_interface.get_search_history()
            perf_chart = search_interface.get_performance_chart()
            conf_chart = search_interface.get_confidence_distribution()
            # Order must match the `outputs=` lists in the event wiring below.
            return (
                answer,       # answer_output
                summary,      # summary_output
                results_df,   # results_table
                facts,        # facts_output
                errors,       # error_log
                stats,        # stats_display
                history_df,   # history_table
                perf_chart,   # performance_chart
                conf_chart    # confidence_chart
            )
        def clear_and_update():
            # The confirmation message is surfaced through the error_log textbox.
            message, empty_df, empty_chart1, empty_chart2 = search_interface.clear_history()
            stats = update_stats()
            return message, empty_df, empty_chart1, empty_chart2, stats
        # Connect events: button click and textbox Enter share one handler.
        search_btn.click(
            fn=search_and_update,
            inputs=[query_input, intent_dropdown],
            outputs=[
                answer_output,
                summary_output,
                results_table,
                facts_output,
                error_log,
                stats_display,
                history_table,
                performance_chart,
                confidence_chart
            ]
        )
        query_input.submit(
            fn=search_and_update,
            inputs=[query_input, intent_dropdown],
            outputs=[
                answer_output,
                summary_output,
                results_table,
                facts_output,
                error_log,
                stats_display,
                history_table,
                performance_chart,
                confidence_chart
            ]
        )
        clear_btn.click(
            fn=clear_and_update,
            outputs=[error_log, history_table, performance_chart, confidence_chart, stats_display]
        )
        # Load initial history on startup
        app.load(
            fn=lambda: (search_interface.get_search_history(), update_stats()),
            outputs=[history_table, stats_display]
        )
    return app
# Launch the application (script entry point; no-op when imported).
if __name__ == "__main__":
    # Create and launch the Gradio app
    app = create_gradio_app()
    # Launch with custom settings
    app.launch(
        server_name="0.0.0.0",   # Allow external access
        server_port=7860,        # Default Gradio port
        share=False,             # Set to True to create public link
        debug=True,              # Enable debug mode
        show_error=True,         # Show detailed errors
        favicon_path=None,       # Add custom favicon if desired
        auth=None,               # Add authentication if needed: ("username", "password")
    )