#!/usr/bin/env python
# coding: utf-8

# # Chatbot Program
#
# #### Chatbot with Evaluator - Hugging Face Deployment Ready
# - Primary Agent: Google Gemini (via OpenAI API)
# - Evaluator: Groq Llama 3.3 70B
# - Fast API-based inference (no local models)

# In[ ]:


# imports
import os
import gradio as gr
from openai import OpenAI
import time
from typing import Tuple, Optional
import json
from dotenv import load_dotenv


# In[ ]:


load_dotenv(override=True)


# In[ ]:


# Check for API keys
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

if GOOGLE_API_KEY:
    print(f"Google API Key exists and begins {GOOGLE_API_KEY[:2]}")
else:
    print("Google API Key not set (and this is optional)")

if GROQ_API_KEY:
    print(f"Groq API Key exists and begins {GROQ_API_KEY[:4]}")
else:
    print("Groq API Key not set (and this is optional)")


# In[ ]:


# Model configurations
# Each entry maps the label shown in the UI dropdown to the provider's model
# id plus display metadata. "max_tokens" caps what the UI slider may request.
AGENT_MODELS = {
    # "Gemini Pro": {
    #     "model": "gemini-pro",
    #     "description": "Google's Gemini Pro model",
    #     "max_tokens": 2048
    # },
    "Gemini 1.5 flash": {
        "model": "gemini-1.5-flash",
        "description": "Fast Gemini model",
        "max_tokens": 2048
    }
    # "Gemini 1.5 Pro": {
    #     "model": "gemini-1.5-pro",
    #     "description": "Advanced Gemini model",
    #     "max_tokens": 2048
    # }
}

EVALUATOR_MODELS = {
    "Llama 3.3 70B": {
        "model": "llama-3.3-70b-versatile",
        "description": "Groq's Llama 3.3 70B - Fast & Powerful"
    }
    # "Llama 3.1 70B": {
    #     "model": "llama-3.1-70b-versatile",
    #     "description": "Groq's Llama 3.1 70B"
    # },
    # "Mixtral 8x7B": {
    #     "model": "mixtral-8x7b-32768",
    #     "description": "Groq's Mixtral model"
    # }
}


# In[ ]:


def _evaluation_verdict(evaluation: str) -> Optional[bool]:
    """Classify an evaluator reply as acceptable, unacceptable or undecided.

    The evaluator prompt asks the model to *start* with either 'ACCEPTABLE'
    or 'UNACCEPTABLE', so the first verdict keyword in the text decides.

    Returns:
        True for ACCEPTABLE, False for UNACCEPTABLE, None when neither
        keyword appears.
    """
    text = evaluation.upper()
    pos = text.find("ACCEPTABLE")
    if pos == -1:
        return None
    # "UNACCEPTABLE" contains "ACCEPTABLE", so a plain substring test for
    # "ACCEPTABLE" matches both verdicts. Look at the two characters in
    # front of the first hit to tell them apart.
    return text[max(pos - 2, 0):pos] != "UN"


# ===========================
# API Client Management Class
# ===========================

class APIClientManager:
    """Owns the Gemini (agent) and Groq (evaluator) OpenAI-compatible clients.

    Attributes:
        gemini_client: OpenAI client pointed at the Gemini endpoint, or None
            when GOOGLE_API_KEY is missing or construction failed.
        groq_client: OpenAI client pointed at the Groq endpoint, or None
            when GROQ_API_KEY is missing or construction failed.
        errors: Human-readable setup problems collected during
            initialization; the UI renders them as a warning list.
    """

    def __init__(self):
        self.gemini_client = None
        self.groq_client = None
        self.errors = []
        self.initialize_clients()

    def initialize_clients(self):
        """Initialize API clients with error handling.

        A missing key or a constructor failure is recorded in
        ``self.errors`` instead of raising, so the app can still start and
        show the problem in the interface.
        """
        # Get API keys from environment
        google_api_key = os.getenv("GOOGLE_API_KEY")
        groq_api_key = os.getenv("GROQ_API_KEY")

        # Initialize Gemini client
        if google_api_key:
            try:
                self.gemini_client = OpenAI(
                    api_key=google_api_key,
                    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
                )
                print("✅ Gemini API client initialized")
            except Exception as e:
                self.errors.append(f"Gemini initialization error: {e}")
        else:
            self.errors.append("GOOGLE_API_KEY not found in environment variables")

        # Initialize Groq client
        if groq_api_key:
            try:
                self.groq_client = OpenAI(
                    api_key=groq_api_key,
                    base_url="https://api.groq.com/openai/v1"
                )
                print("✅ Groq API client initialized")
            except Exception as e:
                self.errors.append(f"Groq initialization error: {e}")
        else:
            self.errors.append("GROQ_API_KEY not found in environment variables")

    def create_evaluator_prompt(self, user_input: str, agent_response: str) -> str:
        """Create the evaluation prompt.

        Args:
            user_input: The question the user asked.
            agent_response: The agent's answer to be judged.

        Returns:
            The full prompt text sent to the evaluator model as the user
            message.
        """
        evaluator_prompt = (
            "You are an evaluator that decides whether a response to a question is acceptable. "
            "You are provided with a conversation between a User and an Agent. "
            "Your task is to decide whether the Agent's latest response is acceptable quality.\n\n"
            f"User Question: {user_input}\n\n"
            f"Agent Response: {agent_response}\n\n"
            "With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback.\n\n"
            "Format your evaluation as follows:\n"
            "1. Start with either 'ACCEPTABLE ✅' or 'UNACCEPTABLE ❌'\n"
            "2. Provide a brief quality score (1-10)\n"
            "3. List 2-3 specific strengths or issues\n"
            "4. Suggest one improvement if needed"
        )
        return evaluator_prompt

    def generate_agent_response(
        self,
        user_input: str,
        model_name: str = "Gemini 1.5 flash",
        temperature: float = 0.7,
        max_tokens: int = 500
    ) -> Tuple[str, str, float]:
        """Generate response using Gemini API.

        Args:
            user_input: The user's question.
            model_name: Key into AGENT_MODELS; unknown names fall back to
                "Gemini 1.5 flash".
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Requested completion length, capped at the model's
                configured "max_tokens".

        Returns:
            ``(text, status, seconds)``. On failure ``text`` is a
            user-facing error message, ``status`` is exactly ``"Error"`` and
            ``seconds`` is 0; exceptions are never propagated.
        """
        if not self.gemini_client:
            return "❌ Gemini API not initialized. Please set GOOGLE_API_KEY environment variable.", "Error", 0.0

        try:
            model_config = AGENT_MODELS.get(model_name, AGENT_MODELS["Gemini 1.5 flash"])
            model_id = model_config["model"]

            # Make API call to Gemini
            start_time = time.time()
            response = self.gemini_client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": "You are a helpful AI assistant. Provide clear, accurate, and helpful responses."},
                    {"role": "user", "content": user_input}
                ],
                temperature=temperature,
                max_tokens=min(max_tokens, model_config["max_tokens"]),
                top_p=0.9
            )
            elapsed_time = time.time() - start_time

            # Extract response. The content field may be None (for example
            # when the provider returns no text), so normalise to "" to keep
            # the declared str return type.
            agent_response = response.choices[0].message.content or ""
            status = f"✅ {model_name} responded in {elapsed_time:.2f}s"
            return agent_response, status, elapsed_time

        except Exception as e:
            # Top-level boundary for the UI: log the raw error, then show a
            # friendlier message for the common failure modes.
            details = str(e)
            error_msg = f"❌ Gemini API error: {details}"
            print(error_msg)

            # Check for common errors
            if "API key" in details:
                error_msg = "❌ Invalid Google API key. Please check GOOGLE_API_KEY."
            elif "quota" in details.lower():
                error_msg = "❌ API quota exceeded. Please try again later."
            elif "model" in details.lower():
                error_msg = f"❌ Model '{model_name}' not available. Try another model."

            return error_msg, "Error", 0.0

    def evaluate_response(
        self,
        user_input: str,
        agent_response: str,
        evaluator_model: str = "Llama 3.3 70B",
        temperature: float = 0.3
    ) -> Tuple[str, str, float]:
        """Evaluate the agent's response using Groq API.

        Args:
            user_input: The user's original question.
            agent_response: The agent answer being judged.
            evaluator_model: Key into EVALUATOR_MODELS; unknown names fall
                back to "Llama 3.3 70B".
            temperature: Sampling temperature forwarded to the API.

        Returns:
            ``(evaluation, status, seconds)``. On failure ``evaluation`` is
            a user-facing error message, ``status`` is exactly ``"Error"``
            and ``seconds`` is 0; exceptions are never propagated.
        """
        if not self.groq_client:
            return "❌ Groq API not initialized. Please set GROQ_API_KEY environment variable.", "Error", 0.0

        try:
            model_config = EVALUATOR_MODELS.get(evaluator_model, EVALUATOR_MODELS["Llama 3.3 70B"])
            model_id = model_config["model"]

            # Create evaluation prompt using the class method
            eval_prompt = self.create_evaluator_prompt(user_input, agent_response)

            # Make API call to Groq
            start_time = time.time()
            response = self.groq_client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": "You are a critical evaluator. Be honest but constructive in your feedback."},
                    {"role": "user", "content": eval_prompt}
                ],
                temperature=temperature,
                max_tokens=300,
                top_p=0.9
            )
            elapsed_time = time.time() - start_time

            # Extract evaluation (normalise a None content field to "")
            evaluation = response.choices[0].message.content or ""

            # Determine status based on evaluation. The verdict helper
            # distinguishes UNACCEPTABLE from ACCEPTABLE; a plain substring
            # check for "ACCEPTABLE" would match both.
            verdict = _evaluation_verdict(evaluation)
            if verdict is True:
                status = f"✅ Evaluation: Acceptable | {evaluator_model} ({elapsed_time:.2f}s)"
            elif verdict is False:
                status = f"❌ Evaluation: Needs Improvement | {evaluator_model} ({elapsed_time:.2f}s)"
            else:
                status = f"🔍 Evaluation Complete | {evaluator_model} ({elapsed_time:.2f}s)"

            return evaluation, status, elapsed_time

        except Exception as e:
            # Top-level boundary for the UI: log the raw error, then show a
            # friendlier message for the common failure modes.
            details = str(e)
            error_msg = f"❌ Groq API error: {details}"
            print(error_msg)

            # Check for common errors
            if "API key" in details:
                error_msg = "❌ Invalid Groq API key. Please check GROQ_API_KEY."
            elif "rate" in details.lower():
                error_msg = "❌ Rate limit exceeded. Please wait a moment and try again."
            elif "model" in details.lower():
                error_msg = f"❌ Model '{evaluator_model}' not available."

            return error_msg, "Error", 0.0


# In[ ]:


# ===========================
# Initialize Global Client Manager
# ===========================

api_manager = APIClientManager()


# In[ ]:


# ===========================
# Main Processing Function
# ===========================

def process_with_evaluation(
    user_input: str,
    agent_model: str,
    evaluator_model: str,
    temperature: float,
    max_tokens: int,
    enable_evaluation: bool
) -> Tuple[str, str, str, str]:
    """Process user input through agent and optionally evaluate.

    Args:
        user_input: The user's question.
        agent_model: Key into AGENT_MODELS for the responding model.
        evaluator_model: Key into EVALUATOR_MODELS for the judging model.
        temperature: Sampling temperature for the agent only; the evaluator
            always runs at 0.3.
        max_tokens: Maximum agent response length.
        enable_evaluation: When False the evaluator is not called.

    Returns:
        ``(agent_response, evaluation, status_line, quality_summary)``, in
        the order the Gradio outputs are wired.
    """
    # Treat None like an empty message instead of crashing on .strip().
    if not user_input or not user_input.strip():
        return "Please enter a message.", "", "No input provided", ""

    # Step 1: Generate agent response
    agent_response, agent_status, agent_time = api_manager.generate_agent_response(
        user_input,
        agent_model,
        temperature,
        max_tokens
    )

    # Step 2: Evaluate response (if enabled and the agent succeeded)
    if not enable_evaluation or "Error" in agent_status:
        evaluation = (
            "Evaluation disabled or skipped due to error"
            if not enable_evaluation
            else "Skipped due to agent error"
        )
        return agent_response, evaluation, agent_status, "🔕 No evaluation performed"

    evaluation, eval_status, eval_time = api_manager.evaluate_response(
        user_input,
        agent_response,
        evaluator_model,
        temperature=0.3  # Lower temp for evaluation
    )

    # Combine status
    total_time = agent_time + eval_time
    combined_status = f"Agent: {agent_model} ({agent_time:.2f}s) | Evaluator: {evaluator_model} ({eval_time:.2f}s) | Total: {total_time:.2f}s"

    # Format evaluation for better display
    if eval_status == "Error":
        # The evaluator call itself failed; `evaluation` holds the error
        # message, so do not present it as a completed evaluation.
        eval_summary = "⚠️ Evaluation failed"
    else:
        verdict = _evaluation_verdict(evaluation)
        if verdict is True:
            eval_summary = "✅ Response Quality: ACCEPTABLE"
        elif verdict is False:
            eval_summary = "❌ Response Quality: NEEDS IMPROVEMENT"
        else:
            eval_summary = "🔍 Evaluation Complete"

    return agent_response, evaluation, combined_status, eval_summary


# In[ ]:


# ===========================
# Gradio Interface
# ===========================

def create_interface():
    """Create the Gradio interface.

    Builds the two-column layout (inputs and settings on the left, agent
    response / evaluation / metrics on the right) and wires the button and
    submit events to ``process_with_evaluation``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """

    css = """
    .gradio-container {
        max-width: 1400px !important;
        margin: auto;
    }
    .response-box {
        background: #f0f9ff;
        border-left: 4px solid #3b82f6;
        padding: 12px;
        border-radius: 8px;
    }
    .evaluation-box {
        background: #fef3c7;
        border-left: 4px solid #f59e0b;
        padding: 12px;
        border-radius: 8px;
    }
    .status-box {
        font-family: monospace;
        font-size: 12px;
        color: #6b7280;
    }
    .error-box {
        background: #fee2e2;
        border-left: 4px solid #ef4444;
        padding: 12px;
        border-radius: 8px;
    }
    .success-indicator {
        color: #10b981;
        font-weight: bold;
    }
    .warning-indicator {
        color: #f59e0b;
        font-weight: bold;
    }
    """

    with gr.Blocks(
        title="AI Chatbot with Cross-Model Evaluator",
        theme=gr.themes.Soft(),
        css=css
    ) as demo:

        # Header
        gr.Markdown("""
        # 🤖 AI Chatbot with Cross-Model Evaluator
        ### **Agent:** Google Gemini 1.5 flash | **Evaluator:** Groq Llama 3.3 70B

        This system uses two different AI models:
        1. **Gemini** generates responses to your questions
        2. **Llama 70B** evaluates the quality of those responses
        """)

        # API Status
        if api_manager.errors:
            with gr.Group():
                gr.Markdown("### ⚠️ Setup Issues:")
                for error in api_manager.errors:
                    gr.Markdown(f"- {error}")
                gr.Markdown("""
                **To fix:**
                ```bash
                export GOOGLE_API_KEY="your-google-api-key"
                export GROQ_API_KEY="your-groq-api-key"
                ```

                Get keys from:
                - [Google AI Studio](https://makersuite.google.com/app/apikey)
                - [Groq Console](https://console.groq.com/keys)
                """)
        else:
            gr.Markdown("✅ **All API clients initialized successfully**")

        with gr.Row():
            # Left Column - Input Controls
            with gr.Column(scale=2):
                # Model Selection
                with gr.Group():
                    gr.Markdown("### 🎯 Model Selection")

                    agent_model = gr.Dropdown(
                        choices=list(AGENT_MODELS.keys()),
                        value="Gemini 1.5 flash",
                        label="Agent Model (Response Generator)",
                        info="Google Gemini model for generating responses"
                    )

                    evaluator_model = gr.Dropdown(
                        choices=list(EVALUATOR_MODELS.keys()),
                        value="Llama 3.3 70B",
                        label="Evaluator Model",
                        info="Groq model for evaluating response quality"
                    )

                # User Input
                user_input = gr.Textbox(
                    lines=4,
                    placeholder="Ask me anything... For example: 'Explain quantum computing in simple terms'",
                    label="💬 Your Question",
                    max_lines=8
                )

                # Settings
                with gr.Group():
                    gr.Markdown("### ⚙️ Generation Settings")

                    with gr.Row():
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature (Creativity)",
                            info="Higher = more creative, Lower = more focused"
                        )

                        max_tokens = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=500,
                            step=50,
                            label="Max Tokens",
                            info="Maximum response length"
                        )

                    enable_evaluation = gr.Checkbox(
                        value=True,
                        label="🔍 Enable Cross-Model Evaluation",
                        info="Let Llama 70B evaluate Gemini's response"
                    )

                # Action Buttons
                with gr.Row():
                    generate_btn = gr.Button(
                        "🚀 Generate & Evaluate",
                        variant="primary",
                        size="lg"
                    )
                    clear_btn = gr.Button("🗑️ Clear All", size="lg")

            # Right Column - Outputs
            with gr.Column(scale=3):
                # Quality Indicator
                quality_indicator = gr.Textbox(
                    label="📊 Response Quality",
                    interactive=False,
                    lines=1
                )

                # Agent Response
                with gr.Group():
                    gr.Markdown("### 🤖 Agent Response")
                    agent_output = gr.Textbox(
                        lines=10,
                        label="Gemini's Response",
                        show_copy_button=True,
                        interactive=False,
                        elem_classes=["response-box"]
                    )

                # Evaluation
                with gr.Group():
                    gr.Markdown("### 🔍 Evaluation Result")
                    evaluation_output = gr.Textbox(
                        lines=8,
                        label="Llama's Evaluation",
                        show_copy_button=True,
                        interactive=False,
                        elem_classes=["evaluation-box"]
                    )

                # Status
                status_output = gr.Textbox(
                    lines=2,
                    label="⏱️ Performance Metrics",
                    interactive=False,
                    elem_classes=["status-box"]
                )

        # Examples
        with gr.Row():
            gr.Examples(
                examples=[
                    ["What is the difference between machine learning and deep learning?"],
                    ["Write a Python function to calculate the factorial of a number"],
                    ["Explain the theory of relativity in simple terms"],
                    ["What are the main causes of climate change?"],
                    ["How does blockchain technology work?"],
                    ["What are the benefits and risks of artificial intelligence?"]
                ],
                inputs=user_input,
                label="💡 Example Questions"
            )

        # How It Works
        with gr.Accordion("ℹ️ How Cross-Model Evaluation Works", open=False):
            gr.Markdown("""
            ### The Two-Stage Process:

            **1. Response Generation (Gemini)**
            - Receives your question
            - Generates a comprehensive response
            - Optimized for helpfulness and accuracy

            **2. Quality Evaluation (Llama 70B)**
            - Analyzes the response for:
              - Accuracy and completeness
              - Clarity and coherence
              - Potential issues or biases
            - Provides feedback and improvement suggestions

            ### Benefits:
            - ✅ **Quality Assurance**: Second model checks for errors
            - ✅ **Bias Detection**: Different model perspectives
            - ✅ **Improvement Insights**: Specific feedback on responses
            - ✅ **Fast Processing**: API-based, no local model loading

            ### API Requirements:
            - Google API Key for Gemini (free tier available)
            - Groq API Key for Llama (free tier available)
            """)

        # Event Handlers
        # The button click and the textbox submit run the same pipeline, so
        # they share one inputs list and one outputs list.
        pipeline_inputs = [
            user_input, agent_model, evaluator_model,
            temperature, max_tokens, enable_evaluation
        ]
        pipeline_outputs = [agent_output, evaluation_output, status_output, quality_indicator]

        generate_btn.click(
            fn=process_with_evaluation,
            inputs=pipeline_inputs,
            outputs=pipeline_outputs
        )

        # Reset the question and every output, including the quality
        # indicator, so no stale verdict stays on screen after clearing.
        clear_btn.click(
            fn=lambda: ("", "", "", "", ""),
            outputs=[user_input, agent_output, evaluation_output, status_output, quality_indicator]
        )

        user_input.submit(
            fn=process_with_evaluation,
            inputs=pipeline_inputs,
            outputs=pipeline_outputs
        )

    return demo


# In[ ]:


# ===========================
# Main Execution
# ===========================

if __name__ == "__main__":
    print("=" * 60)
    print("🚀 AI Chatbot with Cross-Model Evaluator")
    print("=" * 60)

    # Check API keys
    # NOTE(review): the first 10 characters of each key end up in the process
    # logs; consider logging only presence if these logs are shared.
    google_key = os.getenv("GOOGLE_API_KEY")
    groq_key = os.getenv("GROQ_API_KEY")

    if not google_key:
        print("⚠️ Warning: GOOGLE_API_KEY not found")
        print(" Set it with: export GOOGLE_API_KEY='your-key-here'")
    else:
        print(f"✅ Google API Key detected: {google_key[:10]}...")

    if not groq_key:
        print("⚠️ Warning: GROQ_API_KEY not found")
        print(" Set it with: export GROQ_API_KEY='your-key-here'")
    else:
        print(f"✅ Groq API Key detected: {groq_key[:10]}...")

    print("=" * 60)
    print("📝 Starting Gradio interface...")
    print("📌 Interface will be available at: http://localhost:7860")
    print("=" * 60)

    # Create and launch interface
    demo = create_interface()
    demo.launch()


# In[ ]: