"""Multi-agent Gradio demo: fan a task out to specialized agents in parallel.

Each agent is a named persona (researcher / coder / analyzer / writer) backed
by the Hugging Face Inference API — no local model loading. Agents run
concurrently on a thread pool and results are aggregated into a Markdown
report, summary stats, and a downloadable JSON payload.
"""

import json
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import gradio as gr
from huggingface_hub import InferenceClient

# Use Hugging Face Inference API (no model loading needed!)
# This is FREE and much faster!
AGENT_CONFIGS = {
    "researcher": {
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "role": "Research and gather information",
        "system_prompt": "You are a research agent specialized in gathering and analyzing information. Provide detailed, well-researched responses.",
    },
    "coder": {
        "model": "bigcode/starcoder2-15b",
        "role": "Generate and explain code",
        "system_prompt": "You are an expert programmer. Generate clean, efficient, well-commented code.",
    },
    "analyzer": {
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "role": "Analyze data and provide insights",
        "system_prompt": "You are a data analyst. Provide clear insights and actionable recommendations.",
    },
    "writer": {
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "role": "Create content and documentation",
        "system_prompt": "You are a technical writer. Create clear, professional documentation and content.",
    },
}


class AgentSystem:
    """Owns one InferenceClient per agent and a shared thread pool."""

    def __init__(self):
        # No model loading! Using HF Inference API
        self.clients = {}
        self.executor = ThreadPoolExecutor(max_workers=4)
        # Initialize inference clients for each agent
        for agent_name in AGENT_CONFIGS:
            model = AGENT_CONFIGS[agent_name]["model"]
            self.clients[agent_name] = InferenceClient(model=model)
        print("✅ Agent system initialized with Inference API!")

    def generate_response(self, agent_name, task, max_tokens=300):
        """Generate one agent's response via the HF Inference API.

        Returns a dict with keys: agent, role, response, status.
        Never raises — API/network errors are folded into a status="error"
        result so a single failing agent cannot sink the whole batch.
        """
        try:
            config = AGENT_CONFIGS[agent_name]
            client = self.clients[agent_name]
            # Create prompt: persona system prompt + the user's task
            messages = [
                {"role": "system", "content": config["system_prompt"]},
                {"role": "user", "content": f"Task: {task}"},
            ]
            # Accumulate the streamed completion
            response_text = ""
            for message in client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7,
                stream=True,
            ):
                # BUGFIX: stream deltas may carry content=None (e.g. the final
                # chunk); concatenating None to str raises TypeError. Only
                # append real, non-empty text.
                delta_content = getattr(message.choices[0].delta, "content", None)
                if delta_content:
                    response_text += delta_content
            return {
                "agent": agent_name,
                "role": config["role"],
                "response": response_text.strip(),
                "status": "success",
            }
        except Exception as e:
            # Broad catch is deliberate: this runs on worker threads and the
            # caller expects a result dict, not an exception.
            return {
                "agent": agent_name,
                "role": AGENT_CONFIGS[agent_name]["role"],
                "response": f"Error: {str(e)}",
                "status": "error",
            }

    def run_agents_parallel(self, task, selected_agents, max_tokens=300):
        """Run the selected agents concurrently.

        Returns (results, total_time): a list of per-agent result dicts
        (each annotated with "time_taken" seconds) in completion order,
        and the total wall-clock time in seconds.
        """
        start_time = time.time()
        futures = {}
        results = []
        # Submit tasks to thread pool
        for agent_name in selected_agents:
            future = self.executor.submit(
                self.generate_response, agent_name, task, max_tokens
            )
            futures[future] = agent_name
        # Collect results as they complete
        for future in as_completed(futures):
            agent_name = futures[future]
            try:
                result = future.result(timeout=30)  # 30 second timeout per agent
                result["time_taken"] = round(time.time() - start_time, 2)
                results.append(result)
            except Exception as e:
                results.append({
                    "agent": agent_name,
                    "role": AGENT_CONFIGS[agent_name]["role"],
                    "response": f"Timeout or error: {str(e)}",
                    "status": "error",
                    "time_taken": round(time.time() - start_time, 2),
                })
        total_time = round(time.time() - start_time, 2)
        return results, total_time


# Initialize the agent system
print("🚀 Initializing AI Agent System...")
agent_system = AgentSystem()
print("✅ System ready!")


def process_task(task, researcher, coder, analyzer, writer, max_tokens,
                 progress=gr.Progress()):
    """Gradio callback: run the selected agents on *task*.

    Returns (markdown_report, stats_markdown, json_string) for the three
    output components. Empty task or no selected agent short-circuits with
    a warning message.
    """
    if not task.strip():
        return "⚠️ Please enter a task!", "", ""

    # Determine which agents to use from the checkbox states
    selected_agents = []
    if researcher:
        selected_agents.append("researcher")
    if coder:
        selected_agents.append("coder")
    if analyzer:
        selected_agents.append("analyzer")
    if writer:
        selected_agents.append("writer")

    if not selected_agents:
        return "⚠️ Please select at least one agent!", "", ""

    progress(0, desc="Starting agents...")
    # Run agents in parallel
    results, total_time = agent_system.run_agents_parallel(
        task, selected_agents, max_tokens
    )
    progress(1, desc="Complete!")

    # Format output as a Markdown report, one section per agent
    output = "# 🤖 AI Agent System Results\n\n"
    output += f"**Task:** {task}\n\n"
    output += f"**Agents Used:** {len(selected_agents)} agents running in parallel\n\n"
    output += f"**Total Time:** {total_time}s\n\n"
    output += "---\n\n"
    for idx, result in enumerate(results, 1):
        status_emoji = "✅" if result["status"] == "success" else "❌"
        output += f"## {status_emoji} Agent {idx}: {result['agent'].upper()}\n\n"
        output += f"**Role:** {result['role']}\n\n"
        output += f"**Response:**\n\n{result['response']}\n\n"
        output += f"*⏱️ Completed in {result['time_taken']}s*\n\n"
        output += "---\n\n"

    # Create summary stats
    success_count = sum(1 for r in results if r["status"] == "success")
    stats = f"""📊 **Execution Stats**
- Total Agents: {len(selected_agents)}
- Successful: {success_count}
- Failed: {len(selected_agents) - success_count}
- Total Time: {total_time}s
- Average per Agent: {round(total_time / len(selected_agents), 2)}s
"""

    # Detailed JSON for download
    details = {
        "task": task,
        "agents_used": selected_agents,
        "total_time": total_time,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": results,
    }
    json_output = json.dumps(details, indent=2)

    return output, stats, json_output


# Create Gradio Interface
custom_css = """
.gradio-container { font-family: 'Inter', sans-serif; }
.main-header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Agent System") as demo:
    # NOTE(review): the original HTML tags were lost in extraction; this
    # reconstruction uses the .main-header class defined in custom_css above
    # with the surviving heading/subtitle text — confirm against the original.
    gr.HTML("""
    <div class="main-header">
        <h1>🤖 Multi-Agent AI System</h1>
        <p>Parallel AI Processing with Specialized Agents | Powered by Hugging Face Inference API</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Task Configuration")
            task_input = gr.Textbox(
                label="What do you want the agents to work on?",
                placeholder="Example: Build a user authentication system with JWT tokens",
                lines=5,
            )

            gr.Markdown("### 🎯 Select Your Team")
            with gr.Group():
                researcher_check = gr.Checkbox(
                    label="🔍 Researcher Agent",
                    value=True,
                    info="Gathers information and best practices",
                )
                coder_check = gr.Checkbox(
                    label="💻 Coder Agent",
                    value=True,
                    info="Writes production-ready code",
                )
                analyzer_check = gr.Checkbox(
                    label="📊 Analyzer Agent",
                    value=True,
                    info="Provides insights and recommendations",
                )
                writer_check = gr.Checkbox(
                    label="✍️ Writer Agent",
                    value=True,
                    info="Creates documentation",
                )

            gr.Markdown("### ⚙️ Settings")
            max_tokens = gr.Slider(
                minimum=100,
                maximum=500,
                value=300,
                step=50,
                label="Response Length",
                info="Tokens per agent response",
            )

            process_btn = gr.Button(
                "🚀 Deploy Agents",
                variant="primary",
                size="lg",
            )

            gr.Markdown("""
### 💡 Pro Tips
- Use all 4 agents for comprehensive results
- Agents run simultaneously = 3-4x faster!
- Each agent brings unique expertise
- No model downloads = instant startup
""")

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results Dashboard")
            output_display = gr.Markdown(
                value="*Results will appear here after running agents...*",
                label="Agent Outputs",
            )

            with gr.Accordion("📈 Execution Statistics", open=True):
                stats_display = gr.Markdown(value="*No data yet*")

            with gr.Accordion("💾 Download Results (JSON)", open=False):
                json_output = gr.Code(
                    label="Complete Results",
                    language="json",
                    lines=10,
                )

    gr.Markdown("### 📚 Quick Start Examples")
    gr.Examples(
        examples=[
            ["Create a REST API for a todo list application with authentication"],
            ["Build a machine learning pipeline for image classification"],
            ["Design a microservices architecture for an e-commerce platform"],
            ["Develop a real-time chat application using WebSockets"],
            ["Create a data visualization dashboard for sales analytics"],
        ],
        inputs=task_input,
    )

    gr.Markdown("""
---
## 🏗️ System Architecture

**How It Works:**
1. **Task Distribution** → Your task is sent to selected agents
2. **Parallel Processing** → All agents work simultaneously (not sequential!)
3. **Smart Aggregation** → Results are collected as they complete
4. **Instant Results** → See output from each agent in real-time

**Technology:**
- ⚡ Hugging Face Inference API (serverless, no model loading)
- 🔄 ThreadPoolExecutor for true parallelism
- 🚀 Free tier compatible
- 📊 Real-time progress tracking

**Models Used:**
- Mistral-7B-Instruct (Researcher, Analyzer, Writer)
- StarCoder2-15B (Coder)
""")

    # Connect button
    process_btn.click(
        fn=process_task,
        inputs=[
            task_input,
            researcher_check,
            coder_check,
            analyzer_check,
            writer_check,
            max_tokens,
        ],
        outputs=[output_display, stats_display, json_output],
    )

# Launch with optimized settings
if __name__ == "__main__":
    demo.queue(max_size=20)  # Handle multiple users
    demo.launch(
        show_error=True,
        share=False,
    )