Spaces:
Sleeping
Sleeping
| """Gradio web interface for Heavy multi-model system with model selection.""" | |
| import asyncio | |
| import gradio as gr | |
| import yaml | |
| from pathlib import Path | |
| from .multi_client import MultiModelClient | |
| from .multi_orchestrator import MultiOrchestrator | |
| from .tavily_search import TavilySearcher | |
| from typing import Generator | |
def load_config(config_path: str = "config.yaml") -> dict:
    """Load configuration from a YAML file, falling back to built-in defaults.

    Args:
        config_path: Path of the YAML configuration file to read.

    Returns:
        A configuration dict that always contains the 'model',
        'orchestrator', 'agent' and 'output' sections. When the file does not
        exist, is empty, or does not hold a mapping at the top level, the
        built-in defaults are returned. When the file holds only part of the
        configuration, missing sections and missing keys inside mapping
        sections are filled in from the defaults.
    """
    # Built fresh on every call so callers can mutate the result freely.
    defaults = {
        'model': {'name': 'anthropic/claude-3.5-sonnet', 'temperature': 0.7, 'max_tokens': 4000},
        'orchestrator': {'num_agents': 4, 'parallel_execution': True},
        'agent': {'timeout': 120, 'retry_attempts': 3},
        'output': {'verbose': True, 'show_agent_thoughts': True},
    }

    config_file = Path(config_path)
    if not config_file.exists():
        return defaults

    # Explicit encoding: do not depend on the platform's locale default.
    with open(config_file, 'r', encoding='utf-8') as f:
        loaded = yaml.safe_load(f)

    # safe_load yields None for an empty document and may yield a list or
    # scalar; callers index the result by section name, so only a mapping
    # is usable.
    if not isinstance(loaded, dict):
        return defaults

    for section, section_defaults in defaults.items():
        current = loaded.get(section)
        if current is None:
            loaded[section] = section_defaults
        elif isinstance(current, dict):
            # Values from the file win; defaults only fill the gaps.
            loaded[section] = {**section_defaults, **current}
        # A non-mapping section is left exactly as the file defined it.
    return loaded
def _canonical_mode(mode: str) -> str:
    """Map a mode label onto one of 'original', 'single' or 'multi'.

    Both the short codes ("Original M", "S") and the long radio-button labels
    ("Use make-it-heavy (original repo)", "Single Model (...)") are accepted.
    Anything else is treated as multi-model mode.
    """
    label = (mode or "").strip()
    if label == "Original M" or label.startswith("Use make-it-heavy"):
        return "original"
    if label == "S" or label.startswith("Single Model"):
        return "single"
    return "multi"


def _model_display_name(model_id: str) -> str:
    """Return the display name registered for *model_id*, or the id itself."""
    return MultiModelClient.MODELS.get(model_id, {}).get("display_name", model_id)


def _build_context_query(query: str, chat_history: list = None) -> str:
    """Prefix *query* with the last six chat messages, when there are any."""
    if not chat_history:
        return query
    lines = []
    for turn in chat_history[-6:]:  # last 6 messages (3 exchanges)
        role = "User" if turn["role"] == "user" else "Assistant"
        lines.append(f"{role}: {turn['content']}\n")
    return "Previous conversation:\n" + "".join(lines) + f"\nCurrent question: {query}"


def _format_agent_results(agent_results: list, show_agent_thoughts: bool) -> str:
    """Render agent results as Markdown, or as a one-line success count."""
    if not show_agent_thoughts:
        successful = sum(1 for r in agent_results if r['success'])
        return f"β {successful} agents completed analysis successfully"

    parts = []
    for result in agent_results:
        model_display = _model_display_name(result['model'])
        if result['success']:
            parts.append(f"**Agent {result['agent_id']} ({model_display})**\n\n")
            parts.append(f"*Question: {result['question']}*\n\n")
            parts.append(f"{result['analysis']}\n\n---\n\n")
        else:
            parts.append(
                f"**Agent {result['agent_id']} ({model_display}) failed:** {result['error']}\n\n---\n\n"
            )
    return "".join(parts)


async def process_query_multi(
    query: str,
    num_agents: int,
    show_agent_thoughts: bool,
    mode: str,
    single_model: str,
    orchestrator_model: str,
    agent_model: str,
    synthesizer_model: str,
    api_key: str,
    use_tavily: bool,
    tavily_api_key: str,
    chat_history: list = None
) -> tuple[str, str, str, str]:
    """Process query with model selection and conversation context.

    Runs the three pipeline stages in order: question generation, agent
    execution, and synthesis.

    Args:
        query: The user's current question.
        num_agents: Number of agents, written into the orchestrator config.
        show_agent_thoughts: When true, return each agent's full analysis;
            otherwise return only a count of successful agents.
        mode: Model-selection mode. The make-it-heavy mode pins every role to
            "gpt-4.1-mini", single-model mode uses ``single_model`` for every
            role, and any other value uses the three per-role models.
        single_model: Model id used for all roles in single-model mode.
        orchestrator_model: Model id for question generation (multi mode).
        agent_model: Model id for the agents (multi mode).
        synthesizer_model: Model id for the final synthesis (multi mode).
        api_key: OpenRouter API key; an empty value is passed on as None.
        use_tavily: Whether web search was requested.
        tavily_api_key: Tavily API key; search is only enabled when this is
            non-empty and ``use_tavily`` is true.
        chat_history: List of previous conversation turns for context. Each
            turn is a dict with "role" and "content" keys.

    Returns:
        Tuple of (model_info, questions, agent_analyses, final_response)
    """
    context_query = _build_context_query(query, chat_history)

    # Load config and override with web params
    config = load_config()
    config['orchestrator']['num_agents'] = num_agents
    config['output']['show_agent_thoughts'] = show_agent_thoughts
    config['output']['verbose'] = False  # Disable CLI output

    # Determine which models to use based on mode
    selected_mode = _canonical_mode(mode)
    if selected_mode == "original":
        # Use make-it-heavy default: GPT-4.1 Mini for all roles
        orch_model = ag_model = synth_model = "gpt-4.1-mini"
        model_info = """**Model Configuration:**
Using **make-it-heavy** original implementation with GPT-4.1 Mini (cost-efficient and fast)
"""
    elif selected_mode == "single":
        orch_model = ag_model = synth_model = single_model
        model_info = f"""**Model Configuration:**
- **All Roles**: {_model_display_name(single_model)}
"""
    else:  # Multi-Model mode
        orch_model = orchestrator_model
        ag_model = agent_model
        synth_model = synthesizer_model
        model_info = f"""**Model Configuration:**
- **Orchestrator** (Question Generator): {_model_display_name(orch_model)}
- **Agents** (Parallel Analyzers): {_model_display_name(ag_model)}
- **Synthesizer** (Final Response): {_model_display_name(synth_model)}
"""

    # Initialize multi-model client with user's API key
    client = MultiModelClient(
        openrouter_api_key=api_key if api_key else None,
        temperature=config['model']['temperature'],
        max_tokens=config['model']['max_tokens']
    )

    # Initialize Tavily searcher if enabled
    tavily_searcher = None
    if use_tavily and tavily_api_key:
        tavily_searcher = TavilySearcher(api_key=tavily_api_key)
        model_info += "\nπ **Web Search**: Enabled (Tavily)\n"
    else:
        model_info += "\nπ **Web Search**: Disabled\n"

    # Initialize orchestrator with model selection
    orchestrator = MultiOrchestrator(
        client,
        config,
        orchestrator_model=orch_model,
        agent_model=ag_model,
        synthesizer_model=synth_model,
        tavily_searcher=tavily_searcher
    )

    # Each stage receives the context-enriched query, not the bare one.
    questions = await orchestrator._generate_questions(context_query)
    questions_text = "\n".join(f"{i + 1}. {q}" for i, q in enumerate(questions))

    agent_results = await orchestrator._execute_agents_parallel(context_query, questions)
    agent_text = _format_agent_results(agent_results, show_agent_thoughts)

    final_response = await orchestrator._synthesize_results(context_query, agent_results)
    return model_info, questions_text, agent_text, final_response
def process_query_sync(
    query: str,
    num_agents: int,
    show_agent_thoughts: bool,
    mode: str,
    single_model: str,
    orchestrator_model: str,
    agent_model: str,
    synthesizer_model: str,
    api_key: str,
    use_tavily: bool,
    tavily_api_key: str,
    chat_history: list = None
):
    """Synchronous wrapper for async query processing.

    Validates the inputs, then runs ``process_query_multi`` to completion on
    a fresh event loop.

    Args:
        query: The user's question; must contain non-whitespace text.
        api_key: OpenRouter API key; must contain non-whitespace text.
        use_tavily: Whether web search was requested.
        tavily_api_key: Tavily key; required only when ``use_tavily`` is true.
        chat_history: Optional list of previous conversation turns.
        (All remaining arguments are forwarded unchanged.)

    Returns:
        Tuple of (model_info, questions, agent_analyses, final_response).
        When validation fails, the first element carries the message for the
        user and the other three are empty strings.
    """
    # `or ""` keeps a None value (e.g. an unset textbox) from raising on .strip().
    if not (query or "").strip():
        return "Please enter a query", "", "", ""
    if not (api_key or "").strip():
        return "β οΈ Please enter your OpenRouter API key", "", "", ""
    if use_tavily and not (tavily_api_key or "").strip():
        return "β οΈ Please enter your Tavily API key or disable web search", "", "", ""

    # asyncio.run creates the loop, cancels leftover tasks and closes the loop
    # on exit, so no closed loop stays registered for this thread.
    return asyncio.run(
        process_query_multi(
            query, num_agents, show_agent_thoughts, mode,
            single_model, orchestrator_model, agent_model, synthesizer_model,
            api_key, use_tavily, tavily_api_key, chat_history
        )
    )
def process_chat_message(
    message: str,
    chat_history: list,
    num_agents: int,
    show_agent_thoughts: bool,
    mode: str,
    single_model: str,
    orchestrator_model: str,
    agent_model: str,
    synthesizer_model: str,
    api_key: str,
    use_tavily: bool,
    tavily_api_key: str
):
    """Process a chat message and update conversation history.

    Args:
        message: The new user message.
        chat_history: Previous turns as dicts with "role" and "content" keys.
            The list is extended in place; None is treated as an empty history.
        api_key: OpenRouter API key; required.
        use_tavily: Whether web search was requested.
        tavily_api_key: Tavily key; required only when ``use_tavily`` is true.
        (All remaining arguments are forwarded to ``process_query_sync``.)

    Returns:
        Tuple of (chat_history, model_info, questions, agent_analyses,
        response). A blank message returns the history unchanged. A missing
        key appends the user message plus a warning from the assistant. In
        both cases the other four elements are empty strings.
    """
    if chat_history is None:
        chat_history = []  # an empty chat may arrive as None

    if not (message or "").strip():
        return chat_history, "", "", "", ""

    # Both key checks answer inside the chat, so they share one exit path.
    warning = None
    if not (api_key or "").strip():
        warning = "β οΈ Please enter your OpenRouter API key in the settings above."
    elif use_tavily and not (tavily_api_key or "").strip():
        warning = "β οΈ Please enter your Tavily API key or disable web search."
    if warning is not None:
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": warning})
        return chat_history, "", "", "", ""

    # Snapshot the earlier turns before adding the new message, so the
    # context handed to the model does not repeat the current question.
    prior_turns = list(chat_history)
    chat_history.append({"role": "user", "content": message})

    # Process the query with conversation context
    model_info, questions, agents, response = process_query_sync(
        message, num_agents, show_agent_thoughts, mode,
        single_model, orchestrator_model, agent_model, synthesizer_model,
        api_key, use_tavily, tavily_api_key, prior_turns
    )

    # Add assistant response to history
    chat_history.append({"role": "assistant", "content": response})
    return chat_history, model_info, questions, agents, response
def generate_plan_mode(
    task: str,
    num_parallel_agents: int,
    planner_model: str,
    api_key: str
) -> tuple[str, str]:
    """Ask the planner model for a structured plan; nothing is executed.

    Args:
        task: The task or goal to plan; must contain non-whitespace text.
        num_parallel_agents: Number of agents the plan may allocate work to.
        planner_model: Model id passed to the client for the planning call.
        api_key: OpenRouter API key; must contain non-whitespace text.

    Returns:
        Tuple of (model_info, plan_markdown). On a validation failure
        model_info is empty and plan_markdown holds the warning text.
    """
    # Guard clauses: refuse to call the model without a task and a key.
    if not task.strip():
        return "", "β οΈ Please enter a task or goal to plan."
    if not api_key.strip():
        return "", "β οΈ Please enter your OpenRouter API key."

    planner_instructions = """You are Plan Mode Orchestrator for a multi-agent workflow. Your job is to produce a crisp, dependency-aware plan (not to execute).
Follow this shape and keep it under 450 words:
- Plan Snapshot: goal, success criteria, timebox/constraints.
- Clarifying Questions: 3-6 blocking questions the user must answer before execution.
- Workstreams (parallel-friendly): allocate to Agent 1..N with deliverables and what 'done' means.
- Execution Steps: ordered steps with dependencies; note which can run in parallel.
- Risks & Checks: top risks, decision points, and how to validate outputs fast.
- Agent Prompts: 1 short, ready-to-run prompt per agent referencing this plan.
Be concrete, avoid fluff, and keep scope tight. If information is missing, flag it explicitly in Clarifying Questions and Risks instead of guessing."""

    goal = task.strip()
    planner_request = f"""Task/Goal:
{goal}
Parallel agents available: {num_parallel_agents}
Desired output style: Markdown with headings for each section above."""

    conversation = [
        {"role": "system", "content": planner_instructions},
        {"role": "user", "content": planner_request},
    ]

    planner_client = MultiModelClient(openrouter_api_key=api_key or None)
    # NOTE(review): the result of chat() is returned as-is; confirm that
    # MultiModelClient.chat is synchronous and returns the reply text.
    plan_markdown = planner_client.chat(
        messages=conversation,
        model=planner_model,
        temperature=0.4,
        max_tokens=1200,
    )

    # Unknown model ids are shown verbatim instead of raising.
    registry_entry = MultiModelClient.MODELS.get(planner_model, {})
    shown_name = registry_entry.get("display_name", planner_model)
    model_info = f"**Planner Model:** {shown_name} | **Parallel agents queued:** {num_parallel_agents}"
    return model_info, plan_markdown
# Dropdown label -> model id, in the order the dropdowns should list them.
_MODEL_IDS_BY_LABEL = {
    "GPT-5": "gpt-5",
    "GPT-5.1": "gpt-5.1",
    "Gemini 3 Pro Preview": "gemini-3-pro-preview",
    "Gemini 2.5 Pro": "gemini-2.5-pro",
    "Claude 4.5 Sonnet": "claude-4.5-sonnet",
    "Claude 4.5 Opus": "claude-4.5-opus",
    "GPT-4.1 Mini (make-it-heavy default)": "gpt-4.1-mini",
    "Gemini 2.0 Flash (fast)": "gemini-2.0-flash",
    "Llama 3.1 70B (open source)": "llama-3.1-70b",
}

# (label, model id) pairs used as the `choices` of every model dropdown.
AVAILABLE_MODELS = list(_MODEL_IDS_BY_LABEL.items())
# Create Gradio interface
# The whole UI is declared inside this Blocks context; `demo` is the object
# that launch() serves.
# NOTE(review): confirm against the rendered page which widgets sit inside
# each Row/Column/Group below.
with gr.Blocks(
    title="Heavy Multi-Model - AI Analysis",
    theme=gr.themes.Soft()
) as demo:
    # Page header
    gr.Markdown(
        """
# π€ Heavy Multi-Model 2.0 - AI Analysis System
**NEW in v2.0:**
- π Web Search with Tavily! Get real-time information from the web
- π¬ **Chat Mode with Context!** Have multi-turn conversations with memory
Choose different AI models for each role, use one model for everything, or use the original make-it-heavy implementation!
**Available Models:** GPT-5, GPT-5.1, Gemini 3 Pro Preview, Gemini 2.5 Pro, Claude 4.5 Sonnet, Claude 4.5 Opus, GPT-4.1 Mini, Gemini 2.0 Flash, Llama 3.1 70B
"""
    )
    with gr.Row():
        # Left column (scale=2): all inputs and the submit button.
        with gr.Column(scale=2):
            # API Key input
            api_key_input = gr.Textbox(
                label="π OpenRouter API Key",
                placeholder="Enter your OpenRouter API key (sk-or-v1-...)",
                type="password",
                info="Get your key from https://openrouter.ai/keys"
            )
            # Tavily Web Search Configuration
            gr.Markdown("### π Web Search (NEW in v2.0)")
            with gr.Row():
                use_tavily_checkbox = gr.Checkbox(
                    label="Enable Web Search",
                    value=False,
                    info="Use Tavily to search the web for real-time information"
                )
                # Hidden until the checkbox is ticked (see toggle_tavily_key).
                tavily_api_key_input = gr.Textbox(
                    label="π Tavily API Key (Optional)",
                    placeholder="Enter your Tavily API key (tvly-...)",
                    type="password",
                    info="Get your key from https://tavily.com",
                    visible=False
                )
            query_input = gr.Textbox(
                label="Your Query",
                placeholder="What are the implications of quantum computing on cryptography?",
                lines=3
            )
            # Model selection mode
            # NOTE(review): the selected label is passed verbatim as `mode`
            # to the click handler; confirm the handler recognizes these
            # exact label strings.
            gr.Markdown("### π― Model Configuration")
            mode_radio = gr.Radio(
                choices=[
                    "Single Model (all roles use same model)",
                    "Multi-Model (assign different models to each role)",
                    "Use make-it-heavy (original repo)"
                ],
                value="Single Model (all roles use same model)",
                label="Mode",
                info="Choose how to configure the analysis"
            )
            # Single model selector (visible in single model mode)
            with gr.Group(visible=True) as single_model_group:
                single_model_dropdown = gr.Dropdown(
                    choices=AVAILABLE_MODELS,
                    value="claude-4.5-sonnet",
                    label="Model for All Roles",
                    info="This model will be used for orchestrator, agents, and synthesizer"
                )
            # Multi-model selectors (visible in multi-model mode)
            with gr.Group(visible=False) as multi_model_group:
                gr.Markdown("**Assign models to each role:**")
                orchestrator_dropdown = gr.Dropdown(
                    choices=AVAILABLE_MODELS,
                    value="claude-4.5-sonnet",
                    label="Orchestrator Model",
                    info="Generates specialized research questions"
                )
                agent_dropdown = gr.Dropdown(
                    choices=AVAILABLE_MODELS,
                    value="gpt-5.1",
                    label="Agent Model",
                    info="All agents use this model for parallel analysis"
                )
                synthesizer_dropdown = gr.Dropdown(
                    choices=AVAILABLE_MODELS,
                    value="gemini-3-pro-preview",
                    label="Synthesizer Model",
                    info="Combines all agent insights into final response"
                )
            gr.Markdown("### βοΈ Analysis Settings")
            with gr.Row():
                num_agents_slider = gr.Slider(
                    minimum=2,
                    maximum=8,
                    value=4,
                    step=1,
                    label="Number of Agents",
                    info="More agents = more perspectives"
                )
                show_thoughts_checkbox = gr.Checkbox(
                    label="Show Agent Thoughts",
                    value=True,
                    info="Display individual agent analyses"
                )
            submit_btn = gr.Button("π Analyze with Selected Models", variant="primary", size="lg")
        # Right column (scale=1): static help text.
        with gr.Column(scale=1):
            gr.Markdown(
                """
### How It Works
**Roles:**
1. **Orchestrator**: Breaks your query into specialized questions
2. **Agents**: Analyze different perspectives in parallel
3. **Synthesizer**: Combines insights into comprehensive answer
### π Web Search (v2.0)
**Enable Tavily to:**
- Give agents access to real-time web data
- Search for each specialized question
- Enhance analysis with current facts
- Cite sources in responses
### Model Selection
**Single Model Mode:**
- Use one model for all roles
- Simpler, more consistent
- Like original Heavy
**Multi-Model Mode:**
- Assign different models to different roles
- Leverage each model's strengths
- More diverse perspectives
**make-it-heavy Mode:**
- Uses original make-it-heavy repo approach
- GPT-4.1 Mini (cost-efficient)
- Proven multi-agent architecture
### Tips
- Try different combinations!
- Claude 4.5: Great reasoning
- GPT-5: Fast and creative
- GPT-5.1: Latest frontier reasoning + creativity
- Gemini 3 Pro Preview: Deep multimodal analysis
- Gemini 2.5 Pro: Excellent synthesis + summarization
- GPT-4.1 Mini: Cost-effective
- Enable web search for current topics!
"""
            )

    # Toggle visibility based on mode
    def toggle_model_selection(mode):
        """Return visibility updates for (single_model_group, multi_model_group)."""
        if mode == "Single Model (all roles use same model)":
            return gr.update(visible=True), gr.update(visible=False)
        elif mode == "Multi-Model (assign different models to each role)":
            return gr.update(visible=False), gr.update(visible=True)
        else:  # Use make-it-heavy mode
            # Models are fixed in this mode, so both selector groups are hidden.
            return gr.update(visible=False), gr.update(visible=False)

    # Toggle Tavily API key visibility based on checkbox
    def toggle_tavily_key(use_tavily):
        """Show the Tavily key textbox only while web search is enabled."""
        return gr.update(visible=use_tavily)

    # Wire the two toggles to their controls.
    mode_radio.change(
        fn=toggle_model_selection,
        inputs=[mode_radio],
        outputs=[single_model_group, multi_model_group]
    )
    use_tavily_checkbox.change(
        fn=toggle_tavily_key,
        inputs=[use_tavily_checkbox],
        outputs=[tavily_api_key_input]
    )
    gr.Markdown("---")
    # Output panels, one per element of the tuple the click handler returns.
    with gr.Accordion("π― Model Configuration", open=True):
        model_info_output = gr.Markdown(
            label="Active Models"
        )
    with gr.Accordion("π Generated Research Questions", open=True):
        questions_output = gr.Textbox(
            label="Specialized Questions",
            lines=6,
            interactive=False
        )
    with gr.Accordion("π Agent Analyses", open=False):
        agents_output = gr.Markdown(
            label="Individual Agent Thoughts"
        )
    with gr.Accordion("β¨ Final Synthesized Response", open=True):
        response_output = gr.Markdown(
            label="Comprehensive Answer"
        )
    # Examples
    # Each example row supplies one value per component in `inputs`, in the
    # same order (10 values). The OpenRouter key is not part of the examples.
    gr.Examples(
        examples=[
            [
                "How do I choose the right database for my application?",
                4, True,
                "Single Model (all roles use same model)",
                "claude-4.5-sonnet",
                "claude-4.5-sonnet", "gpt-5", "gemini-2.5-pro",
                False, ""
            ],
            [
                "What are the trade-offs between microservices and monolithic architecture?",
                4, True,
                "Multi-Model (assign different models to each role)",
                "claude-4.5-sonnet",
                "claude-4.5-sonnet", "gpt-5", "gemini-2.5-pro",
                False, ""
            ],
            [
                "How can I optimize my Python web application for performance?",
                4, True,
                "Use make-it-heavy (original repo)",
                "gpt-4.1-mini",
                "claude-4.5-sonnet", "gpt-5", "gemini-2.5-pro",
                False, ""
            ],
        ],
        inputs=[
            query_input, num_agents_slider, show_thoughts_checkbox,
            mode_radio, single_model_dropdown,
            orchestrator_dropdown, agent_dropdown, synthesizer_dropdown,
            use_tavily_checkbox, tavily_api_key_input
        ],
        label="Example Configurations (Note: You still need to enter your API keys)"
    )
    # Connect button to processing function
    # The 11 inputs map positionally onto process_query_sync's first 11
    # parameters; its trailing chat_history parameter keeps its default.
    submit_btn.click(
        fn=process_query_sync,
        inputs=[
            query_input, num_agents_slider, show_thoughts_checkbox,
            mode_radio, single_model_dropdown,
            orchestrator_dropdown, agent_dropdown, synthesizer_dropdown,
            api_key_input, use_tavily_checkbox, tavily_api_key_input
        ],
        outputs=[model_info_output, questions_output, agents_output, response_output]
    )
    # Page footer
    gr.Markdown(
        """
---
**How to Use:**
1. Enter your OpenRouter API key (get it from: https://openrouter.ai/keys)
2. **(Optional)** Enable web search and enter your Tavily API key (get it from: https://tavily.com)
3. Choose your preferred mode: Single Model, Multi-Model, or make-it-heavy
4. Enter your query and click "Analyze"
**What's New in v2.0:**
- π **Web Search Integration**: Enable Tavily to give agents access to real-time web information
- Agents will automatically search the web for each specialized question when enabled
- Enhances analysis with current data, facts, and diverse perspectives
**Note:** Processing time varies by model and number of agents. Your API keys are never stored - they're only used for this session.
"""
    )
def launch(share=True, server_port=7861):
    """Start serving the Gradio interface held in ``demo``.

    Args:
        share: Passed to ``demo.launch`` as ``share``; the default (True)
            requests a public shareable link.
        server_port: Port the server listens on (default 7861).
    """
    # Everything except share/port is fixed for this app.
    launch_options = {
        "share": share,
        "server_port": server_port,
        "server_name": "0.0.0.0",  # bind all interfaces: allow external connections
        "show_error": True,
        "quiet": False,
        "inbrowser": True,  # auto-open a browser tab
        "prevent_thread_lock": False,
    }
    demo.launch(**launch_options)
# Script entry point: start the web UI with the default launch settings.
if __name__ == "__main__":
    launch()