import sys import subprocess def install_packages(): """Automatically install missing packages""" required_packages = [ 'gradio', 'requests', 'python-dotenv', ] for package in required_packages: try: __import__(package.replace('-', '_')) print(f"✓ {package} already installed") except ImportError: print(f"Installing {package}...") subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"]) print(f"✓ {package} installed") print("Checking dependencies...") install_packages() print("✓ All dependencies ready!\n") import os import json import time import logging from typing import List, Dict, Any, Optional, Tuple from datetime import datetime from dataclasses import dataclass from enum import Enum import random import gradio as gr import requests from dotenv import load_dotenv from concurrent.futures import ThreadPoolExecutor, as_completed load_dotenv() logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # ============================================================================ # CONFIGURATION # ============================================================================ class APIProvider(Enum): GROQ = "groq" GOOGLE = "google" ANTHROPIC = "anthropic" OPENAI = "openai" PERPLEXITY = "perplexity" OPENROUTER = "openrouter" @dataclass class LLMConfig: provider: APIProvider model_name: str api_key_env: str base_url: str headers_template: Dict[str, str] request_payload_template: Dict[str, Any] response_extractor: callable rate_limit: int # ============================================================================ # LLM CONFIGURATIONS - ALL 12+ MODELS FROM ORIGINAL APP.PY # ============================================================================ LLM_CONFIGS: Dict[str, LLMConfig] = { # ===== GROQ (Ultra-Fast, Free) ===== "Llama-3.3-70B (Groq)": LLMConfig( provider=APIProvider.GROQ, model_name="llama-3.3-70b-versatile", api_key_env=os.getenv("groq"), base_url="https://api.groq.com/openai/v1/chat/completions", headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"}, request_payload_template={ "model": "llama-3.3-70b-versatile", "messages": [], "temperature": 0.7, "max_tokens": 1024, "top_p": 0.9, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=30, ), "Llama-3.2-90B-Vision (Groq)": LLMConfig( provider=APIProvider.GROQ, model_name="llama-3.2-90b-vision-preview", api_key_env=os.getenv("groq"), base_url="https://api.groq.com/openai/v1/chat/completions", headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"}, request_payload_template={ "model": "llama-3.2-90b-vision-preview", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=30, ), # ===== GOOGLE (Gemini, Free Tier) ===== "Gemini-2.0-Flash": LLMConfig( provider=APIProvider.GOOGLE, model_name="gemini-2.0-flash", api_key_env=os.getenv("gemini"), base_url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent", headers_template={"x-goog-api-key": "{api_key}", "Content-Type": "application/json"}, request_payload_template={ "contents": [{"parts": [{"text": ""}]}], "generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024}, }, response_extractor=lambda r: r.json()["candidates"][0]["content"]["parts"][0]["text"], rate_limit=60, ), "Gemini-2.0-Pro": LLMConfig( provider=APIProvider.GOOGLE, model_name="gemini-2.0-pro", api_key_env=os.getenv("gemini"), base_url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro:generateContent", headers_template={"x-goog-api-key": "{api_key}", "Content-Type": "application/json"}, request_payload_template={ "contents": [{"parts": [{"text": ""}]}], "generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024}, }, response_extractor=lambda r: r.json()["candidates"][0]["content"]["parts"][0]["text"], rate_limit=60, ), # ===== ANTHROPIC (Claude) ===== "Claude-3.5-Sonnet": LLMConfig( provider=APIProvider.ANTHROPIC, model_name="claude-3-5-sonnet-20241022", api_key_env="ANTHROPIC_API_KEY", base_url="https://api.anthropic.com/v1/messages", headers_template={ "x-api-key": "{api_key}", "anthropic-version": "2023-06-01", "content-type": "application/json" }, request_payload_template={ "model": "claude-3-5-sonnet-20241022", "messages": [], "max_tokens": 1024, "temperature": 0.7, }, response_extractor=lambda r: r.json()["content"][0]["text"], rate_limit=50, ), "Claude-3-Opus": LLMConfig( provider=APIProvider.ANTHROPIC, model_name="claude-3-opus-20240229", api_key_env="ANTHROPIC_API_KEY", base_url="https://api.anthropic.com/v1/messages", headers_template={ "x-api-key": "{api_key}", "anthropic-version": "2023-06-01", "content-type": "application/json" }, request_payload_template={ "model": "claude-3-opus-20240229", "messages": [], "max_tokens": 1024, "temperature": 0.7, }, response_extractor=lambda r: r.json()["content"][0]["text"], rate_limit=50, ), "Claude-3-Haiku": LLMConfig( provider=APIProvider.ANTHROPIC, model_name="claude-3-haiku-20240307", api_key_env="ANTHROPIC_API_KEY", base_url="https://api.anthropic.com/v1/messages", headers_template={ "x-api-key": "{api_key}", "anthropic-version": "2023-06-01", "content-type": "application/json" }, request_payload_template={ "model": "claude-3-haiku-20240307", "messages": [], "max_tokens": 1024, "temperature": 0.7, }, response_extractor=lambda r: r.json()["content"][0]["text"], rate_limit=100, ), # ===== OPENAI (ChatGPT & GPT-4) ===== "GPT-4-Turbo": LLMConfig( provider=APIProvider.OPENAI, model_name="gpt-4-turbo", api_key_env=os.getenv("openai"), base_url="https://api.openai.com/v1/chat/completions", headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"}, request_payload_template={ "model": "gpt-4-turbo", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=50, ), "GPT-4o": LLMConfig( provider=APIProvider.OPENAI, model_name="gpt-4o", api_key_env=os.getenv("openai"), base_url="https://api.openai.com/v1/chat/completions", headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"}, request_payload_template={ "model": "gpt-4o", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=50, ), "GPT-4o-mini": LLMConfig( provider=APIProvider.OPENAI, model_name="gpt-4o-mini", api_key_env=os.getenv("openai"), base_url="https://api.openai.com/v1/chat/completions", headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"}, request_payload_template={ "model": "gpt-4o-mini", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=50, ), # ===== PERPLEXITY ===== "Perplexity-Sonar-Large": LLMConfig( provider=APIProvider.PERPLEXITY, model_name="llama-3.1-sonar-large-128k-online", api_key_env=os.getenv("perplexity"), base_url="https://api.perplexity.ai/chat/completions", headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"}, request_payload_template={ "model": "llama-3.1-sonar-large-128k-online", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=40, ), # ===== OPENROUTER ===== "Mistral-7B": LLMConfig( provider=APIProvider.OPENROUTER, model_name="mistralai/mistral-7b-instruct:free", api_key_env=os.getenv("openrouter"), base_url="https://openrouter.ai/api/v1/chat/completions", headers_template={ "Authorization": "Bearer {api_key}", "Content-Type": "application/json", "HTTP-Referer": "http://localhost" }, request_payload_template={ "model": "mistralai/mistral-7b-instruct:free", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=20, ), "Qwen-2.5-72B": LLMConfig( provider=APIProvider.OPENROUTER, model_name="qwen/qwen-2.5-72b-instruct:free", api_key_env=os.getenv("openrouter"), base_url="https://openrouter.ai/api/v1/chat/completions", headers_template={ "Authorization": "Bearer {api_key}", "Content-Type": "application/json", "HTTP-Referer": "http://localhost" }, request_payload_template={ "model": "qwen/qwen-2.5-72b-instruct:free", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=20, ), "DeepSeek-R1": LLMConfig( provider=APIProvider.OPENROUTER, model_name="deepseek/deepseek-r1:free", api_key_env=os.getenv("openrouter"), base_url="https://openrouter.ai/api/v1/chat/completions", headers_template={ "Authorization": "Bearer {api_key}", "Content-Type": "application/json", "HTTP-Referer": "http://localhost" }, request_payload_template={ "model": "deepseek/deepseek-r1:free", "messages": [], "temperature": 0.7, "max_tokens": 1024, }, response_extractor=lambda r: r.json()["choices"][0]["message"]["content"], rate_limit=15, ), } # ============================================================================ # STAGE 1: PARALLEL OPINIONS # ============================================================================ class Stage1Executor: def __init__(self, models: List[str], timeout: int = 45): self.models = models self.timeout = timeout self.responses: Dict[str, Dict[str, Any]] = {} def _call_llm(self, model_name: str, user_query: str) -> Optional[str]: try: config = LLM_CONFIGS[model_name] api_key = os.getenv(config.api_key_env) if not api_key: logger.warning(f"API key missing: {config.api_key_env}") return None if config.provider == APIProvider.GOOGLE: payload = { "contents": [{"parts": [{"text": user_query}]}], "generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024}, } headers = config.headers_template.copy() headers["x-goog-api-key"] = api_key elif config.provider == APIProvider.ANTHROPIC: payload = config.request_payload_template.copy() payload["messages"] = [{"role": "user", "content": user_query}] headers = config.headers_template.copy() headers["x-api-key"] = api_key else: payload = config.request_payload_template.copy() payload["messages"] = [{"role": "user", "content": user_query}] headers = config.headers_template.copy() headers["Authorization"] = f"Bearer {api_key}" response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout) response.raise_for_status() result = config.response_extractor(response) logger.info(f"✓ {model_name}") return result except Exception as e: logger.error(f"✗ {model_name}: {str(e)}") return None def execute(self, user_query: str) -> Dict[str, Dict[str, Any]]: self.responses = {} with ThreadPoolExecutor(max_workers=min(len(self.models), 8)) as executor: future_to_model = {executor.submit(self._call_llm, model, user_query): model for model in self.models} for future in as_completed(future_to_model): model_name = future_to_model[future] try: response = future.result() if response: self.responses[model_name] = {"response": response, "timestamp": datetime.now().isoformat()} except Exception as e: logger.error(f"Error {model_name}: {str(e)}") return self.responses # ============================================================================ # STAGE 2: PEER REVIEW # ============================================================================ class Stage2Executor: def __init__(self, stage1_responses: Dict[str, Dict[str, Any]], timeout: int = 60): self.stage1_responses = stage1_responses self.timeout = timeout def _anonymize_responses(self) -> Dict[str, str]: models = list(self.stage1_responses.keys()) anonymous_map = {} shuffled = models.copy() random.shuffle(shuffled) for idx, model in enumerate(shuffled): anonymous_map[f"Model_{chr(65 + idx)}"] = model return anonymous_map def _generate_review_prompt(self, anon_map: Dict[str, str], query: str) -> str: text = f"Query: {query}\n\nReview:\n" for anon_name, model in anon_map.items(): text += f"{anon_name}: {self.stage1_responses[model]['response']}\n\n" text += "Rank models as JSON: {\"rankings\": [{\"model\": \"Model_X\", \"score\": 9}]}" return text def _call_reviewer(self, model: str, prompt: str) -> Optional[Dict]: try: config = LLM_CONFIGS[model] api_key = os.getenv(config.api_key_env) if not api_key: return None if config.provider == APIProvider.GOOGLE: payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.3, "maxOutputTokens": 2048}} headers = config.headers_template.copy() headers["x-goog-api-key"] = api_key elif config.provider == APIProvider.ANTHROPIC: payload = config.request_payload_template.copy() payload["messages"] = [{"role": "user", "content": prompt}] payload["max_tokens"] = 2048 headers = config.headers_template.copy() headers["x-api-key"] = api_key else: payload = config.request_payload_template.copy() payload["messages"] = [{"role": "user", "content": prompt}] payload["max_tokens"] = 2048 headers = config.headers_template.copy() headers["Authorization"] = f"Bearer {api_key}" response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout) response.raise_for_status() result = config.response_extractor(response) try: j_start = result.find('{') j_end = result.rfind('}') + 1 if j_start != -1 and j_end > j_start: return json.loads(result[j_start:j_end]) except: pass return {"raw": result} except Exception as e: logger.error(f"Review error: {str(e)}") return None def execute(self, query: str) -> Dict[str, Any]: anon_map = self._anonymize_responses() prompt = self._generate_review_prompt(anon_map, query) reviews = {} for model in self.stage1_responses.keys(): result = self._call_reviewer(model, prompt) if result: reviews[model] = result return {"reviews": reviews, "anonymous_map": anon_map} # ============================================================================ # STAGE 3: SYNTHESIS # ============================================================================ class Stage3Executor: def __init__(self, s1: Dict, s2: Dict, timeout: int = 60): self.s1 = s1 self.s2 = s2 self.timeout = timeout def _generate_prompt(self, query: str, anon_map: Dict) -> str: text = f"Query: {query}\n\nResponses:\n" for a, m in anon_map.items(): text += f"{a}: {self.s1[m]['response']}\n\n" text += "Synthesize final answer" return text def _call_chairman(self, prompt: str, model: str) -> Optional[str]: try: config = LLM_CONFIGS[model] api_key = os.getenv(config.api_key_env) if not api_key: return None if config.provider == APIProvider.GOOGLE: payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.5, "maxOutputTokens": 4096}} headers = config.headers_template.copy() headers["x-goog-api-key"] = api_key elif config.provider == APIProvider.ANTHROPIC: payload = config.request_payload_template.copy() payload["messages"] = [{"role": "user", "content": prompt}] payload["max_tokens"] = 4096 headers = config.headers_template.copy() headers["x-api-key"] = api_key else: payload = config.request_payload_template.copy() payload["messages"] = [{"role": "user", "content": prompt}] payload["max_tokens"] = 4096 headers = config.headers_template.copy() headers["Authorization"] = f"Bearer {api_key}" response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout) response.raise_for_status() return config.response_extractor(response) except Exception as e: logger.error(f"Chairman error: {str(e)}") return None def execute(self, query: str, model: str, anon_map: Dict) -> Dict[str, Any]: prompt = self._generate_prompt(query, anon_map) final = self._call_chairman(prompt, model) return {"final_response": final or "Unable to synthesize"} # ============================================================================ # ORCHESTRATOR # ============================================================================ class LLMCouncil: def __init__(self, models: List[str], chairman: str): self.models = models self.chairman = chairman def execute(self, query: str) -> Dict[str, Any]: try: logger.info("STAGE 1...") s1 = Stage1Executor(self.models) s1_resp = s1.execute(query) if not s1_resp: return {"error": "Stage 1 failed: No responses"} logger.info("STAGE 2...") s2 = Stage2Executor(s1_resp) s2_result = s2.execute(query) logger.info("STAGE 3...") s3 = Stage3Executor(s1_resp, s2_result["reviews"]) s3_result = s3.execute(query, self.chairman, s2_result["anonymous_map"]) return { "stage_1": {model: resp["response"] for model, resp in s1_resp.items()}, "stage_2": s2_result["reviews"], "stage_3": s3_result["final_response"], } except Exception as e: logger.error(f"Error: {str(e)}") return {"error": str(e)} # ============================================================================ # GRADIO UI # ============================================================================ def run_council(query: str, models_str: str, chairman: str) -> Tuple[str, str, str]: """Run council and return results""" if not query.strip(): return ("Enter a query", "", "") if not models_str.strip(): return ("Select models (comma-separated)", "", "") models = [m.strip() for m in models_str.split(",")] models = [m for m in models if m in LLM_CONFIGS] if len(models) < 2: return ("Select at least 2 valid models", "", "") if chairman not in LLM_CONFIGS: return ("Select valid chairman", "", "") try: council = LLMCouncil(models, chairman) result = council.execute(query) if "error" in result: return (f"❌ {result['error']}", "", "") # Final Synthesis final = result.get("stage_3", "No response") # Stage 1 stage1 = "## Stage 1: Model Responses\n\n" for model, resp in result.get("stage_1", {}).items(): stage1 += f"**{model}**\n{resp}\n\n" # Stage 2 stage2 = "## Stage 2: Reviews\n\n" for model, review in result.get("stage_2", {}).items(): stage2 += f"**{model}**\n{json.dumps(review, indent=2)}\n\n" return (final, stage1, stage2) except Exception as e: return (f"Error: {str(e)}", "", "") def get_api_status() -> str: """Get API status""" status = "## API Status\n\n" providers = {} for model in LLM_CONFIGS.keys(): config = LLM_CONFIGS[model] key_set = os.getenv(config.api_key_env) is not None if config.api_key_env not in providers: providers[config.api_key_env] = key_set for provider, is_set in sorted(providers.items()): icon = "✓" if is_set else "✗" status += f"{icon} {provider}: {'OK' if is_set else 'MISSING'}\n\n" return status # ============================================================================ # MAIN # ============================================================================ def create_interface(): """Create Gradio interface""" available_models = sorted(list(LLM_CONFIGS.keys())) default_models = ", ".join(available_models[:3]) demo = gr.Blocks() with demo: gr.Markdown(""" # 🏛️ LLM Council **3-Stage Consensus AI Pipeline with 12+ Models** ✅ Stage 1: Parallel opinions from all selected models ✅ Stage 2: Anonymous peer review and ranking ✅ Stage 3: Chairman synthesizes final consensus """) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### ⚙️ Setup") gr.Markdown(get_api_status()) models_input = gr.Textbox( label="Models (comma-separated)", value=default_models, lines=3, ) chairman_input = gr.Dropdown( choices=available_models, label="Chairman Model", value=available_models[0] if available_models else None ) gr.Markdown(""" ### 📋 Available Models: - **Groq**: Llama-3.3-70B, Llama-3.2-90B-Vision - **Google**: Gemini-2.0-Flash, Gemini-2.0-Pro - **Claude**: Claude-3.5-Sonnet, Claude-3-Opus, Claude-3-Haiku - **OpenAI**: GPT-4o, GPT-4o-mini, GPT-4-Turbo - **Perplexity**: Sonar-Large - **OpenRouter**: Mistral-7B, Qwen-2.5-72B, DeepSeek-R1 """) with gr.Column(scale=2): gr.Markdown("### 🎯 Query") query_input = gr.Textbox( label="Your Question", lines=4, placeholder="Ask anything...", ) run_btn = gr.Button("🚀 Run Council", variant="primary") with gr.Tabs(): with gr.TabItem("Final"): final_out = gr.Markdown() with gr.TabItem("Stage 1"): s1_out = gr.Markdown() with gr.TabItem("Stage 2"): s2_out = gr.Markdown() run_btn.click( fn=run_council, inputs=[query_input, models_input, chairman_input], outputs=[final_out, s1_out, s2_out] ) return demo if __name__ == "__main__": demo = create_interface() demo.launch(server_name="0.0.0.0", server_port=7860, share=False)