# LLM_council / app.py
import sys
import subprocess
def install_packages():
"""Automatically install missing packages"""
    required_packages = {
        'gradio': 'gradio',
        'requests': 'requests',
        'python-dotenv': 'dotenv',  # pip package name differs from its import name
    }
    for package, import_name in required_packages.items():
        try:
            __import__(import_name)
            print(f"✓ {package} already installed")
        except ImportError:
            print(f"Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"])
            print(f"✓ {package} installed")
print("Checking dependencies...")
install_packages()
print("βœ“ All dependencies ready!\n")
import os
import json
import time
import logging
from typing import List, Dict, Any, Optional, Tuple, Callable
from datetime import datetime
from dataclasses import dataclass
from enum import Enum
import random
import gradio as gr
import requests
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed
load_dotenv()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# ============================================================================
# CONFIGURATION
# ============================================================================
class APIProvider(Enum):
GROQ = "groq"
GOOGLE = "google"
ANTHROPIC = "anthropic"
OPENAI = "openai"
PERPLEXITY = "perplexity"
OPENROUTER = "openrouter"
@dataclass
class LLMConfig:
provider: APIProvider
model_name: str
api_key_env: str
base_url: str
headers_template: Dict[str, str]
request_payload_template: Dict[str, Any]
    response_extractor: Callable[..., str]
rate_limit: int
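# Each LLMConfig is a declarative recipe for one model: api_key_env names the
# environment variable holding the key, headers_template and
# request_payload_template are copied and filled in per request, and
# response_extractor pulls the answer text out of the provider-specific JSON
# response. rate_limit is informational (requests/minute) and not enforced here.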
# ============================================================================
# LLM CONFIGURATIONS - 14 MODELS ACROSS 6 PROVIDERS
# ============================================================================
LLM_CONFIGS: Dict[str, LLMConfig] = {
# ===== GROQ (Ultra-Fast, Free) =====
"Llama-3.3-70B (Groq)": LLMConfig(
provider=APIProvider.GROQ,
model_name="llama-3.3-70b-versatile",
api_key_env=os.getenv("groq"),
base_url="https://api.groq.com/openai/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "llama-3.3-70b-versatile",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
"top_p": 0.9,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=30,
),
"Llama-3.2-90B-Vision (Groq)": LLMConfig(
provider=APIProvider.GROQ,
model_name="llama-3.2-90b-vision-preview",
api_key_env=os.getenv("groq"),
base_url="https://api.groq.com/openai/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "llama-3.2-90b-vision-preview",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=30,
),
# ===== GOOGLE (Gemini, Free Tier) =====
"Gemini-2.0-Flash": LLMConfig(
provider=APIProvider.GOOGLE,
model_name="gemini-2.0-flash",
api_key_env=os.getenv("gemini"),
base_url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
headers_template={"x-goog-api-key": "{api_key}", "Content-Type": "application/json"},
request_payload_template={
"contents": [{"parts": [{"text": ""}]}],
"generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024},
},
response_extractor=lambda r: r.json()["candidates"][0]["content"]["parts"][0]["text"],
rate_limit=60,
),
"Gemini-2.0-Pro": LLMConfig(
provider=APIProvider.GOOGLE,
model_name="gemini-2.0-pro",
api_key_env=os.getenv("gemini"),
base_url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro:generateContent",
headers_template={"x-goog-api-key": "{api_key}", "Content-Type": "application/json"},
request_payload_template={
"contents": [{"parts": [{"text": ""}]}],
"generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024},
},
response_extractor=lambda r: r.json()["candidates"][0]["content"]["parts"][0]["text"],
rate_limit=60,
),
# ===== ANTHROPIC (Claude) =====
"Claude-3.5-Sonnet": LLMConfig(
provider=APIProvider.ANTHROPIC,
model_name="claude-3-5-sonnet-20241022",
api_key_env="ANTHROPIC_API_KEY",
base_url="https://api.anthropic.com/v1/messages",
headers_template={
"x-api-key": "{api_key}",
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
request_payload_template={
"model": "claude-3-5-sonnet-20241022",
"messages": [],
"max_tokens": 1024,
"temperature": 0.7,
},
response_extractor=lambda r: r.json()["content"][0]["text"],
rate_limit=50,
),
"Claude-3-Opus": LLMConfig(
provider=APIProvider.ANTHROPIC,
model_name="claude-3-opus-20240229",
api_key_env="ANTHROPIC_API_KEY",
base_url="https://api.anthropic.com/v1/messages",
headers_template={
"x-api-key": "{api_key}",
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
request_payload_template={
"model": "claude-3-opus-20240229",
"messages": [],
"max_tokens": 1024,
"temperature": 0.7,
},
response_extractor=lambda r: r.json()["content"][0]["text"],
rate_limit=50,
),
"Claude-3-Haiku": LLMConfig(
provider=APIProvider.ANTHROPIC,
model_name="claude-3-haiku-20240307",
api_key_env="ANTHROPIC_API_KEY",
base_url="https://api.anthropic.com/v1/messages",
headers_template={
"x-api-key": "{api_key}",
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
request_payload_template={
"model": "claude-3-haiku-20240307",
"messages": [],
"max_tokens": 1024,
"temperature": 0.7,
},
response_extractor=lambda r: r.json()["content"][0]["text"],
rate_limit=100,
),
# ===== OPENAI (ChatGPT & GPT-4) =====
"GPT-4-Turbo": LLMConfig(
provider=APIProvider.OPENAI,
model_name="gpt-4-turbo",
api_key_env=os.getenv("openai"),
base_url="https://api.openai.com/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "gpt-4-turbo",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=50,
),
"GPT-4o": LLMConfig(
provider=APIProvider.OPENAI,
model_name="gpt-4o",
api_key_env=os.getenv("openai"),
base_url="https://api.openai.com/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "gpt-4o",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=50,
),
"GPT-4o-mini": LLMConfig(
provider=APIProvider.OPENAI,
model_name="gpt-4o-mini",
api_key_env=os.getenv("openai"),
base_url="https://api.openai.com/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "gpt-4o-mini",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=50,
),
# ===== PERPLEXITY =====
"Perplexity-Sonar-Large": LLMConfig(
provider=APIProvider.PERPLEXITY,
model_name="llama-3.1-sonar-large-128k-online",
api_key_env=os.getenv("perplexity"),
base_url="https://api.perplexity.ai/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "llama-3.1-sonar-large-128k-online",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=40,
),
# ===== OPENROUTER =====
"Mistral-7B": LLMConfig(
provider=APIProvider.OPENROUTER,
model_name="mistralai/mistral-7b-instruct:free",
api_key_env=os.getenv("openrouter"),
base_url="https://openrouter.ai/api/v1/chat/completions",
headers_template={
"Authorization": "Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "http://localhost"
},
request_payload_template={
"model": "mistralai/mistral-7b-instruct:free",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=20,
),
"Qwen-2.5-72B": LLMConfig(
provider=APIProvider.OPENROUTER,
model_name="qwen/qwen-2.5-72b-instruct:free",
api_key_env=os.getenv("openrouter"),
base_url="https://openrouter.ai/api/v1/chat/completions",
headers_template={
"Authorization": "Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "http://localhost"
},
request_payload_template={
"model": "qwen/qwen-2.5-72b-instruct:free",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=20,
),
"DeepSeek-R1": LLMConfig(
provider=APIProvider.OPENROUTER,
model_name="deepseek/deepseek-r1:free",
api_key_env=os.getenv("openrouter"),
base_url="https://openrouter.ai/api/v1/chat/completions",
headers_template={
"Authorization": "Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "http://localhost"
},
request_payload_template={
"model": "deepseek/deepseek-r1:free",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=15,
),
}
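# Adding another OpenAI-compatible endpoint only needs one more entry in the dict
# above. A minimal sketch (the display name and model id below are illustrative,
# not part of the original configuration):
#
# LLM_CONFIGS["Llama-3.1-8B (Groq)"] = LLMConfig(
#     provider=APIProvider.GROQ,
#     model_name="llama-3.1-8b-instant",
#     api_key_env="groq",
#     base_url="https://api.groq.com/openai/v1/chat/completions",
#     headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
#     request_payload_template={"model": "llama-3.1-8b-instant", "messages": [],
#                               "temperature": 0.7, "max_tokens": 1024},
#     response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
#     rate_limit=30,
# )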
# ============================================================================
# STAGE 1: PARALLEL OPINIONS
# ============================================================================
class Stage1Executor:
def __init__(self, models: List[str], timeout: int = 45):
self.models = models
self.timeout = timeout
self.responses: Dict[str, Dict[str, Any]] = {}
def _call_llm(self, model_name: str, user_query: str) -> Optional[str]:
try:
config = LLM_CONFIGS[model_name]
api_key = os.getenv(config.api_key_env)
if not api_key:
logger.warning(f"API key missing: {config.api_key_env}")
return None
if config.provider == APIProvider.GOOGLE:
payload = {
"contents": [{"parts": [{"text": user_query}]}],
"generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024},
}
headers = config.headers_template.copy()
headers["x-goog-api-key"] = api_key
elif config.provider == APIProvider.ANTHROPIC:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": user_query}]
headers = config.headers_template.copy()
headers["x-api-key"] = api_key
else:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": user_query}]
headers = config.headers_template.copy()
headers["Authorization"] = f"Bearer {api_key}"
response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout)
response.raise_for_status()
result = config.response_extractor(response)
logger.info(f"βœ“ {model_name}")
return result
except Exception as e:
logger.error(f"βœ— {model_name}: {str(e)}")
return None
def execute(self, user_query: str) -> Dict[str, Dict[str, Any]]:
self.responses = {}
with ThreadPoolExecutor(max_workers=min(len(self.models), 8)) as executor:
future_to_model = {executor.submit(self._call_llm, model, user_query): model for model in self.models}
for future in as_completed(future_to_model):
model_name = future_to_model[future]
try:
response = future.result()
if response:
self.responses[model_name] = {"response": response, "timestamp": datetime.now().isoformat()}
except Exception as e:
logger.error(f"Error {model_name}: {str(e)}")
return self.responses
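# Stage 1 fans the same query out to every selected model in parallel (at most
# 8 worker threads) and keeps whatever comes back; models that error out or lack
# an API key are dropped, so the council degrades to the subset that responded.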
# ============================================================================
# STAGE 2: PEER REVIEW
# ============================================================================
class Stage2Executor:
def __init__(self, stage1_responses: Dict[str, Dict[str, Any]], timeout: int = 60):
self.stage1_responses = stage1_responses
self.timeout = timeout
def _anonymize_responses(self) -> Dict[str, str]:
models = list(self.stage1_responses.keys())
anonymous_map = {}
shuffled = models.copy()
random.shuffle(shuffled)
for idx, model in enumerate(shuffled):
anonymous_map[f"Model_{chr(65 + idx)}"] = model
return anonymous_map
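    # The anonymous map hides model identities from reviewers, e.g.
    # {"Model_A": "GPT-4o", "Model_B": "Claude-3.5-Sonnet"}; shuffling prevents
    # the letter order from revealing which model produced which answer.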
def _generate_review_prompt(self, anon_map: Dict[str, str], query: str) -> str:
text = f"Query: {query}\n\nReview:\n"
for anon_name, model in anon_map.items():
text += f"{anon_name}: {self.stage1_responses[model]['response']}\n\n"
text += "Rank models as JSON: {\"rankings\": [{\"model\": \"Model_X\", \"score\": 9}]}"
return text
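    # Reviewers are asked to answer with JSON of the form
    #   {"rankings": [{"model": "Model_A", "score": 9}, {"model": "Model_B", "score": 7}]}
    # so that scores can be parsed mechanically in _call_reviewer below.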
def _call_reviewer(self, model: str, prompt: str) -> Optional[Dict]:
try:
config = LLM_CONFIGS[model]
api_key = os.getenv(config.api_key_env)
if not api_key:
return None
if config.provider == APIProvider.GOOGLE:
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.3, "maxOutputTokens": 2048}}
headers = config.headers_template.copy()
headers["x-goog-api-key"] = api_key
elif config.provider == APIProvider.ANTHROPIC:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 2048
headers = config.headers_template.copy()
headers["x-api-key"] = api_key
else:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 2048
headers = config.headers_template.copy()
headers["Authorization"] = f"Bearer {api_key}"
response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout)
response.raise_for_status()
result = config.response_extractor(response)
try:
j_start = result.find('{')
j_end = result.rfind('}') + 1
if j_start != -1 and j_end > j_start:
return json.loads(result[j_start:j_end])
            except json.JSONDecodeError:
pass
return {"raw": result}
except Exception as e:
logger.error(f"Review error: {str(e)}")
return None
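    # If a reviewer wraps its JSON in prose, the slice between the first '{' and
    # the last '}' is parsed; if even that fails, the raw text is kept under a
    # "raw" key so the review is not silently lost.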
def execute(self, query: str) -> Dict[str, Any]:
anon_map = self._anonymize_responses()
prompt = self._generate_review_prompt(anon_map, query)
reviews = {}
for model in self.stage1_responses.keys():
result = self._call_reviewer(model, prompt)
if result:
reviews[model] = result
return {"reviews": reviews, "anonymous_map": anon_map}
# ============================================================================
# STAGE 3: SYNTHESIS
# ============================================================================
class Stage3Executor:
def __init__(self, s1: Dict, s2: Dict, timeout: int = 60):
self.s1 = s1
self.s2 = s2
self.timeout = timeout
def _generate_prompt(self, query: str, anon_map: Dict) -> str:
text = f"Query: {query}\n\nResponses:\n"
for a, m in anon_map.items():
text += f"{a}: {self.s1[m]['response']}\n\n"
text += "Synthesize final answer"
return text
def _call_chairman(self, prompt: str, model: str) -> Optional[str]:
try:
config = LLM_CONFIGS[model]
api_key = os.getenv(config.api_key_env)
if not api_key:
return None
if config.provider == APIProvider.GOOGLE:
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.5, "maxOutputTokens": 4096}}
headers = config.headers_template.copy()
headers["x-goog-api-key"] = api_key
elif config.provider == APIProvider.ANTHROPIC:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 4096
headers = config.headers_template.copy()
headers["x-api-key"] = api_key
else:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 4096
headers = config.headers_template.copy()
headers["Authorization"] = f"Bearer {api_key}"
response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout)
response.raise_for_status()
return config.response_extractor(response)
except Exception as e:
logger.error(f"Chairman error: {str(e)}")
return None
def execute(self, query: str, model: str, anon_map: Dict) -> Dict[str, Any]:
prompt = self._generate_prompt(query, anon_map)
final = self._call_chairman(prompt, model)
return {"final_response": final or "Unable to synthesize"}
# ============================================================================
# ORCHESTRATOR
# ============================================================================
class LLMCouncil:
def __init__(self, models: List[str], chairman: str):
self.models = models
self.chairman = chairman
def execute(self, query: str) -> Dict[str, Any]:
try:
logger.info("STAGE 1...")
s1 = Stage1Executor(self.models)
s1_resp = s1.execute(query)
if not s1_resp:
return {"error": "Stage 1 failed: No responses"}
logger.info("STAGE 2...")
s2 = Stage2Executor(s1_resp)
s2_result = s2.execute(query)
logger.info("STAGE 3...")
s3 = Stage3Executor(s1_resp, s2_result["reviews"])
s3_result = s3.execute(query, self.chairman, s2_result["anonymous_map"])
return {
"stage_1": {model: resp["response"] for model, resp in s1_resp.items()},
"stage_2": s2_result["reviews"],
"stage_3": s3_result["final_response"],
}
except Exception as e:
logger.error(f"Error: {str(e)}")
return {"error": str(e)}
# ============================================================================
# GRADIO UI
# ============================================================================
def run_council(query: str, models_str: str, chairman: str) -> Tuple[str, str, str]:
"""Run council and return results"""
if not query.strip():
return ("Enter a query", "", "")
if not models_str.strip():
return ("Select models (comma-separated)", "", "")
models = [m.strip() for m in models_str.split(",")]
models = [m for m in models if m in LLM_CONFIGS]
if len(models) < 2:
return ("Select at least 2 valid models", "", "")
if chairman not in LLM_CONFIGS:
return ("Select valid chairman", "", "")
try:
council = LLMCouncil(models, chairman)
result = council.execute(query)
if "error" in result:
return (f"❌ {result['error']}", "", "")
# Final Synthesis
final = result.get("stage_3", "No response")
# Stage 1
stage1 = "## Stage 1: Model Responses\n\n"
for model, resp in result.get("stage_1", {}).items():
stage1 += f"**{model}**\n{resp}\n\n"
# Stage 2
stage2 = "## Stage 2: Reviews\n\n"
for model, review in result.get("stage_2", {}).items():
stage2 += f"**{model}**\n{json.dumps(review, indent=2)}\n\n"
return (final, stage1, stage2)
except Exception as e:
return (f"Error: {str(e)}", "", "")
def get_api_status() -> str:
"""Get API status"""
status = "## API Status\n\n"
providers = {}
for model in LLM_CONFIGS.keys():
config = LLM_CONFIGS[model]
key_set = os.getenv(config.api_key_env) is not None
if config.api_key_env not in providers:
providers[config.api_key_env] = key_set
for provider, is_set in sorted(providers.items()):
icon = "βœ“" if is_set else "βœ—"
status += f"{icon} {provider}: {'OK' if is_set else 'MISSING'}\n\n"
return status
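# The status panel reflects which keys load_dotenv() found. A typical .env for
# this app (variable names must match the api_key_env values above; values are
# placeholders):
#
#   groq=gsk_...
#   gemini=AIza...
#   ANTHROPIC_API_KEY=sk-ant-...
#   openai=sk-...
#   perplexity=pplx-...
#   openrouter=sk-or-...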
# ============================================================================
# MAIN
# ============================================================================
def create_interface():
"""Create Gradio interface"""
available_models = sorted(list(LLM_CONFIGS.keys()))
default_models = ", ".join(available_models[:3])
demo = gr.Blocks()
with demo:
gr.Markdown("""
# πŸ›οΈ LLM Council
**3-Stage Consensus AI Pipeline with 12+ Models**
βœ… Stage 1: Parallel opinions from all selected models
βœ… Stage 2: Anonymous peer review and ranking
βœ… Stage 3: Chairman synthesizes final consensus
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### βš™οΈ Setup")
gr.Markdown(get_api_status())
models_input = gr.Textbox(
label="Models (comma-separated)",
value=default_models,
lines=3,
)
chairman_input = gr.Dropdown(
choices=available_models,
label="Chairman Model",
value=available_models[0] if available_models else None
)
gr.Markdown("""
                ### 📋 Available Models:
- **Groq**: Llama-3.3-70B, Llama-3.2-90B-Vision
- **Google**: Gemini-2.0-Flash, Gemini-2.0-Pro
- **Claude**: Claude-3.5-Sonnet, Claude-3-Opus, Claude-3-Haiku
- **OpenAI**: GPT-4o, GPT-4o-mini, GPT-4-Turbo
- **Perplexity**: Sonar-Large
- **OpenRouter**: Mistral-7B, Qwen-2.5-72B, DeepSeek-R1
""")
with gr.Column(scale=2):
gr.Markdown("### 🎯 Query")
query_input = gr.Textbox(
label="Your Question",
lines=4,
placeholder="Ask anything...",
)
run_btn = gr.Button("πŸš€ Run Council", variant="primary")
with gr.Tabs():
with gr.TabItem("Final"):
final_out = gr.Markdown()
with gr.TabItem("Stage 1"):
s1_out = gr.Markdown()
with gr.TabItem("Stage 2"):
s2_out = gr.Markdown()
run_btn.click(
fn=run_council,
inputs=[query_input, models_input, chairman_input],
outputs=[final_out, s1_out, s2_out]
)
return demo
if __name__ == "__main__":
demo = create_interface()
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)