# LLM_council / app.py
import sys
import subprocess
def install_packages():
"""Automatically install missing packages"""
    required_packages = {
        'gradio': 'gradio',
        'requests': 'requests',
        'python-dotenv': 'dotenv',  # pip package name differs from its import name
    }
    for package, import_name in required_packages.items():
        try:
            __import__(import_name)
            print(f"✓ {package} already installed")
        except ImportError:
            print(f"Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"])
            print(f"✓ {package} installed")
print("Checking dependencies...")
install_packages()
print("βœ“ All dependencies ready!\n")
import os
import json
import time
import logging
from typing import List, Dict, Any, Optional, Tuple, Callable
from datetime import datetime
from dataclasses import dataclass
from enum import Enum
import random
import gradio as gr
import requests
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed
load_dotenv()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# ============================================================================
# CONFIGURATION
# ============================================================================
class APIProvider(Enum):
GROQ = "groq"
GOOGLE = "google"
ANTHROPIC = "anthropic"
OPENAI = "openai"
PERPLEXITY = "perplexity"
OPENROUTER = "openrouter"
@dataclass
class LLMConfig:
provider: APIProvider
model_name: str
api_key_env: str
base_url: str
headers_template: Dict[str, str]
request_payload_template: Dict[str, Any]
    response_extractor: Callable[..., str]
rate_limit: int
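# Each LLMConfig is a declarative recipe for one model: api_key_env names the
# environment variable holding the key, headers_template and
# request_payload_template are copied and filled in per request, and
# response_extractor pulls the answer text out of the provider-specific JSON
# response. rate_limit is informational (requests/minute) and not enforced here.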
# ============================================================================
# LLM CONFIGURATIONS - 14 MODELS ACROSS 6 PROVIDERS
# ============================================================================
LLM_CONFIGS: Dict[str, LLMConfig] = {
# ===== GROQ (Ultra-Fast, Free) =====
"Llama-3.3-70B (Groq)": LLMConfig(
provider=APIProvider.GROQ,
model_name="llama-3.3-70b-versatile",
api_key_env=os.getenv("groq"),
base_url="https://api.groq.com/openai/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "llama-3.3-70b-versatile",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
"top_p": 0.9,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=30,
),
"Llama-3.2-90B-Vision (Groq)": LLMConfig(
provider=APIProvider.GROQ,
model_name="llama-3.2-90b-vision-preview",
api_key_env=os.getenv("groq"),
base_url="https://api.groq.com/openai/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "llama-3.2-90b-vision-preview",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=30,
),
# ===== GOOGLE (Gemini, Free Tier) =====
"Gemini-2.0-Flash": LLMConfig(
provider=APIProvider.GOOGLE,
model_name="gemini-2.0-flash",
api_key_env=os.getenv("gemini"),
base_url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
headers_template={"x-goog-api-key": "{api_key}", "Content-Type": "application/json"},
request_payload_template={
"contents": [{"parts": [{"text": ""}]}],
"generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024},
},
response_extractor=lambda r: r.json()["candidates"][0]["content"]["parts"][0]["text"],
rate_limit=60,
),
"Gemini-2.0-Pro": LLMConfig(
provider=APIProvider.GOOGLE,
model_name="gemini-2.0-pro",
api_key_env=os.getenv("gemini"),
base_url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro:generateContent",
headers_template={"x-goog-api-key": "{api_key}", "Content-Type": "application/json"},
request_payload_template={
"contents": [{"parts": [{"text": ""}]}],
"generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024},
},
response_extractor=lambda r: r.json()["candidates"][0]["content"]["parts"][0]["text"],
rate_limit=60,
),
# ===== ANTHROPIC (Claude) =====
"Claude-3.5-Sonnet": LLMConfig(
provider=APIProvider.ANTHROPIC,
model_name="claude-3-5-sonnet-20241022",
api_key_env="ANTHROPIC_API_KEY",
base_url="https://api.anthropic.com/v1/messages",
headers_template={
"x-api-key": "{api_key}",
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
request_payload_template={
"model": "claude-3-5-sonnet-20241022",
"messages": [],
"max_tokens": 1024,
"temperature": 0.7,
},
response_extractor=lambda r: r.json()["content"][0]["text"],
rate_limit=50,
),
"Claude-3-Opus": LLMConfig(
provider=APIProvider.ANTHROPIC,
model_name="claude-3-opus-20240229",
api_key_env="ANTHROPIC_API_KEY",
base_url="https://api.anthropic.com/v1/messages",
headers_template={
"x-api-key": "{api_key}",
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
request_payload_template={
"model": "claude-3-opus-20240229",
"messages": [],
"max_tokens": 1024,
"temperature": 0.7,
},
response_extractor=lambda r: r.json()["content"][0]["text"],
rate_limit=50,
),
"Claude-3-Haiku": LLMConfig(
provider=APIProvider.ANTHROPIC,
model_name="claude-3-haiku-20240307",
api_key_env="ANTHROPIC_API_KEY",
base_url="https://api.anthropic.com/v1/messages",
headers_template={
"x-api-key": "{api_key}",
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
request_payload_template={
"model": "claude-3-haiku-20240307",
"messages": [],
"max_tokens": 1024,
"temperature": 0.7,
},
response_extractor=lambda r: r.json()["content"][0]["text"],
rate_limit=100,
),
# ===== OPENAI (ChatGPT & GPT-4) =====
"GPT-4-Turbo": LLMConfig(
provider=APIProvider.OPENAI,
model_name="gpt-4-turbo",
api_key_env=os.getenv("openai"),
base_url="https://api.openai.com/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "gpt-4-turbo",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=50,
),
"GPT-4o": LLMConfig(
provider=APIProvider.OPENAI,
model_name="gpt-4o",
api_key_env=os.getenv("openai"),
base_url="https://api.openai.com/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "gpt-4o",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=50,
),
"GPT-4o-mini": LLMConfig(
provider=APIProvider.OPENAI,
model_name="gpt-4o-mini",
api_key_env=os.getenv("openai"),
base_url="https://api.openai.com/v1/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "gpt-4o-mini",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=50,
),
# ===== PERPLEXITY =====
"Perplexity-Sonar-Large": LLMConfig(
provider=APIProvider.PERPLEXITY,
model_name="llama-3.1-sonar-large-128k-online",
api_key_env=os.getenv("perplexity"),
base_url="https://api.perplexity.ai/chat/completions",
headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
request_payload_template={
"model": "llama-3.1-sonar-large-128k-online",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=40,
),
# ===== OPENROUTER =====
"Mistral-7B": LLMConfig(
provider=APIProvider.OPENROUTER,
model_name="mistralai/mistral-7b-instruct:free",
api_key_env=os.getenv("openrouter"),
base_url="https://openrouter.ai/api/v1/chat/completions",
headers_template={
"Authorization": "Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "http://localhost"
},
request_payload_template={
"model": "mistralai/mistral-7b-instruct:free",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=20,
),
"Qwen-2.5-72B": LLMConfig(
provider=APIProvider.OPENROUTER,
model_name="qwen/qwen-2.5-72b-instruct:free",
api_key_env=os.getenv("openrouter"),
base_url="https://openrouter.ai/api/v1/chat/completions",
headers_template={
"Authorization": "Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "http://localhost"
},
request_payload_template={
"model": "qwen/qwen-2.5-72b-instruct:free",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=20,
),
"DeepSeek-R1": LLMConfig(
provider=APIProvider.OPENROUTER,
model_name="deepseek/deepseek-r1:free",
api_key_env=os.getenv("openrouter"),
base_url="https://openrouter.ai/api/v1/chat/completions",
headers_template={
"Authorization": "Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "http://localhost"
},
request_payload_template={
"model": "deepseek/deepseek-r1:free",
"messages": [],
"temperature": 0.7,
"max_tokens": 1024,
},
response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
rate_limit=15,
),
}
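# Adding another OpenAI-compatible endpoint only needs one more entry in the dict
# above. A minimal sketch (the display name and model id below are illustrative,
# not part of the original configuration):
#
# LLM_CONFIGS["Llama-3.1-8B (Groq)"] = LLMConfig(
#     provider=APIProvider.GROQ,
#     model_name="llama-3.1-8b-instant",
#     api_key_env="groq",
#     base_url="https://api.groq.com/openai/v1/chat/completions",
#     headers_template={"Authorization": "Bearer {api_key}", "Content-Type": "application/json"},
#     request_payload_template={"model": "llama-3.1-8b-instant", "messages": [],
#                               "temperature": 0.7, "max_tokens": 1024},
#     response_extractor=lambda r: r.json()["choices"][0]["message"]["content"],
#     rate_limit=30,
# )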
# ============================================================================
# STAGE 1: PARALLEL OPINIONS
# ============================================================================
class Stage1Executor:
def __init__(self, models: List[str], timeout: int = 45):
self.models = models
self.timeout = timeout
self.responses: Dict[str, Dict[str, Any]] = {}
def _call_llm(self, model_name: str, user_query: str) -> Optional[str]:
try:
config = LLM_CONFIGS[model_name]
api_key = os.getenv(config.api_key_env)
if not api_key:
logger.warning(f"API key missing: {config.api_key_env}")
return None
if config.provider == APIProvider.GOOGLE:
payload = {
"contents": [{"parts": [{"text": user_query}]}],
"generationConfig": {"temperature": 0.7, "maxOutputTokens": 1024},
}
headers = config.headers_template.copy()
headers["x-goog-api-key"] = api_key
elif config.provider == APIProvider.ANTHROPIC:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": user_query}]
headers = config.headers_template.copy()
headers["x-api-key"] = api_key
else:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": user_query}]
headers = config.headers_template.copy()
headers["Authorization"] = f"Bearer {api_key}"
response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout)
response.raise_for_status()
result = config.response_extractor(response)
logger.info(f"βœ“ {model_name}")
return result
except Exception as e:
logger.error(f"βœ— {model_name}: {str(e)}")
return None
def execute(self, user_query: str) -> Dict[str, Dict[str, Any]]:
self.responses = {}
with ThreadPoolExecutor(max_workers=min(len(self.models), 8)) as executor:
future_to_model = {executor.submit(self._call_llm, model, user_query): model for model in self.models}
for future in as_completed(future_to_model):
model_name = future_to_model[future]
try:
response = future.result()
if response:
self.responses[model_name] = {"response": response, "timestamp": datetime.now().isoformat()}
except Exception as e:
logger.error(f"Error {model_name}: {str(e)}")
return self.responses
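# Stage 1 fans the same query out to every selected model in parallel (at most
# 8 worker threads) and keeps whatever comes back; models that error out or lack
# an API key are dropped, so the council degrades to the subset that responded.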
# ============================================================================
# STAGE 2: PEER REVIEW
# ============================================================================
class Stage2Executor:
def __init__(self, stage1_responses: Dict[str, Dict[str, Any]], timeout: int = 60):
self.stage1_responses = stage1_responses
self.timeout = timeout
def _anonymize_responses(self) -> Dict[str, str]:
models = list(self.stage1_responses.keys())
anonymous_map = {}
shuffled = models.copy()
random.shuffle(shuffled)
for idx, model in enumerate(shuffled):
anonymous_map[f"Model_{chr(65 + idx)}"] = model
return anonymous_map
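    # The anonymous map hides model identities from reviewers, e.g.
    # {"Model_A": "GPT-4o", "Model_B": "Claude-3.5-Sonnet"}; shuffling prevents
    # the letter order from revealing which model produced which answer.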
def _generate_review_prompt(self, anon_map: Dict[str, str], query: str) -> str:
text = f"Query: {query}\n\nReview:\n"
for anon_name, model in anon_map.items():
text += f"{anon_name}: {self.stage1_responses[model]['response']}\n\n"
text += "Rank models as JSON: {\"rankings\": [{\"model\": \"Model_X\", \"score\": 9}]}"
return text
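    # Reviewers are asked to answer with JSON of the form
    #   {"rankings": [{"model": "Model_A", "score": 9}, {"model": "Model_B", "score": 7}]}
    # so that scores can be parsed mechanically in _call_reviewer below.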
def _call_reviewer(self, model: str, prompt: str) -> Optional[Dict]:
try:
config = LLM_CONFIGS[model]
api_key = os.getenv(config.api_key_env)
if not api_key:
return None
if config.provider == APIProvider.GOOGLE:
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.3, "maxOutputTokens": 2048}}
headers = config.headers_template.copy()
headers["x-goog-api-key"] = api_key
elif config.provider == APIProvider.ANTHROPIC:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 2048
headers = config.headers_template.copy()
headers["x-api-key"] = api_key
else:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 2048
headers = config.headers_template.copy()
headers["Authorization"] = f"Bearer {api_key}"
response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout)
response.raise_for_status()
result = config.response_extractor(response)
try:
j_start = result.find('{')
j_end = result.rfind('}') + 1
if j_start != -1 and j_end > j_start:
return json.loads(result[j_start:j_end])
            except json.JSONDecodeError:
pass
return {"raw": result}
except Exception as e:
logger.error(f"Review error: {str(e)}")
return None
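    # If a reviewer wraps its JSON in prose, the slice between the first '{' and
    # the last '}' is parsed; if even that fails, the raw text is kept under a
    # "raw" key so the review is not silently lost.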
def execute(self, query: str) -> Dict[str, Any]:
anon_map = self._anonymize_responses()
prompt = self._generate_review_prompt(anon_map, query)
reviews = {}
for model in self.stage1_responses.keys():
result = self._call_reviewer(model, prompt)
if result:
reviews[model] = result
return {"reviews": reviews, "anonymous_map": anon_map}
# ============================================================================
# STAGE 3: SYNTHESIS
# ============================================================================
class Stage3Executor:
def __init__(self, s1: Dict, s2: Dict, timeout: int = 60):
self.s1 = s1
self.s2 = s2
self.timeout = timeout
def _generate_prompt(self, query: str, anon_map: Dict) -> str:
text = f"Query: {query}\n\nResponses:\n"
for a, m in anon_map.items():
text += f"{a}: {self.s1[m]['response']}\n\n"
text += "Synthesize final answer"
return text
def _call_chairman(self, prompt: str, model: str) -> Optional[str]:
try:
config = LLM_CONFIGS[model]
api_key = os.getenv(config.api_key_env)
if not api_key:
return None
if config.provider == APIProvider.GOOGLE:
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.5, "maxOutputTokens": 4096}}
headers = config.headers_template.copy()
headers["x-goog-api-key"] = api_key
elif config.provider == APIProvider.ANTHROPIC:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 4096
headers = config.headers_template.copy()
headers["x-api-key"] = api_key
else:
payload = config.request_payload_template.copy()
payload["messages"] = [{"role": "user", "content": prompt}]
payload["max_tokens"] = 4096
headers = config.headers_template.copy()
headers["Authorization"] = f"Bearer {api_key}"
response = requests.post(config.base_url, json=payload, headers=headers, timeout=self.timeout)
response.raise_for_status()
return config.response_extractor(response)
except Exception as e:
logger.error(f"Chairman error: {str(e)}")
return None
def execute(self, query: str, model: str, anon_map: Dict) -> Dict[str, Any]:
prompt = self._generate_prompt(query, anon_map)
final = self._call_chairman(prompt, model)
return {"final_response": final or "Unable to synthesize"}
# ============================================================================
# ORCHESTRATOR
# ============================================================================
class LLMCouncil:
def __init__(self, models: List[str], chairman: str):
self.models = models
self.chairman = chairman
def execute(self, query: str) -> Dict[str, Any]:
try:
logger.info("STAGE 1...")
s1 = Stage1Executor(self.models)
s1_resp = s1.execute(query)
if not s1_resp:
return {"error": "Stage 1 failed: No responses"}
logger.info("STAGE 2...")
s2 = Stage2Executor(s1_resp)
s2_result = s2.execute(query)
logger.info("STAGE 3...")
s3 = Stage3Executor(s1_resp, s2_result["reviews"])
s3_result = s3.execute(query, self.chairman, s2_result["anonymous_map"])
return {
"stage_1": {model: resp["response"] for model, resp in s1_resp.items()},
"stage_2": s2_result["reviews"],
"stage_3": s3_result["final_response"],
}
except Exception as e:
logger.error(f"Error: {str(e)}")
return {"error": str(e)}
# ============================================================================
# GRADIO UI
# ============================================================================
def run_council(query: str, models_str: str, chairman: str) -> Tuple[str, str, str]:
"""Run council and return results"""
if not query.strip():
return ("Enter a query", "", "")
if not models_str.strip():
return ("Select models (comma-separated)", "", "")
models = [m.strip() for m in models_str.split(",")]
models = [m for m in models if m in LLM_CONFIGS]
if len(models) < 2:
return ("Select at least 2 valid models", "", "")
if chairman not in LLM_CONFIGS:
return ("Select valid chairman", "", "")
try:
council = LLMCouncil(models, chairman)
result = council.execute(query)
if "error" in result:
return (f"❌ {result['error']}", "", "")
# Final Synthesis
final = result.get("stage_3", "No response")
# Stage 1
stage1 = "## Stage 1: Model Responses\n\n"
for model, resp in result.get("stage_1", {}).items():
stage1 += f"**{model}**\n{resp}\n\n"
# Stage 2
stage2 = "## Stage 2: Reviews\n\n"
for model, review in result.get("stage_2", {}).items():
stage2 += f"**{model}**\n{json.dumps(review, indent=2)}\n\n"
return (final, stage1, stage2)
except Exception as e:
return (f"Error: {str(e)}", "", "")
def get_api_status() -> str:
"""Get API status"""
status = "## API Status\n\n"
providers = {}
for model in LLM_CONFIGS.keys():
config = LLM_CONFIGS[model]
key_set = os.getenv(config.api_key_env) is not None
if config.api_key_env not in providers:
providers[config.api_key_env] = key_set
for provider, is_set in sorted(providers.items()):
icon = "βœ“" if is_set else "βœ—"
status += f"{icon} {provider}: {'OK' if is_set else 'MISSING'}\n\n"
return status
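# The status panel reflects which keys load_dotenv() found. A typical .env for
# this app (variable names must match the api_key_env values above; values are
# placeholders):
#
#   groq=gsk_...
#   gemini=AIza...
#   ANTHROPIC_API_KEY=sk-ant-...
#   openai=sk-...
#   perplexity=pplx-...
#   openrouter=sk-or-...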
# ============================================================================
# MAIN
# ============================================================================
def create_interface():
"""Create Gradio interface"""
available_models = sorted(list(LLM_CONFIGS.keys()))
default_models = ", ".join(available_models[:3])
demo = gr.Blocks()
with demo:
gr.Markdown("""
# πŸ›οΈ LLM Council
**3-Stage Consensus AI Pipeline with 12+ Models**
βœ… Stage 1: Parallel opinions from all selected models
βœ… Stage 2: Anonymous peer review and ranking
βœ… Stage 3: Chairman synthesizes final consensus
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### βš™οΈ Setup")
gr.Markdown(get_api_status())
models_input = gr.Textbox(
label="Models (comma-separated)",
value=default_models,
lines=3,
)
chairman_input = gr.Dropdown(
choices=available_models,
label="Chairman Model",
value=available_models[0] if available_models else None
)
gr.Markdown("""
                ### 📋 Available Models:
- **Groq**: Llama-3.3-70B, Llama-3.2-90B-Vision
- **Google**: Gemini-2.0-Flash, Gemini-2.0-Pro
- **Claude**: Claude-3.5-Sonnet, Claude-3-Opus, Claude-3-Haiku
- **OpenAI**: GPT-4o, GPT-4o-mini, GPT-4-Turbo
- **Perplexity**: Sonar-Large
- **OpenRouter**: Mistral-7B, Qwen-2.5-72B, DeepSeek-R1
""")
with gr.Column(scale=2):
gr.Markdown("### 🎯 Query")
query_input = gr.Textbox(
label="Your Question",
lines=4,
placeholder="Ask anything...",
)
run_btn = gr.Button("πŸš€ Run Council", variant="primary")
with gr.Tabs():
with gr.TabItem("Final"):
final_out = gr.Markdown()
with gr.TabItem("Stage 1"):
s1_out = gr.Markdown()
with gr.TabItem("Stage 2"):
s2_out = gr.Markdown()
run_btn.click(
fn=run_council,
inputs=[query_input, models_input, chairman_input],
outputs=[final_out, s1_out, s2_out]
)
return demo
if __name__ == "__main__":
demo = create_interface()
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)