import asyncio
import re
from typing import Any, Dict, List

import gradio as gr
import google.generativeai as genai


class GeminiCouncil:
    """LLM Council using the FREE Google Gemini API.

    Runs a 3-stage deliberation (Karpathy-style "LLM Council"):
      1. every council model answers the question independently,
      2. every council model anonymously ranks all answers,
      3. a chairman model synthesizes a final answer from answers + rankings.
    """

    def __init__(self, api_key: str, council_models: List[str], chairman_model: str):
        """Configure the Gemini SDK and build one model handle per unique name.

        Args:
            api_key: Gemini API key (aistudio.google.com/app/apikey).
            council_models: model names that answer and peer-review.
            chairman_model: model name that writes the final synthesis.
        """
        genai.configure(api_key=api_key)
        self.council_models = council_models
        self.chairman_model = chairman_model
        # Dedupe via set(): the chairman may also sit on the council, and we
        # only want one GenerativeModel object per model name.
        self.models = {
            model: genai.GenerativeModel(model)
            for model in set(council_models + [chairman_model])
        }

    async def call_model(self, model_name: str, prompt: str) -> str:
        """Call one Gemini model; return its text or an inline error marker.

        generate_content() is blocking, so it is pushed onto a worker thread
        with asyncio.to_thread so that concurrent council calls overlap.
        Errors are returned in-band (truncated to 100 chars) instead of
        raised, so one failing model cannot abort the whole deliberation.
        """
        try:
            model = self.models[model_name]
            response = await asyncio.to_thread(
                model.generate_content,
                prompt,
                generation_config=genai.types.GenerationConfig(
                    temperature=0.7,
                    max_output_tokens=2000,
                ),
            )
            return response.text
        except Exception as e:
            return f"[Error from {model_name}: {str(e)[:100]}]"

    async def stage1_first_opinions(self, query: str) -> Dict[str, str]:
        """Stage 1: gather an independent answer from every council member.

        Returns:
            Mapping of model name -> its answer text, in council order.
        """
        prompt = f"""Question: {query}

Please provide a clear, well-reasoned answer to this question. Be specific and insightful."""
        tasks = [self.call_model(model, prompt) for model in self.council_models]
        responses = await asyncio.gather(*tasks)
        return dict(zip(self.council_models, responses))

    async def stage2_peer_review(self, query: str, opinions: Dict[str, str]) -> Dict[str, Any]:
        """Stage 2: each model anonymously ranks all Stage-1 responses.

        Responses are relabeled "Response A", "Response B", ... so reviewers
        cannot identify (or favor) the authoring model.

        Returns:
            dict with keys:
              "rankings": per-reviewer {"full_review": str, "ranking": [labels]}
              "aggregate": [(model_name, avg_rank), ...] sorted best-first
              "label_to_model": label -> model name de-anonymization map
        """
        response_labels = [f"Response {chr(65 + i)}" for i in range(len(opinions))]
        model_list = list(opinions.keys())

        # Build the anonymized transcript shown to every reviewer.
        anonymized_text = f"Original Question: {query}\n\n"
        for label, model in zip(response_labels, model_list):
            anonymized_text += f"{label}:\n{opinions[model]}\n\n"

        ranking_prompt = f"""You are evaluating multiple responses to a question. Please rank them from best to worst based on accuracy, insight, and helpfulness.

{anonymized_text}

Provide your ranking in this EXACT format:
FINAL RANKING:
1. Response X
2. Response Y
3. Response Z
4. Response W

Before your ranking, you may provide brief analysis of each response."""

        tasks = [self.call_model(model, ranking_prompt) for model in self.council_models]
        rankings_raw = await asyncio.gather(*tasks)

        # Parse each reviewer's free-text ranking into an ordered label list.
        rankings: Dict[str, Any] = {}
        for model, ranking_text in zip(self.council_models, rankings_raw):
            parsed = self._parse_ranking(ranking_text, response_labels)
            rankings[model] = {
                "full_review": ranking_text,
                "ranking": parsed,
            }

        aggregate = self._calculate_aggregate_rankings(rankings, response_labels, model_list)
        return {
            "rankings": rankings,
            "aggregate": aggregate,
            "label_to_model": dict(zip(response_labels, model_list)),
        }

    def _parse_ranking(self, text: str, valid_labels: List[str]) -> List[str]:
        """Extract an ordered list of response labels from a reviewer's text.

        Prefers the section after "FINAL RANKING:"; if that header is absent,
        scans the whole text. Labels are ordered by first occurrence. Falls
        back to ``valid_labels`` unchanged when no label is found at all, so
        downstream aggregation never sees an empty ranking.
        """
        ranking_match = re.search(
            r'FINAL RANKING:(.+?)(?:\n\n|$)', text, re.DOTALL | re.IGNORECASE
        )
        if ranking_match:
            ranking_text = ranking_match.group(1)
        else:
            ranking_text = text

        # Order labels by the position of their first mention.
        found_labels = []
        for label in valid_labels:
            if label in ranking_text:
                pos = ranking_text.find(label)
                found_labels.append((pos, label))
        found_labels.sort(key=lambda x: x[0])
        return [label for _, label in found_labels] if found_labels else valid_labels

    def _calculate_aggregate_rankings(
        self, rankings: Dict, labels: List[str], models: List[str]
    ) -> List[tuple]:
        """Average each response's rank position across all reviewers.

        Args:
            rankings: per-reviewer dicts holding a "ranking" label list.
            labels: anonymized labels, parallel to ``models``.
            models: model names, parallel to ``labels``.

        Returns:
            [(model_name, average_rank), ...] sorted ascending (best first).
            Models never ranked by anyone get +inf and sort last.
        """
        avg_ranks: Dict[str, float] = {}
        # labels[i] is the anonymized name of models[i]; zip keeps them paired.
        for model, model_label in zip(models, labels):
            total_rank = 0
            count = 0
            for reviewer_rankings in rankings.values():
                ranking_list = reviewer_rankings["ranking"]
                if model_label in ranking_list:
                    # 1-based position: first place = rank 1.
                    total_rank += ranking_list.index(model_label) + 1
                    count += 1
            avg_ranks[model] = total_rank / count if count > 0 else float('inf')
        return sorted(avg_ranks.items(), key=lambda x: x[1])

    async def stage3_final_synthesis(
        self, query: str, opinions: Dict[str, str], review_data: Dict[str, Any]
    ) -> str:
        """Stage 3: the chairman model writes the final synthesized answer.

        The chairman sees every Stage-1 answer plus the top-3 aggregate
        peer-review rankings, and is asked for one comprehensive answer.
        """
        synthesis_prompt = f"""You are the Chairman synthesizing the council's work.

Original Question: {query}

COUNCIL RESPONSES:
"""
        for i, (model, opinion) in enumerate(opinions.items(), 1):
            synthesis_prompt += f"\n{i}. {model}:\n{opinion}\n"

        synthesis_prompt += "\n\nPEER REVIEW SUMMARY:\n"
        # Only the top 3 by average rank — enough signal without bloating the prompt.
        for model, avg_rank in review_data["aggregate"][:3]:
            synthesis_prompt += f"- {model} (avg rank: {avg_rank:.2f})\n"

        synthesis_prompt += """\n\nBased on all responses and peer evaluations, provide a comprehensive final answer that:
1. Synthesizes the best insights from all responses
2. Addresses any disagreements or different perspectives
3. Provides a clear, actionable conclusion
"""
        final_answer = await self.call_model(self.chairman_model, synthesis_prompt)
        return final_answer

    async def run_council(self, query: str) -> str:
        """Run the complete 3-stage council process.

        Returns:
            A Markdown transcript: truncated Stage-1 opinions, Stage-2
            rankings and aggregates, and the chairman's full synthesis.
        """
        output = f"# 🏛️ LLM Council Deliberation\n\n**Question:** {query}\n\n"

        # Stage 1
        output += "## 📝 Stage 1: First Opinions\n\n"
        opinions = await self.stage1_first_opinions(query)
        for model, opinion in opinions.items():
            # Truncate each opinion to keep the transcript readable.
            output += f"**{model}:**\n{opinion[:300]}...\n\n"

        # Stage 2
        output += "## 🔍 Stage 2: Peer Review\n\n"
        review_data = await self.stage2_peer_review(query, opinions)
        output += "**Rankings:**\n"
        for model, data in review_data["rankings"].items():
            output += f"- {model}: {', '.join(data['ranking'])}\n"
        output += "\n**Aggregate Rankings:**\n"
        for model, avg_rank in review_data["aggregate"]:
            output += f"- {model}: {avg_rank:.2f}\n"

        # Stage 3
        output += "\n## 🎯 Stage 3: Chairman's Synthesis\n\n"
        final_answer = await self.stage3_final_synthesis(query, opinions, review_data)
        output += final_answer

        return output


# Gradio Interface
def run_deliberation(api_key: str, question: str, progress=gr.Progress()):
    """Gradio callback: validate inputs, run the council, return Markdown.

    Returns an error string (never raises) so the UI always shows something.
    """
    if not api_key:
        return ("❌ Please enter your Gemini API key first!\n\n"
                "Get one FREE at: https://aistudio.google.com/app/apikey")
    if not question:
        return "❌ Please enter a question for the council to deliberate on!"

    try:
        # Initialize council
        progress(0.1, desc="Initializing council...")
        council_models = [
            "gemini-1.5-flash",
            "gemini-1.5-flash-8b",
            "gemini-1.5-pro",
            "gemini-2.0-flash-exp",
        ]
        chairman = "gemini-1.5-pro"
        council = GeminiCouncil(api_key, council_models, chairman)

        # Run deliberation (bridge sync Gradio callback -> async council).
        progress(0.3, desc="Council deliberating...")
        result = asyncio.run(council.run_council(question))
        progress(1.0, desc="Complete!")
        return result
    except Exception as e:
        return (f"❌ Error: {str(e)}\n\n"
                "Make sure your API key is valid and you have internet connection.")


# Create Gradio interface
with gr.Blocks(title="LLM Council - FREE Gemini Edition") as demo:
    gr.Markdown("""
    # 🏛️ LLM Council - FREE Gemini Edition

    Watch multiple Gemini AI models debate, review each other, and reach collective intelligence!

    Based on [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council)
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("""
            ## 🚀 Quick Start
            1. Get FREE API key: [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)
            2. Paste it below
            3. Ask your question!
            4. Watch the council deliberate!
            """)

            api_key_input = gr.Textbox(
                label="Gemini API Key",
                placeholder="Paste your FREE Gemini API key here...",
                type="password"
            )

            question_input = gr.Textbox(
                label="Your Question",
                placeholder="E.g., How can ontological commitment be modeled in AI prompt engineering?",
                lines=3
            )

            submit_btn = gr.Button("🏛️ Convene the Council", variant="primary", size="lg")

            gr.Markdown("""
            ## 🤖 The Council
            - **gemini-1.5-flash** - Fast & efficient
            - **gemini-1.5-flash-8b** - Compact perspective
            - **gemini-1.5-pro** - Most capable (Chairman)
            - **gemini-2.0-flash-exp** - Latest experimental

            ### Process:
            1. Each model answers independently
            2. Models review & rank each other anonymously
            3. Chairman synthesizes final answer
            """)

        with gr.Column(scale=2):
            output = gr.Markdown(label="Council Deliberation")

    submit_btn.click(
        fn=run_deliberation,
        inputs=[api_key_input, question_input],
        outputs=output
    )

    gr.Markdown("""
    ---
    ## 💡 Example Questions
    - How can ontological commitment be modeled in AI prompt engineering?
    - What are the key principles of effective human-AI collaboration?
    - What ethical considerations should guide AI development?
    - How might Buddhist philosophy inform AI system design?

    ## 📚 About
    Created for W3C AI Knowledge Representation Community Group research on human-AI co-evolution.

    **Credits:** Paola Di Maio (Ronin Institute) & Claude (Anthropic)
    **License:** MIT
    """)


if __name__ == "__main__":
    demo.launch()