burtenshaw
committed on
Commit
Β·
f3325f9
1
Parent(s):
41799ac
add status to application
Browse files- app.py +49 -19
- backend/council.py +8 -1
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from backend.council import
|
| 3 |
from backend.config import COUNCIL_MODELS, CHAIRMAN_MODEL
|
| 4 |
|
| 5 |
-
|
|
|
|
| 6 |
"""
|
| 7 |
Ask the LLM Council a question.
|
| 8 |
|
|
@@ -13,41 +14,70 @@ async def ask_council(question: str) -> str:
|
|
| 13 |
|
| 14 |
Args:
|
| 15 |
question: The user's question to be discussed by the council.
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
""".format(
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
try:
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
response = stage3_result.get("response")
|
| 27 |
if not response:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
-
|
| 34 |
|
| 35 |
|
| 36 |
description = """
|
| 37 |
An MCP server that consults a council of LLMs to answer questions.
|
| 38 |

|
| 39 |
-
β οΈ
|
| 40 |
"""
|
| 41 |
|
| 42 |
demo = gr.Interface(
|
| 43 |
fn=ask_council,
|
| 44 |
inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
|
| 45 |
-
outputs=gr.Markdown(),
|
| 46 |
title="LLM Council MCP Server",
|
| 47 |
-
description=description
|
| 48 |
)
|
| 49 |
|
| 50 |
if __name__ == "__main__":
|
| 51 |
# Launch with mcp_server=True to expose as MCP
|
| 52 |
demo.launch(mcp_server=True, show_error=True)
|
| 53 |
-
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from backend.council import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final
|
| 3 |
from backend.config import COUNCIL_MODELS, CHAIRMAN_MODEL
|
| 4 |
|
| 5 |
+
|
| 6 |
+
async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.

    The question is processed in three stages: every council model answers
    independently (stage 1), the council members rank each other's answers
    (stage 2), and the chairman model synthesizes a final response (stage 3).

    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.

    Yields:
        Status updates and finally the synthesized answer.
    """
    # NOTE(review): the previous version applied `.format(models=..., chairman=...)`
    # to the string above. A string literal followed by a method call is an
    # ordinary expression whose result is discarded, NOT a docstring, so the
    # function ended up with __doc__ = None (breaking the MCP tool description).
    # If the live model names should appear in the description, assign
    # `ask_council.__doc__` after the definition instead of calling .format here.
    try:
        # Stage 1: each council model answers the question independently.
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        yield "## 🟡 Stage 1: Collecting individual responses from council members..."

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            yield "❌ The council failed to generate a response."
            return

        # Stage 2: council members rank each other's (anonymized) responses.
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        yield (
            f"## 🟢 Stage 1 Complete ({len(stage1_results)} responses received).\n\n"
            "## 🟡 Stage 2: Council members are ranking each other's responses..."
        )

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Stage 3: the chairman model synthesizes the council's final answer.
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        yield (
            "## 🟢 Stage 2 Complete (Rankings collected).\n\n"
            "## 🟡 Stage 3: Chairman is synthesizing the final answer..."
        )

        stage3_result = await stage3_synthesize_final(question, stage1_results, stage2_results)

        progress(1.0, desc="Complete!")

        response = stage3_result.get("response")
        if not response:
            yield "❌ The council failed to generate a final synthesis."
            return

        yield response

    except Exception as e:
        # Top-level boundary: surface the error to the UI instead of crashing.
        yield f"❌ Error consulting the council: {str(e)}"
|
| 65 |
|
| 66 |
|
| 67 |
# UI description shown on the Gradio page and exposed via the MCP server.
# NOTE(review): the emoji / image markup below was garbled by extraction —
# verify the warning line renders as intended (⚠️) against the live app.
description = """
An MCP server that consults a council of LLMs to answer questions.

⚠️ We're using 5 models in the council, so it takes a minute to answer.
"""

# Simple single-input interface: a textbox in, Markdown (streamed status
# updates followed by the final answer) out.
demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose as MCP
    demo.launch(mcp_server=True, show_error=True)
|
|
|
backend/council.py
CHANGED
|
@@ -15,6 +15,7 @@ async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]:
|
|
| 15 |
Returns:
|
| 16 |
List of dicts with 'model' and 'response' keys
|
| 17 |
"""
|
|
|
|
| 18 |
messages = [{"role": "user", "content": user_query}]
|
| 19 |
|
| 20 |
# Query all models in parallel
|
|
@@ -29,6 +30,7 @@ async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]:
|
|
| 29 |
"response": response.get('content', '')
|
| 30 |
})
|
| 31 |
|
|
|
|
| 32 |
return stage1_results
|
| 33 |
|
| 34 |
|
|
@@ -46,6 +48,7 @@ async def stage2_collect_rankings(
|
|
| 46 |
Returns:
|
| 47 |
Tuple of (rankings list, label_to_model mapping)
|
| 48 |
"""
|
|
|
|
| 49 |
# Create anonymized labels for responses (Response A, Response B, etc.)
|
| 50 |
labels = [chr(65 + i) for i in range(len(stage1_results))] # A, B, C, ...
|
| 51 |
|
|
@@ -109,6 +112,7 @@ Now provide your evaluation and ranking:"""
|
|
| 109 |
"parsed_ranking": parsed
|
| 110 |
})
|
| 111 |
|
|
|
|
| 112 |
return stage2_results, label_to_model
|
| 113 |
|
| 114 |
|
|
@@ -128,6 +132,7 @@ async def stage3_synthesize_final(
|
|
| 128 |
Returns:
|
| 129 |
Dict with 'model' and 'response' keys
|
| 130 |
"""
|
|
|
|
| 131 |
# Build comprehensive context for chairman
|
| 132 |
stage1_text = "\n\n".join([
|
| 133 |
f"Model: {result['model']}\nResponse: {result['response']}"
|
|
@@ -163,11 +168,13 @@ Provide a clear, well-reasoned final answer that represents the council's collec
|
|
| 163 |
|
| 164 |
if response is None:
|
| 165 |
# Fallback if chairman fails
|
|
|
|
| 166 |
return {
|
| 167 |
"model": CHAIRMAN_MODEL,
|
| 168 |
"response": "Error: Unable to generate final synthesis."
|
| 169 |
}
|
| 170 |
|
|
|
|
| 171 |
return {
|
| 172 |
"model": CHAIRMAN_MODEL,
|
| 173 |
"response": response.get('content', '')
|
|
@@ -318,7 +325,7 @@ async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]:
|
|
| 318 |
|
| 319 |
# Calculate aggregate rankings
|
| 320 |
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
|
| 321 |
-
|
| 322 |
# Stage 3: Synthesize final answer
|
| 323 |
stage3_result = await stage3_synthesize_final(
|
| 324 |
user_query,
|
|
|
|
| 15 |
Returns:
|
| 16 |
List of dicts with 'model' and 'response' keys
|
| 17 |
"""
|
| 18 |
+
print("STAGE 1: Collecting individual responses from council members...")
|
| 19 |
messages = [{"role": "user", "content": user_query}]
|
| 20 |
|
| 21 |
# Query all models in parallel
|
|
|
|
| 30 |
"response": response.get('content', '')
|
| 31 |
})
|
| 32 |
|
| 33 |
+
print(f"STAGE 1 COMPLETE: Received {len(stage1_results)} responses.")
|
| 34 |
return stage1_results
|
| 35 |
|
| 36 |
|
|
|
|
| 48 |
Returns:
|
| 49 |
Tuple of (rankings list, label_to_model mapping)
|
| 50 |
"""
|
| 51 |
+
print("STAGE 2: Council members are ranking each other's responses...")
|
| 52 |
# Create anonymized labels for responses (Response A, Response B, etc.)
|
| 53 |
labels = [chr(65 + i) for i in range(len(stage1_results))] # A, B, C, ...
|
| 54 |
|
|
|
|
| 112 |
"parsed_ranking": parsed
|
| 113 |
})
|
| 114 |
|
| 115 |
+
print("STAGE 2 COMPLETE: Rankings collected.")
|
| 116 |
return stage2_results, label_to_model
|
| 117 |
|
| 118 |
|
|
|
|
| 132 |
Returns:
|
| 133 |
Dict with 'model' and 'response' keys
|
| 134 |
"""
|
| 135 |
+
print("STAGE 3: Chairman is synthesizing the final answer...")
|
| 136 |
# Build comprehensive context for chairman
|
| 137 |
stage1_text = "\n\n".join([
|
| 138 |
f"Model: {result['model']}\nResponse: {result['response']}"
|
|
|
|
| 168 |
|
| 169 |
if response is None:
|
| 170 |
# Fallback if chairman fails
|
| 171 |
+
print("STAGE 3 ERROR: Unable to generate final synthesis.")
|
| 172 |
return {
|
| 173 |
"model": CHAIRMAN_MODEL,
|
| 174 |
"response": "Error: Unable to generate final synthesis."
|
| 175 |
}
|
| 176 |
|
| 177 |
+
print("STAGE 3 COMPLETE: Final answer synthesized.")
|
| 178 |
return {
|
| 179 |
"model": CHAIRMAN_MODEL,
|
| 180 |
"response": response.get('content', '')
|
|
|
|
| 325 |
|
| 326 |
# Calculate aggregate rankings
|
| 327 |
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
|
| 328 |
+
|
| 329 |
# Stage 3: Synthesize final answer
|
| 330 |
stage3_result = await stage3_synthesize_final(
|
| 331 |
user_query,
|