burtenshaw
committed on
Commit
Β·
f3325f9
1
Parent(s):
41799ac
add status to application
Browse files- app.py +49 -19
- backend/council.py +8 -1
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from backend.council import
|
| 3 |
from backend.config import COUNCIL_MODELS, CHAIRMAN_MODEL
|
| 4 |
|
| 5 |
-
|
|
|
|
| 6 |
"""
|
| 7 |
Ask the LLM Council a question.
|
| 8 |
|
|
@@ -13,41 +14,70 @@ async def ask_council(question: str) -> str:
|
|
| 13 |
|
| 14 |
Args:
|
| 15 |
question: The user's question to be discussed by the council.
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
""".format(
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
try:
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
response = stage3_result.get("response")
|
| 27 |
if not response:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
-
|
| 34 |
|
| 35 |
|
| 36 |
description = """
|
| 37 |
An MCP server that consults a council of LLMs to answer questions.
|
| 38 |

|
| 39 |
-
β οΈ
|
| 40 |
"""
|
| 41 |
|
| 42 |
demo = gr.Interface(
|
| 43 |
fn=ask_council,
|
| 44 |
inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
|
| 45 |
-
outputs=gr.Markdown(),
|
| 46 |
title="LLM Council MCP Server",
|
| 47 |
-
description=description
|
| 48 |
)
|
| 49 |
|
| 50 |
if __name__ == "__main__":
|
| 51 |
# Launch with mcp_server=True to expose as MCP
|
| 52 |
demo.launch(mcp_server=True, show_error=True)
|
| 53 |
-
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from backend.council import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final
|
| 3 |
from backend.config import COUNCIL_MODELS, CHAIRMAN_MODEL
|
| 4 |
|
| 5 |
+
|
| 6 |
+
async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.

    The question is processed in three stages: every council model answers
    independently (stage 1), the council members rank each other's answers
    (stage 2), and the chairman model synthesizes a final response (stage 3).

    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.

    Yields:
        Status updates and finally the synthesized answer.
    """
    # NOTE(review): the previous version applied `.format(models=..., chairman=...)`
    # to the string above. A string literal followed by a method call is an
    # ordinary expression whose result is discarded, NOT a docstring, so the
    # function ended up with __doc__ = None (breaking the MCP tool description).
    # If the live model names should appear in the description, assign
    # `ask_council.__doc__` after the definition instead of calling .format here.
    try:
        # Stage 1: each council model answers the question independently.
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        yield "## 🟡 Stage 1: Collecting individual responses from council members..."

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            yield "❌ The council failed to generate a response."
            return

        # Stage 2: council members rank each other's (anonymized) responses.
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        yield (
            f"## 🟢 Stage 1 Complete ({len(stage1_results)} responses received).\n\n"
            "## 🟡 Stage 2: Council members are ranking each other's responses..."
        )

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Stage 3: the chairman model synthesizes the council's final answer.
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        yield (
            "## 🟢 Stage 2 Complete (Rankings collected).\n\n"
            "## 🟡 Stage 3: Chairman is synthesizing the final answer..."
        )

        stage3_result = await stage3_synthesize_final(question, stage1_results, stage2_results)

        progress(1.0, desc="Complete!")

        response = stage3_result.get("response")
        if not response:
            yield "❌ The council failed to generate a final synthesis."
            return

        yield response

    except Exception as e:
        # Top-level boundary: surface the error to the UI instead of crashing.
        yield f"❌ Error consulting the council: {str(e)}"
|
| 65 |
|
| 66 |
|
| 67 |
# UI description shown on the Gradio page and exposed via the MCP server.
# NOTE(review): the emoji / image markup below was garbled by extraction —
# verify the warning line renders as intended (⚠️) against the live app.
description = """
An MCP server that consults a council of LLMs to answer questions.

⚠️ We're using 5 models in the council, so it takes a minute to answer.
"""

# Simple single-input interface: a textbox in, Markdown (streamed status
# updates followed by the final answer) out.
demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose as MCP
    demo.launch(mcp_server=True, show_error=True)
|
|
|
backend/council.py
CHANGED
|
@@ -15,6 +15,7 @@ async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]:
|
|
| 15 |
Returns:
|
| 16 |
List of dicts with 'model' and 'response' keys
|
| 17 |
"""
|
|
|
|
| 18 |
messages = [{"role": "user", "content": user_query}]
|
| 19 |
|
| 20 |
# Query all models in parallel
|
|
@@ -29,6 +30,7 @@ async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]:
|
|
| 29 |
"response": response.get('content', '')
|
| 30 |
})
|
| 31 |
|
|
|
|
| 32 |
return stage1_results
|
| 33 |
|
| 34 |
|
|
@@ -46,6 +48,7 @@ async def stage2_collect_rankings(
|
|
| 46 |
Returns:
|
| 47 |
Tuple of (rankings list, label_to_model mapping)
|
| 48 |
"""
|
|
|
|
| 49 |
# Create anonymized labels for responses (Response A, Response B, etc.)
|
| 50 |
labels = [chr(65 + i) for i in range(len(stage1_results))] # A, B, C, ...
|
| 51 |
|
|
@@ -109,6 +112,7 @@ Now provide your evaluation and ranking:"""
|
|
| 109 |
"parsed_ranking": parsed
|
| 110 |
})
|
| 111 |
|
|
|
|
| 112 |
return stage2_results, label_to_model
|
| 113 |
|
| 114 |
|
|
@@ -128,6 +132,7 @@ async def stage3_synthesize_final(
|
|
| 128 |
Returns:
|
| 129 |
Dict with 'model' and 'response' keys
|
| 130 |
"""
|
|
|
|
| 131 |
# Build comprehensive context for chairman
|
| 132 |
stage1_text = "\n\n".join([
|
| 133 |
f"Model: {result['model']}\nResponse: {result['response']}"
|
|
@@ -163,11 +168,13 @@ Provide a clear, well-reasoned final answer that represents the council's collec
|
|
| 163 |
|
| 164 |
if response is None:
|
| 165 |
# Fallback if chairman fails
|
|
|
|
| 166 |
return {
|
| 167 |
"model": CHAIRMAN_MODEL,
|
| 168 |
"response": "Error: Unable to generate final synthesis."
|
| 169 |
}
|
| 170 |
|
|
|
|
| 171 |
return {
|
| 172 |
"model": CHAIRMAN_MODEL,
|
| 173 |
"response": response.get('content', '')
|
|
@@ -318,7 +325,7 @@ async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]:
|
|
| 318 |
|
| 319 |
# Calculate aggregate rankings
|
| 320 |
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
|
| 321 |
-
|
| 322 |
# Stage 3: Synthesize final answer
|
| 323 |
stage3_result = await stage3_synthesize_final(
|
| 324 |
user_query,
|
|
|
|
| 15 |
Returns:
|
| 16 |
List of dicts with 'model' and 'response' keys
|
| 17 |
"""
|
| 18 |
+
print("STAGE 1: Collecting individual responses from council members...")
|
| 19 |
messages = [{"role": "user", "content": user_query}]
|
| 20 |
|
| 21 |
# Query all models in parallel
|
|
|
|
| 30 |
"response": response.get('content', '')
|
| 31 |
})
|
| 32 |
|
| 33 |
+
print(f"STAGE 1 COMPLETE: Received {len(stage1_results)} responses.")
|
| 34 |
return stage1_results
|
| 35 |
|
| 36 |
|
|
|
|
| 48 |
Returns:
|
| 49 |
Tuple of (rankings list, label_to_model mapping)
|
| 50 |
"""
|
| 51 |
+
print("STAGE 2: Council members are ranking each other's responses...")
|
| 52 |
# Create anonymized labels for responses (Response A, Response B, etc.)
|
| 53 |
labels = [chr(65 + i) for i in range(len(stage1_results))] # A, B, C, ...
|
| 54 |
|
|
|
|
| 112 |
"parsed_ranking": parsed
|
| 113 |
})
|
| 114 |
|
| 115 |
+
print("STAGE 2 COMPLETE: Rankings collected.")
|
| 116 |
return stage2_results, label_to_model
|
| 117 |
|
| 118 |
|
|
|
|
| 132 |
Returns:
|
| 133 |
Dict with 'model' and 'response' keys
|
| 134 |
"""
|
| 135 |
+
print("STAGE 3: Chairman is synthesizing the final answer...")
|
| 136 |
# Build comprehensive context for chairman
|
| 137 |
stage1_text = "\n\n".join([
|
| 138 |
f"Model: {result['model']}\nResponse: {result['response']}"
|
|
|
|
| 168 |
|
| 169 |
if response is None:
|
| 170 |
# Fallback if chairman fails
|
| 171 |
+
print("STAGE 3 ERROR: Unable to generate final synthesis.")
|
| 172 |
return {
|
| 173 |
"model": CHAIRMAN_MODEL,
|
| 174 |
"response": "Error: Unable to generate final synthesis."
|
| 175 |
}
|
| 176 |
|
| 177 |
+
print("STAGE 3 COMPLETE: Final answer synthesized.")
|
| 178 |
return {
|
| 179 |
"model": CHAIRMAN_MODEL,
|
| 180 |
"response": response.get('content', '')
|
|
|
|
| 325 |
|
| 326 |
# Calculate aggregate rankings
|
| 327 |
aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
|
| 328 |
+
|
| 329 |
# Stage 3: Synthesize final answer
|
| 330 |
stage3_result = await stage3_synthesize_final(
|
| 331 |
user_query,
|