# SAAP Ollama benchmark — Gradio app (Hugging Face Space)
import gradio as gr
import requests
import json
import time
from datetime import datetime
class OllamaSAAPBenchmark:
    """Minimal client that benchmarks Ollama models with SAAP agent-role prompts.

    Talks to a local Ollama server over its REST API (``/api/generate`` and
    ``/api/tags``) and reports per-request latency and rough token counts.
    """

    def __init__(self, base_url="http://localhost:11434"):
        # Base URL of the Ollama HTTP server (default: standard local install).
        self.base_url = base_url

    def test_agent_response(self, prompt, model, agent_role="General"):
        """Run one non-streaming generation and return timing/status metadata.

        The prompt is prefixed with a German role instruction depending on
        ``agent_role`` ("Jane" = AI architect, "John" = developer,
        "Justus" = legal expert); unknown roles fall back to the raw prompt.

        Returns a dict that always contains ``"status"`` and ``"time"``; on
        success it also contains ``"response"``, ``"model"``, ``"agent_role"``
        and ``"tokens"`` (whitespace-split word count, not true model tokens).
        """
        # SAAP-specific prompt prefixes per agent role.
        saap_prompts = {
            "Jane": f"Als KI-Architektin: {prompt}",
            "John": f"Als Entwickler: {prompt}",
            "Justus": f"Als Rechtsexperte: {prompt}",
            "General": prompt,
        }
        final_prompt = saap_prompts.get(agent_role, prompt)

        start_time = time.time()
        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": final_prompt,
                    "stream": False,  # single blocking response is simpler to time
                    "options": {"temperature": 0.7, "num_predict": 256},
                },
                timeout=60,
            )
            elapsed = time.time() - start_time
            if response.status_code == 200:
                result = response.json()
                text = result.get("response", "")
                return {
                    "response": text,
                    "time": f"{elapsed:.2f}s",
                    "model": model,
                    "agent_role": agent_role,
                    "tokens": len(text.split()),
                    "status": "✅ Success",
                }
            return {"status": f"❌ Error {response.status_code}", "time": f"{elapsed:.2f}s"}
        except Exception as e:
            # Connection refused, timeout, malformed JSON, ... — return an error
            # dict instead of raising so the UI keeps working when Ollama is down.
            return {"status": f"❌ Connection Error: {str(e)[:50]}...", "time": f"{time.time() - start_time:.2f}s"}

    def list_models(self):
        """Return the names of locally installed models.

        On any failure a one-element list with a human-readable error string is
        returned, so the UI's model checkbox group always has content.
        """
        try:
            # Fix: add a timeout so a hung server cannot block app startup forever.
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                models = response.json().get("models", [])
                return [model["name"] for model in models]
            return ["Connection failed - check if Ollama is running"]
        except Exception:  # fix: was a bare `except:` (also swallowed SystemExit/KeyboardInterrupt)
            return ["❌ Cannot connect to Ollama"]
# Initialize benchmark system
# Module-level singleton client used by the Gradio callbacks below.
benchmark = OllamaSAAPBenchmark()
# Queried once at startup; entries may be error strings if Ollama is unreachable.
available_models = benchmark.list_models()
# SAAP Benchmark Interface
def run_saap_benchmark(prompt, selected_models, agent_role):
    """Benchmark ``prompt`` against each selected model and build a Markdown report.

    Uses the module-level ``benchmark`` client and ``available_models`` list;
    selected models that are not installed locally are skipped.

    Returns the full report as a single Markdown string (also used for the
    warning message when the prompt is empty).
    """
    if not prompt.strip():
        return "⚠️ Bitte geben Sie einen Test-Prompt ein."

    results = [
        "# 🚀 SAAP Multi-Agent Performance Benchmark",
        f"**Agent Role:** {agent_role}",
        f"**Test Prompt:** {prompt}",
        f"**Models:** {', '.join(selected_models)}",
        f"**Timestamp:** {datetime.now().strftime('%H:%M:%S')}",
        "---",
    ]

    total_time = 0.0
    tested = 0  # fix: count models actually benchmarked, not merely selected
    for model in selected_models:
        if model not in available_models:
            continue  # skip models the local Ollama install does not provide
        result = benchmark.test_agent_response(prompt, model, agent_role)
        results.append(f"## 🤖 {model.upper()} ({agent_role})")
        results.append(f"**Status:** {result.get('status', '❌ Error')}")
        results.append(f"**Response Time:** {result.get('time', 'N/A')}")
        results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
        if result.get('response'):
            # Single-line preview of the first 100 characters.
            preview = result['response'][:100].replace('\n', ' ')
            results.append(f"**Response Preview:** {preview}...")
        results.append("---")
        # Accumulate for the average; "time" looks like "1.23s".
        try:
            total_time += float(result.get('time', '0').rstrip('s'))
            tested += 1
        except ValueError:  # fix: was a bare except; only malformed times are expected
            pass

    # Performance Summary — fix: average over models actually tested instead of
    # all selected ones (skipped models previously diluted the average and the
    # "Total Models Tested" count).
    if tested:
        avg_time = total_time / tested
        results.append("## 📊 Performance Summary")
        results.append(f"**Average Response Time:** {avg_time:.2f}s")
        results.append(f"**Total Models Tested:** {tested}")
        # SAAP thresholds (mirrored in the System Information accordion).
        if avg_time < 2.0:
            results.append("**SAAP Assessment:** ✅ Excellent for real-time multi-agent coordination")
        elif avg_time < 5.0:
            results.append("**SAAP Assessment:** ⚠️ Acceptable for batch processing")
        else:
            results.append("**SAAP Assessment:** ❌ Too slow for interactive agents")

    return "\n".join(results)
# Gradio Interface
# Builds the SAAP benchmark UI; `demo` is launched by the __main__ guard below.
with gr.Blocks(title="SAAP Performance Benchmark", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 SAAP - satware AI Agent Platform Benchmark")
    gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Hochschule Worms** | **satware AG**")
    with gr.Row():
        with gr.Column(scale=2):
            # Left column: the benchmark prompt and the simulated agent role.
            prompt_input = gr.Textbox(
                label="SAAP Test Prompt",
                placeholder="Beispiel: Entwickle eine Systemarchitektur für Multi-Agent Koordination",
                lines=3,
                value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
            )
            agent_role = gr.Dropdown(
                choices=["General", "Jane", "John", "Justus"],
                label="Agent Role Simulation",
                value="General"
            )
        with gr.Column(scale=1):
            # Right column: which locally installed models to benchmark.
            model_selection = gr.CheckboxGroup(
                choices=available_models,
                label="Models to Benchmark",
                # Pre-select the first two models when at least two are installed.
                value=available_models[:2] if len(available_models) >= 2 else available_models
            )
    benchmark_btn = gr.Button("🚀 Run SAAP Benchmark", variant="primary", size="lg")
    # Results
    # Markdown pane that receives the report string from run_saap_benchmark.
    results_output = gr.Markdown(label="Benchmark Results")
    # Benchmark function
    benchmark_btn.click(
        run_saap_benchmark,
        inputs=[prompt_input, model_selection, agent_role],
        outputs=results_output
    )
    # System Info
    # Static environment/thesis details, rendered once at startup.
    with gr.Accordion("ℹ️ System Information", open=False):
        gr.Markdown(f"""
        ### 📋 SAAP Test Environment
        - **Available Models:** {len(available_models)}
        - **Models:** {', '.join(available_models)}
        - **Ollama Server:** {benchmark.base_url}
        ### 🎯 SAAP Performance Targets
        - **Real-time Coordination:** < 2s per response
        - **Batch Processing:** < 5s per response
        - **Multi-Agent Sync:** < 10s for complex workflows
        ### 🎓 Master Thesis Context
        **Projekt:** SAAP - satware AI Autonomous Agent Platform
        **Student:** Hanan Wandji Danga
        **Universität:** Hochschule Worms
        **Betreuung:** Michael Wegener
        **Ziel:** On-Premise Multi-Agent-Plattform mit lokalen LLMs
        """)
# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    demo.launch()