# Ollama-Test / app_local.py
# Author: Hwandji — "🚀 Switch to HuggingFace Cloud app for Space deployment" (commit 343cc98)
import gradio as gr
import requests
import json
import time
from datetime import datetime
class OllamaSAAPBenchmark:
    """Benchmark client that sends SAAP agent-role prompts to a local Ollama server.

    Talks to the Ollama HTTP API (``/api/generate``, ``/api/tags``) and returns
    plain dicts / lists of strings so the Gradio UI can render them directly.
    """

    def __init__(self, base_url="http://localhost:11434"):
        # Base URL of the Ollama HTTP API (default: standard local install port).
        self.base_url = base_url

    def test_agent_response(self, prompt, model, agent_role="General"):
        """Send *prompt* to *model*, prefixed for the given SAAP agent role.

        Returns a dict with keys ``response``, ``time``, ``model``,
        ``agent_role``, ``tokens`` and ``status`` on success; on HTTP or
        connection errors a dict with only ``status`` and ``time``.
        """
        # Role-specific prompt prefixes for the SAAP agent simulation;
        # unknown roles fall back to the raw prompt.
        saap_prompts = {
            "Jane": f"Als KI-Architektin: {prompt}",
            "John": f"Als Entwickler: {prompt}",
            "Justus": f"Als Rechtsexperte: {prompt}",
            "General": prompt,
        }
        final_prompt = saap_prompts.get(agent_role, prompt)
        start_time = time.time()
        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": final_prompt,
                    "stream": False,
                    "options": {"temperature": 0.7, "num_predict": 256},
                },
                timeout=60,
            )
            elapsed = time.time() - start_time
            if response.status_code == 200:
                result = response.json()
                text = result.get("response", "")
                return {
                    "response": text,
                    "time": f"{elapsed:.2f}s",
                    "model": model,
                    "agent_role": agent_role,
                    # Rough token estimate: whitespace-separated word count.
                    "tokens": len(text.split()),
                    "status": "✅ Success",
                }
            return {"status": f"❌ Error {response.status_code}", "time": f"{elapsed:.2f}s"}
        except Exception as e:
            # Broad (but not bare) catch keeps the benchmark UI alive on
            # network, timeout and JSON-decode errors.
            return {"status": f"❌ Connection Error: {str(e)[:50]}...", "time": f"{time.time() - start_time:.2f}s"}

    def list_models(self):
        """Return the names of models installed on the Ollama server.

        On failure returns a one-element list containing a human-readable
        error string (the UI displays list entries verbatim).
        """
        try:
            # FIX: added a timeout — the original request had none, so a hung
            # server would block app startup indefinitely.
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                models = response.json().get("models", [])
                return [model["name"] for model in models]
            return ["Connection failed - check if Ollama is running"]
        except Exception:
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            return ["❌ Cannot connect to Ollama"]
# Initialize benchmark system
# Module-level singletons shared by the Gradio callbacks below: the Ollama
# client, and the model list fetched once at import time (on failure
# list_models() returns a single error-message entry instead of raising).
benchmark = OllamaSAAPBenchmark()
available_models = benchmark.list_models()
# SAAP Benchmark Interface
def run_saap_benchmark(prompt, selected_models, agent_role):
    """Benchmark *prompt* against each selected model and render a Markdown report.

    Args:
        prompt: test prompt text; blank input returns a warning string.
        selected_models: model names chosen in the UI checkbox group.
        agent_role: SAAP role ("General", "Jane", "John", "Justus").

    Returns a Markdown string with per-model results and a performance summary.

    FIX: unavailable models are now reported as skipped instead of silently
    ignored, and the average response time is computed over the models that
    were actually tested — previously it divided by len(selected_models),
    understating the average (and printed a misleading 0.00s "Excellent"
    summary when no selected model was available at all).
    """
    if not prompt.strip():
        return "⚠️ Bitte geben Sie einen Test-Prompt ein."

    results = [
        "# 🚀 SAAP Multi-Agent Performance Benchmark",
        f"**Agent Role:** {agent_role}",
        f"**Test Prompt:** {prompt}",
        f"**Models:** {', '.join(selected_models)}",
        f"**Timestamp:** {datetime.now().strftime('%H:%M:%S')}",
        "---",
    ]

    total_time = 0.0
    tested = 0
    for model in selected_models:
        if model not in available_models:
            # Surface skipped selections instead of dropping them silently.
            results.append(f"## 🤖 {model.upper()} ({agent_role})")
            results.append("**Status:** ⚠️ Skipped - model not available")
            results.append("---")
            continue

        result = benchmark.test_agent_response(prompt, model, agent_role)
        tested += 1
        results.append(f"## 🤖 {model.upper()} ({agent_role})")
        results.append(f"**Status:** {result.get('status', '❌ Error')}")
        results.append(f"**Response Time:** {result.get('time', 'N/A')}")
        results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
        if result.get('response'):
            preview = result['response'][:100].replace('\n', ' ')
            results.append(f"**Response Preview:** {preview}...")
        results.append("---")

        # Accumulate timing for the average; non-numeric strings (e.g. "N/A")
        # are ignored rather than crashing the report.
        try:
            total_time += float(result.get('time', '0').rstrip('s'))
        except ValueError:
            pass

    # Performance Summary — only over models that were actually benchmarked.
    if tested:
        avg_time = total_time / tested
        results.append("## 📊 Performance Summary")
        results.append(f"**Average Response Time:** {avg_time:.2f}s")
        results.append(f"**Total Models Tested:** {tested}")
        # SAAP latency targets (mirrors the System Information accordion).
        if avg_time < 2.0:
            results.append("**SAAP Assessment:** ✅ Excellent for real-time multi-agent coordination")
        elif avg_time < 5.0:
            results.append("**SAAP Assessment:** ⚠️ Acceptable for batch processing")
        else:
            results.append("**SAAP Assessment:** ❌ Too slow for interactive agents")

    return "\n".join(results)
# Gradio Interface
# Declarative UI: layout follows the order of the `with` blocks below.
with gr.Blocks(title="SAAP Performance Benchmark", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 SAAP - satware AI Agent Platform Benchmark")
    gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Hochschule Worms** | **satware AG**")
    with gr.Row():
        with gr.Column(scale=2):
            # Free-text benchmark prompt; pre-filled with a sample question.
            prompt_input = gr.Textbox(
                label="SAAP Test Prompt",
                placeholder="Beispiel: Entwickle eine Systemarchitektur für Multi-Agent Koordination",
                lines=3,
                value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
            )
            # Simulated SAAP agent role — selects the prompt prefix in
            # OllamaSAAPBenchmark.test_agent_response.
            agent_role = gr.Dropdown(
                choices=["General", "Jane", "John", "Justus"],
                label="Agent Role Simulation",
                value="General"
            )
        with gr.Column(scale=1):
            # Models discovered at startup; default-select the first two.
            model_selection = gr.CheckboxGroup(
                choices=available_models,
                label="Models to Benchmark",
                value=available_models[:2] if len(available_models) >= 2 else available_models
            )
    benchmark_btn = gr.Button("🚀 Run SAAP Benchmark", variant="primary", size="lg")
    # Results
    # Markdown pane that receives the report string from run_saap_benchmark.
    results_output = gr.Markdown(label="Benchmark Results")
    # Benchmark function
    benchmark_btn.click(
        run_saap_benchmark,
        inputs=[prompt_input, model_selection, agent_role],
        outputs=results_output
    )
    # System Info
    # Static environment/thesis details; f-string is evaluated once at startup.
    with gr.Accordion("ℹ️ System Information", open=False):
        gr.Markdown(f"""
### 📋 SAAP Test Environment
- **Available Models:** {len(available_models)}
- **Models:** {', '.join(available_models)}
- **Ollama Server:** {benchmark.base_url}
### 🎯 SAAP Performance Targets
- **Real-time Coordination:** < 2s per response
- **Batch Processing:** < 5s per response
- **Multi-Agent Sync:** < 10s for complex workflows
### 🎓 Master Thesis Context
**Projekt:** SAAP - satware AI Autonomous Agent Platform
**Student:** Hanan Wandji Danga
**Universität:** Hochschule Worms
**Betreuung:** Michael Wegener
**Ziel:** On-Premise Multi-Agent-Plattform mit lokalen LLMs
""")
# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()