"""SAAP multi-agent performance benchmark: a Gradio UI that times prompt
responses from locally installed Ollama models for the SAAP master thesis."""

import time
from datetime import datetime

import gradio as gr
import requests


class OllamaSAAPBenchmark:
    """Thin client for a local Ollama server used to benchmark SAAP agents."""

    def __init__(self, base_url="http://localhost:11434"):
        # Base URL of the Ollama REST API (default local install).
        self.base_url = base_url

    def test_agent_response(self, prompt, model, agent_role="General"):
        """Send one generation request and return timing/response metadata.

        Args:
            prompt: User-supplied test prompt.
            model: Ollama model name to query.
            agent_role: "Jane"/"John"/"Justus"/"General" — selects a
                role-specific German prompt prefix for the agent simulation.

        Returns:
            dict always containing "status" and "time"; on success also
            "response", "model", "agent_role" and "tokens" (a rough
            whitespace-word count, not true tokenizer tokens).
        """
        # Role-specific prompt prefixes for the SAAP agent simulation.
        saap_prompts = {
            "Jane": f"Als KI-Architektin: {prompt}",
            "John": f"Als Entwickler: {prompt}",
            "Justus": f"Als Rechtsexperte: {prompt}",
            "General": prompt,
        }
        final_prompt = saap_prompts.get(agent_role, prompt)

        start_time = time.time()
        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": final_prompt,
                    "stream": False,
                    "options": {"temperature": 0.7, "num_predict": 256},
                },
                timeout=60,
            )
            end_time = time.time()

            if response.status_code == 200:
                result = response.json()
                text = result.get("response", "")
                return {
                    "response": text,
                    "time": f"{end_time - start_time:.2f}s",
                    "model": model,
                    "agent_role": agent_role,
                    "tokens": len(text.split()),
                    "status": "✅ Success",
                }
            return {
                "status": f"❌ Error {response.status_code}",
                "time": f"{end_time - start_time:.2f}s",
            }
        except requests.RequestException as e:
            # Narrowed from a blanket except: only network/HTTP failures are
            # expected here; programming errors should surface, not be hidden.
            return {
                "status": f"❌ Connection Error: {str(e)[:50]}...",
                "time": f"{time.time() - start_time:.2f}s",
            }

    def list_models(self):
        """Return the names of locally installed Ollama models.

        On failure, returns a one-element list with a human-readable error
        string (callers render the list directly in the UI).
        """
        try:
            # Timeout added so a hung server cannot block UI startup forever.
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                models = response.json().get("models", [])
                return [model["name"] for model in models]
            return ["Connection failed - check if Ollama is running"]
        except requests.RequestException:
            # Was a bare `except:` — narrowed to network errors only.
            return ["❌ Cannot connect to Ollama"]


# Initialize benchmark system once at import time; the model list is
# queried here so the UI widgets below can be populated with real choices.
benchmark = OllamaSAAPBenchmark()
available_models = benchmark.list_models()


def run_saap_benchmark(prompt, selected_models, agent_role):
    """Benchmark every selected (and available) model and render a markdown report.

    Args:
        prompt: Test prompt from the textbox; blank input short-circuits.
        selected_models: Model names chosen in the CheckboxGroup.
        agent_role: Agent role forwarded to `test_agent_response`.

    Returns:
        A markdown string with per-model results and a performance summary.
    """
    if not prompt.strip():
        return "⚠️ Bitte geben Sie einen Test-Prompt ein."

    results = []
    results.append("# 🚀 SAAP Multi-Agent Performance Benchmark")
    results.append(f"**Agent Role:** {agent_role}")
    results.append(f"**Test Prompt:** {prompt}")
    results.append(f"**Models:** {', '.join(selected_models)}")
    results.append(f"**Timestamp:** {datetime.now().strftime('%H:%M:%S')}")
    results.append("---")

    total_time = 0.0
    tested_count = 0  # models actually benchmarked (selected AND available)

    for model in selected_models:
        if model not in available_models:
            # Skip stale selections (e.g. model removed after UI startup).
            continue
        tested_count += 1

        result = benchmark.test_agent_response(prompt, model, agent_role)
        results.append(f"## 🤖 {model.upper()} ({agent_role})")
        results.append(f"**Status:** {result.get('status', '❌ Error')}")
        results.append(f"**Response Time:** {result.get('time', 'N/A')}")
        results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
        if result.get('response'):
            preview = result['response'][:100].replace('\n', ' ')
            results.append(f"**Response Preview:** {preview}...")
        results.append("---")

        # Accumulate for the average; "time" is formatted like "1.23s".
        try:
            total_time += float(result.get('time', '0').rstrip('s'))
        except ValueError:
            pass  # error entries may carry unparsable time strings

    # Performance summary.
    # Fix: the original divided by len(selected_models) and reported that
    # as "Total Models Tested", even though unavailable models are skipped
    # above — both the average and the count were wrong in that case.
    if tested_count:
        avg_time = total_time / tested_count
        results.append("## 📊 Performance Summary")
        results.append(f"**Average Response Time:** {avg_time:.2f}s")
        results.append(f"**Total Models Tested:** {tested_count}")

        # SAAP performance assessment against the thesis targets.
        if avg_time < 2.0:
            results.append("**SAAP Assessment:** ✅ Excellent for real-time multi-agent coordination")
        elif avg_time < 5.0:
            results.append("**SAAP Assessment:** ⚠️ Acceptable for batch processing")
        else:
            results.append("**SAAP Assessment:** ❌ Too slow for interactive agents")

    return "\n".join(results)


# Gradio interface (built at import time so `demo` exists for launchers).
with gr.Blocks(title="SAAP Performance Benchmark", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 SAAP - satware AI Agent Platform Benchmark")
    gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Hochschule Worms** | **satware AG**")

    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="SAAP Test Prompt",
                placeholder="Beispiel: Entwickle eine Systemarchitektur für Multi-Agent Koordination",
                lines=3,
                value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen.",
            )
            agent_role = gr.Dropdown(
                choices=["General", "Jane", "John", "Justus"],
                label="Agent Role Simulation",
                value="General",
            )
        with gr.Column(scale=1):
            model_selection = gr.CheckboxGroup(
                choices=available_models,
                label="Models to Benchmark",
                # Pre-select up to two models so the demo works out of the box.
                value=available_models[:2] if len(available_models) >= 2 else available_models,
            )

    benchmark_btn = gr.Button("🚀 Run SAAP Benchmark", variant="primary", size="lg")

    # Results
    results_output = gr.Markdown(label="Benchmark Results")

    # Wire the button to the benchmark function.
    benchmark_btn.click(
        run_saap_benchmark,
        inputs=[prompt_input, model_selection, agent_role],
        outputs=results_output,
    )

    # System info panel (static snapshot taken at import time).
    with gr.Accordion("ℹ️ System Information", open=False):
        gr.Markdown(f"""
### 📋 SAAP Test Environment
- **Available Models:** {len(available_models)}
- **Models:** {', '.join(available_models)}
- **Ollama Server:** {benchmark.base_url}

### 🎯 SAAP Performance Targets
- **Real-time Coordination:** < 2s per response
- **Batch Processing:** < 5s per response
- **Multi-Agent Sync:** < 10s for complex workflows

### 🎓 Master Thesis Context
**Projekt:** SAAP - satware AI Autonomous Agent Platform
**Student:** Hanan Wandji Danga
**Universität:** Hochschule Worms
**Betreuung:** Michael Wegener
**Ziel:** On-Premise Multi-Agent-Plattform mit lokalen LLMs
""")


if __name__ == "__main__":
    demo.launch()