# Hugging Face Spaces status banner captured during export ("Spaces: Sleeping")
# — not part of the program; kept as a comment so the file remains valid Python.
import json
import time
from datetime import datetime

import gradio as gr
import requests
class OllamaSAAPBenchmark:
    """Benchmark helper for SAAP agent-role prompts against a local Ollama server.

    Wraps the Ollama HTTP API (``/api/generate`` and ``/api/tags``) and
    measures wall-clock latency per model / agent-role combination.
    """

    def __init__(self, base_url="http://localhost:11434"):
        # Base URL of the Ollama server (default is Ollama's standard port).
        self.base_url = base_url

    def test_agent_response(self, prompt, model, agent_role="General"):
        """Send an agent-role-specific prompt to Ollama and time the response.

        Args:
            prompt: Raw user prompt to test.
            model: Name of the Ollama model to query.
            agent_role: "Jane", "John", "Justus" or "General"; unknown roles
                fall back to the raw prompt.

        Returns:
            dict: always contains "status" and "time"; on success also
            "response", "model", "agent_role" and "tokens" (an approximate
            whitespace-split word count, not real model tokens).
        """
        # SAAP role framing: each persona gets a German role prefix.
        saap_prompts = {
            "Jane": f"Als KI-Architektin: {prompt}",
            "John": f"Als Entwickler: {prompt}",
            "Justus": f"Als Rechtsexperte: {prompt}",
            "General": prompt,
        }
        final_prompt = saap_prompts.get(agent_role, prompt)

        start_time = time.time()
        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": final_prompt,
                    # stream=False -> a single JSON payload instead of NDJSON chunks.
                    "stream": False,
                    "options": {"temperature": 0.7, "num_predict": 256},
                },
                timeout=60,
            )
            end_time = time.time()
            if response.status_code == 200:
                result = response.json()
                answer = result.get("response", "")
                return {
                    "response": answer,
                    "time": f"{end_time - start_time:.2f}s",
                    "model": model,
                    "agent_role": agent_role,
                    # Approximate: whitespace word count, not tokenizer tokens.
                    "tokens": len(answer.split()),
                    "status": "✅ Success",
                }
            return {
                "status": f"❌ Error {response.status_code}",
                "time": f"{end_time - start_time:.2f}s",
            }
        except Exception as e:
            # UI boundary: report the failure as a status instead of crashing.
            return {
                "status": f"❌ Connection Error: {str(e)[:50]}...",
                "time": f"{time.time() - start_time:.2f}s",
            }

    def list_models(self):
        """Return the names of models installed on the Ollama server.

        On any connection problem, returns a single-element placeholder list
        so the UI can still render a model selector.
        """
        try:
            # FIX: the GET previously had no timeout and could hang the UI forever.
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                models = response.json().get("models", [])
                return [model["name"] for model in models]
            return ["Connection failed - check if Ollama is running"]
        except requests.RequestException:
            # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt.
            return ["❌ Cannot connect to Ollama"]
# Initialize benchmark system.
# NOTE(review): list_models() performs a network request at import time; if the
# Ollama server is down, the UI still loads with a placeholder error entry.
benchmark = OllamaSAAPBenchmark()
available_models = benchmark.list_models()
# SAAP Benchmark Interface
def run_saap_benchmark(prompt, selected_models, agent_role):
    """Run the SAAP benchmark for one prompt across the selected models.

    Args:
        prompt: Test prompt (must be non-blank).
        selected_models: Model names chosen in the UI. Names that are not in
            the module-level ``available_models`` list are reported as skipped.
        agent_role: Agent persona forwarded to the benchmark.

    Returns:
        str: a Markdown report with per-model results and a summary.
    """
    if not prompt.strip():
        return "⚠️ Bitte geben Sie einen Test-Prompt ein."
    # FIX: previously an empty selection produced a headerless half-report.
    if not selected_models:
        return "⚠️ Bitte wählen Sie mindestens ein Modell aus."

    results = [
        "# 🚀 SAAP Multi-Agent Performance Benchmark",
        f"**Agent Role:** {agent_role}",
        f"**Test Prompt:** {prompt}",
        f"**Models:** {', '.join(selected_models)}",
        f"**Timestamp:** {datetime.now().strftime('%H:%M:%S')}",
        "---",
    ]

    total_time = 0.0
    # FIX: the average was divided by len(selected_models), so models that were
    # silently skipped (not installed) counted as 0s responses. Count only the
    # models actually tested, and report skipped ones explicitly.
    tested = 0
    for model in selected_models:
        results.append(f"## 🤖 {model.upper()} ({agent_role})")
        if model not in available_models:
            results.append("**Status:** ⚠️ Skipped - model not available")
            results.append("---")
            continue

        result = benchmark.test_agent_response(prompt, model, agent_role)
        tested += 1
        results.append(f"**Status:** {result.get('status', '❌ Error')}")
        results.append(f"**Response Time:** {result.get('time', 'N/A')}")
        results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
        if result.get('response'):
            preview = result['response'][:100].replace('\n', ' ')
            results.append(f"**Response Preview:** {preview}...")
        results.append("---")

        # Accumulate for the average; "time" is formatted like "1.23s".
        try:
            total_time += float(result.get('time', '0').rstrip('s'))
        except ValueError:
            pass  # malformed time string — exclude from the average

    # Performance Summary (only meaningful if at least one model ran).
    if tested:
        avg_time = total_time / tested
        results.append("## 📊 Performance Summary")
        results.append(f"**Average Response Time:** {avg_time:.2f}s")
        results.append(f"**Total Models Tested:** {tested}")
        # SAAP Performance Assessment against the thesis latency targets.
        if avg_time < 2.0:
            results.append("**SAAP Assessment:** ✅ Excellent for real-time multi-agent coordination")
        elif avg_time < 5.0:
            results.append("**SAAP Assessment:** ⚠️ Acceptable for batch processing")
        else:
            results.append("**SAAP Assessment:** ❌ Too slow for interactive agents")
    return "\n".join(results)
# Gradio Interface — declarative UI layout plus the click-handler wiring.
with gr.Blocks(title="SAAP Performance Benchmark", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 SAAP - satware AI Agent Platform Benchmark")
    gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Hochschule Worms** | **satware AG**")
    with gr.Row():
        with gr.Column(scale=2):
            # Free-text prompt plus the agent persona used to prefix it.
            prompt_input = gr.Textbox(
                label="SAAP Test Prompt",
                placeholder="Beispiel: Entwickle eine Systemarchitektur für Multi-Agent Koordination",
                lines=3,
                value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
            )
            agent_role = gr.Dropdown(
                choices=["General", "Jane", "John", "Justus"],
                label="Agent Role Simulation",
                value="General"
            )
        with gr.Column(scale=1):
            # Pre-select the first two models when at least two are available.
            model_selection = gr.CheckboxGroup(
                choices=available_models,
                label="Models to Benchmark",
                value=available_models[:2] if len(available_models) >= 2 else available_models
            )
    # NOTE(review): original indentation was lost; the button is placed at the
    # Blocks level below the Row — confirm against the intended layout.
    benchmark_btn = gr.Button("🚀 Run SAAP Benchmark", variant="primary", size="lg")
    # Results
    results_output = gr.Markdown(label="Benchmark Results")
    # Benchmark function: click wires the three inputs to the report output.
    benchmark_btn.click(
        run_saap_benchmark,
        inputs=[prompt_input, model_selection, agent_role],
        outputs=results_output
    )
    # System Info — static environment summary rendered once at startup.
    with gr.Accordion("ℹ️ System Information", open=False):
        gr.Markdown(f"""
        ### 📋 SAAP Test Environment
        - **Available Models:** {len(available_models)}
        - **Models:** {', '.join(available_models)}
        - **Ollama Server:** {benchmark.base_url}
        ### 🎯 SAAP Performance Targets
        - **Real-time Coordination:** < 2s per response
        - **Batch Processing:** < 5s per response
        - **Multi-Agent Sync:** < 10s for complex workflows
        ### 🎓 Master Thesis Context
        **Projekt:** SAAP - satware AI Autonomous Agent Platform
        **Student:** Hanan Wandji Danga
        **Universität:** Hochschule Worms
        **Betreuung:** Michael Wegener
        **Ziel:** On-Premise Multi-Agent-Plattform mit lokalen LLMs
        """)

if __name__ == "__main__":
    # Launch the local Gradio server (blocking call).
    demo.launch()