Hwandji committed on
Commit
b967f68
·
1 Parent(s): ff6e651

Add SAAP Performance Benchmark Interface

Browse files
Files changed (3) hide show
  1. README.md +14 -6
  2. app.py +183 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,12 +1,20 @@
1
  ---
2
- title: Ollama Test
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.47.2
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SAAP Performance Benchmark
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # SAAP Ollama Performance Benchmark
13
+
14
+ Master Thesis: satware AI Autonomous Agent Platform
15
+
16
+ Test your local Ollama models with SAAP-specific scenarios:
17
+ - Multi-Agent Coordination
18
+ - On-Premise Performance Analysis
19
+ - Agent Role Simulations
20
+ - Real-time Response Benchmarking
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ import time
5
+ from datetime import datetime
6
+
7
class OllamaSAAPBenchmark:
    """Benchmark client for a local Ollama server, framing prompts for SAAP agent roles."""

    def __init__(self, base_url="http://localhost:11434"):
        # Base URL of the Ollama REST API (default: local install on the standard port).
        self.base_url = base_url

    def test_agent_response(self, prompt, model, agent_role="General"):
        """Send *prompt* to *model* via /api/generate, optionally prefixed for an agent role.

        Returns a dict with the generated text, wall-clock time ("1.23s" style),
        a rough whitespace-token count and a human-readable status. Never raises:
        any failure is reported in the "status" field instead.
        """
        # Role-specific German prefixes for the SAAP agent personas.
        saap_prompts = {
            "Jane": f"Als KI-Architektin: {prompt}",
            "John": f"Als Entwickler: {prompt}",
            "Justus": f"Als Rechtsexperte: {prompt}",
            "General": prompt
        }

        final_prompt = saap_prompts.get(agent_role, prompt)
        start_time = time.time()

        try:
            response = requests.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": final_prompt,
                    "stream": False,
                    "options": {"temperature": 0.7, "num_predict": 256}
                },
                timeout=60
            )

            end_time = time.time()

            if response.status_code == 200:
                result = response.json()
                text = result.get("response", "")
                return {
                    "response": text,
                    "time": f"{end_time - start_time:.2f}s",
                    "model": model,
                    "agent_role": agent_role,
                    # Rough token estimate: whitespace-separated words.
                    "tokens": len(text.split()),
                    "status": "✅ Success"
                }
            else:
                return {"status": f"❌ Error {response.status_code}", "time": f"{end_time - start_time:.2f}s"}

        except Exception as e:
            # Deliberately broad best-effort handler: connection errors, timeouts
            # and malformed JSON are all reported as a status string, not raised.
            return {"status": f"❌ Connection Error: {str(e)[:50]}...", "time": f"{time.time() - start_time:.2f}s"}

    def list_models(self):
        """Return the names of models installed on the Ollama server (or an error marker list)."""
        try:
            # BUGFIX: added a timeout so a dead/unreachable server fails fast
            # instead of blocking module import indefinitely.
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                models = response.json().get("models", [])
                return [model["name"] for model in models]
            return ["Connection failed - check if Ollama is running"]
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            return ["❌ Cannot connect to Ollama"]
64
+
65
# Initialize benchmark system
# Module-level singletons created once at import time; `available_models`
# is fetched from the Ollama server (or an error-marker list on failure).
benchmark = OllamaSAAPBenchmark()
available_models = benchmark.list_models()
68
+
69
# SAAP Benchmark Interface
def run_saap_benchmark(prompt, selected_models, agent_role):
    """Benchmark each selected (and available) Ollama model with *prompt*.

    Returns a Markdown report string: a header, one section per tested model,
    and a performance summary with the average response time. Models not in
    `available_models` are skipped.
    """
    if not prompt.strip():
        return "⚠️ Bitte geben Sie einen Test-Prompt ein."

    results = []
    results.append("# 🚀 SAAP Multi-Agent Performance Benchmark")
    results.append(f"**Agent Role:** {agent_role}")
    results.append(f"**Test Prompt:** {prompt}")
    results.append(f"**Models:** {', '.join(selected_models)}")
    results.append(f"**Timestamp:** {datetime.now().strftime('%H:%M:%S')}")
    results.append("---")

    total_time = 0.0
    tested_count = 0  # models actually benchmarked (present on the server)
    for model in selected_models:
        if model not in available_models:
            continue  # skip models the Ollama server does not provide
        tested_count += 1
        result = benchmark.test_agent_response(prompt, model, agent_role)

        results.append(f"## 🤖 {model.upper()} ({agent_role})")
        results.append(f"**Status:** {result.get('status', '❌ Error')}")
        results.append(f"**Response Time:** {result.get('time', 'N/A')}")
        results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")

        if result.get('response'):
            preview = result['response'][:100].replace('\n', ' ')
            results.append(f"**Response Preview:** {preview}...")

        results.append("---")

        # Accumulate for the average; time strings look like "1.23s".
        try:
            total_time += float(result.get('time', '0').rstrip('s'))
        except ValueError:
            pass  # malformed time string — leave it out of the average

    # Performance Summary — only meaningful when at least one model ran.
    if tested_count:
        # BUGFIX: divide by the number of models actually tested, not the
        # number selected; skipped (unavailable) models previously dragged
        # the average toward zero and could report "Excellent" for no runs.
        avg_time = total_time / tested_count
        results.append("## 📊 Performance Summary")
        results.append(f"**Average Response Time:** {avg_time:.2f}s")
        results.append(f"**Total Models Tested:** {tested_count}")

        # SAAP Performance Assessment against the thesis latency targets.
        if avg_time < 2.0:
            results.append("**SAAP Assessment:** ✅ Excellent for real-time multi-agent coordination")
        elif avg_time < 5.0:
            results.append("**SAAP Assessment:** ⚠️ Acceptable for batch processing")
        else:
            results.append("**SAAP Assessment:** ❌ Too slow for interactive agents")

    return "\n".join(results)
121
+
122
# Gradio Interface
# Declarative UI: widgets are registered on `demo` in construction order.
with gr.Blocks(title="SAAP Performance Benchmark", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 SAAP - satware AI Agent Platform Benchmark")
    gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Hochschule Worms** | **satware AG**")

    with gr.Row():
        with gr.Column(scale=2):
            # Free-text prompt sent to every selected model.
            prompt_input = gr.Textbox(
                label="SAAP Test Prompt",
                placeholder="Beispiel: Entwickle eine Systemarchitektur für Multi-Agent Koordination",
                lines=3,
                value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
            )

            # Persona used to prefix the prompt (see OllamaSAAPBenchmark.test_agent_response).
            agent_role = gr.Dropdown(
                choices=["General", "Jane", "John", "Justus"],
                label="Agent Role Simulation",
                value="General"
            )

        with gr.Column(scale=1):
            # Pre-select the first two models when at least two are installed.
            model_selection = gr.CheckboxGroup(
                choices=available_models,
                label="Models to Benchmark",
                value=available_models[:2] if len(available_models) >= 2 else available_models
            )

    benchmark_btn = gr.Button("🚀 Run SAAP Benchmark", variant="primary", size="lg")

    # Results
    results_output = gr.Markdown(label="Benchmark Results")

    # Benchmark function
    # Wire the button to the benchmark; inputs map positionally to the
    # run_saap_benchmark(prompt, selected_models, agent_role) signature.
    benchmark_btn.click(
        run_saap_benchmark,
        inputs=[prompt_input, model_selection, agent_role],
        outputs=results_output
    )

    # System Info
    # NOTE: this f-string is evaluated once at import time, so the model list
    # shown here reflects the server state when the app started.
    with gr.Accordion("ℹ️ System Information", open=False):
        gr.Markdown(f"""
        ### 📋 SAAP Test Environment
        - **Available Models:** {len(available_models)}
        - **Models:** {', '.join(available_models)}
        - **Ollama Server:** {benchmark.base_url}

        ### 🎯 SAAP Performance Targets
        - **Real-time Coordination:** < 2s per response
        - **Batch Processing:** < 5s per response
        - **Multi-Agent Sync:** < 10s for complex workflows

        ### 🎓 Master Thesis Context
        **Projekt:** SAAP - satware AI Autonomous Agent Platform
        **Student:** Hanan Wandji Danga
        **Universität:** Hochschule Worms
        **Betreuung:** Michael Wegener
        **Ziel:** On-Premise Multi-Agent-Plattform mit lokalen LLMs
        """)

# Launch the Gradio app when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio>=4.0.0
2
+ requests>=2.31.0