Hwandji committed on
Commit
8186494
·
1 Parent(s): 343cc98

Adaptation for Hugging Face.

Browse files
Files changed (1) hide show
  1. app.py +153 -132
app.py CHANGED
@@ -1,85 +1,67 @@
1
  import gradio as gr
 
2
  import time
 
3
  from datetime import datetime
4
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
5
- import torch
6
 
7
- class HuggingFaceCloudBenchmark:
8
  def __init__(self):
9
- self.models_cache = {}
 
 
 
10
  self.available_models = [
11
  "microsoft/DialoGPT-small", # 117M - Sehr schnell
12
- "gpt2", # 124M - Standard GPT-2
13
- "distilgpt2", # 82M - Optimiert & schnell
14
  "microsoft/DialoGPT-medium", # 345M - Mittlere Größe
 
 
15
  ]
16
 
17
- def load_model(self, model_name):
18
- """Lädt Model mit Caching für Performance"""
19
- if model_name not in self.models_cache:
20
- try:
21
- print(f"📥 Loading {model_name}...")
22
- # Optimiert für CPU-Performance
23
- self.models_cache[model_name] = pipeline(
24
- "text-generation",
25
- model=model_name,
26
- tokenizer=model_name,
27
- device=-1, # CPU statt GPU
28
- torch_dtype=torch.float32,
29
- max_length=512
30
- )
31
- print(f"✅ {model_name} loaded successfully")
32
- except Exception as e:
33
- print(f"❌ Failed to load {model_name}: {e}")
34
- return None
35
-
36
- return self.models_cache[model_name]
37
 
38
  def test_agent_response(self, prompt, model_name, agent_role="General"):
39
- """HuggingFace Cloud Inference Test"""
40
 
41
  # SAAP-spezifische Prompts
42
  saap_prompts = {
43
- "Jane": f"Als KI-Architektin für Multi-Agent-Systeme: {prompt}",
44
- "John": f"Als Softwareentwickler für AGI-Architekturen: {prompt}",
45
- "Justus": f"Als Rechtsexperte für DSGVO und KI-Compliance: {prompt}",
46
- "Lara": f"Als medizinische KI-Expertin: {prompt}",
47
- "General": prompt
48
  }
49
 
50
  final_prompt = saap_prompts.get(agent_role, prompt)
51
-
52
- # Model laden
53
- generator = self.load_model(model_name)
54
- if not generator:
55
- return {
56
- "status": f"❌ Model {model_name} konnte nicht geladen werden",
57
- "time": "0.00s",
58
- "environment": "☁️ HuggingFace Transformers"
59
- }
60
-
61
  start_time = time.time()
62
 
63
  try:
64
- # Inference mit optimierten Parametern
65
- result = generator(
66
- final_prompt,
67
- max_new_tokens=128, # Begrenzt für Performance
 
68
  temperature=0.7,
69
- do_sample=True,
70
  top_p=0.9,
71
- pad_token_id=generator.tokenizer.eos_token_id,
72
- num_return_sequences=1,
73
- truncation=True
74
  )
75
 
76
  end_time = time.time()
77
  response_time = end_time - start_time
78
 
79
- # Response extrahieren
80
- generated_text = result[0]['generated_text']
81
- # Original Prompt entfernen
82
- response_text = generated_text.replace(final_prompt, "").strip()
83
 
84
  return {
85
  "response": response_text,
@@ -87,35 +69,28 @@ class HuggingFaceCloudBenchmark:
87
  "model": model_name,
88
  "agent_role": agent_role,
89
  "tokens": len(response_text.split()),
90
- "status": "✅ Success (HuggingFace Cloud)",
91
- "environment": "☁️ HuggingFace Transformers",
92
- "model_size": self.get_model_size(model_name)
93
  }
94
 
95
  except Exception as e:
96
  end_time = time.time()
 
 
97
  return {
98
- "status": f"❌ Inference Error: {str(e)[:50]}...",
99
- "time": f"{end_time - start_time:.2f}s",
100
- "environment": "☁️ HuggingFace Transformers"
 
101
  }
102
 
103
- def get_model_size(self, model_name):
104
- """Model-Größe für Vergleiche"""
105
- sizes = {
106
- "distilgpt2": "82M Parameter",
107
- "gpt2": "124M Parameter",
108
- "microsoft/DialoGPT-small": "117M Parameter",
109
- "microsoft/DialoGPT-medium": "345M Parameter"
110
- }
111
- return sizes.get(model_name, "Unknown Size")
112
-
113
  # Global benchmark instance
114
- print("☁️ Initializing HuggingFace Cloud Benchmark...")
115
- benchmark = HuggingFaceCloudBenchmark()
116
 
117
  def run_cloud_benchmark(prompt, selected_models, agent_role):
118
- """Cloud Performance Benchmark mit HuggingFace Models"""
119
  if not prompt.strip():
120
  return "⚠️ **Bitte Test-Prompt eingeben**"
121
 
@@ -124,7 +99,7 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
124
 
125
  results = []
126
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
127
- results.append("**Platform:** HuggingFace Transformers | **Environment:** Cloud GPU/CPU")
128
  results.append(f"**🤖 Agent Role:** {agent_role}")
129
  results.append(f"**📝 Test Prompt:** {prompt}")
130
  results.append(f"**🔧 Models:** {', '.join(selected_models)}")
@@ -137,16 +112,23 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
137
  for model_name in selected_models:
138
  result = benchmark.test_agent_response(prompt, model_name, agent_role)
139
 
 
 
140
  results.append(f"## ☁️ {model_name.upper()}")
141
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
142
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
143
- results.append(f"**Model Size:** {result.get('model_size', 'Unknown')}")
 
 
144
  results.append(f"**Environment:** {result.get('environment', '☁️ HuggingFace')}")
145
  results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
146
 
147
  if 'response' in result and result['response']:
148
  preview = result['response'][:120].replace('\n', ' ')
149
  results.append(f"**Response Preview:** {preview}...")
 
 
 
150
 
151
  results.append("---")
152
 
@@ -165,53 +147,81 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
165
  results.append(f"## 📊 Cloud Performance Summary")
166
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
167
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
168
- results.append(f"**Infrastructure:** ☁️ HuggingFace Spaces (Shared CPU/GPU)")
169
 
170
  # SAAP Cloud Assessment
171
- if avg_time < 3.0:
172
- results.append(f"**☁️ Cloud Rating:** 🚀 Exzellent für Cloud-basierte Multi-Agent Systeme")
173
- elif avg_time < 8.0:
174
- results.append(f"**☁️ Cloud Rating:** ⚡ Gut für interaktive Cloud-Anwendungen")
175
- elif avg_time < 15.0:
176
- results.append(f"**☁️ Cloud Rating:** ⚠️ Akzeptabel für Batch Cloud-Processing")
 
 
177
  else:
178
- results.append(f"**☁️ Cloud Rating:** 🐌 Optimierung erforderlich")
179
 
180
- # Thesis Integration - Vergleich mit lokalen Daten
181
- results.append(f"\n## 🆚 On-Premise vs. Cloud Comparison")
182
- results.append(f"**🏠 On-Premise (CachyOS + Ollama):**")
183
- results.append(f"- qwen2:1.5b: 25.94s")
184
- results.append(f"- tinyllama: 17.96s")
185
- results.append(f"- Hardware: Intel i7-5600U, 16GB RAM")
186
- results.append(f"- Kosten: 0€ pro Request ✅")
187
- results.append(f"- DSGVO: Vollständig konform ✅")
188
- results.append(f"- Offline: Funktioniert ohne Internet ✅")
189
 
190
- results.append(f"\n**☁️ Cloud (HuggingFace):**")
191
- results.append(f"- Average: {avg_time:.2f}s")
192
- results.append(f"- Hardware: Shared Cloud Infrastructure")
193
- results.append(f"- Kosten: API-Gebühren pro Request 💰")
194
- results.append(f"- DSGVO: Abhängig von Provider ⚠️")
195
- results.append(f"- Offline: Internetverbindung erforderlich ")
 
 
196
 
197
- # Fazit für Thesis
198
- if avg_time < 18:
199
- results.append(f"\n**🎓 Thesis-Fazit:** ☁️ Cloud hat Performance-Vorteil, aber On-Premise bietet Datenschutz und Kostenkontrolle")
200
- else:
201
- results.append(f"\n**🎓 Thesis-Fazit:** 🏠 On-Premise ist konkurrenzfähig und bietet zusätzlich Datenschutz-Compliance")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  return "\n".join(results)
204
 
205
  # Gradio Interface
206
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
207
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
208
- gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **HuggingFace Transformers** | **Cloud vs. On-Premise**")
209
 
210
  with gr.Row():
211
  with gr.Column(scale=2):
212
  prompt_input = gr.Textbox(
213
  label="SAAP Test Prompt",
214
- placeholder="Test-Prompt für Agent Performance...",
215
  lines=3,
216
  value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
217
  )
@@ -241,40 +251,51 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
241
  outputs=results_output
242
  )
243
 
244
- # System Info
245
- with gr.Accordion("ℹ️ Cloud vs. On-Premise Vergleich", open=False):
246
  gr.Markdown("""
247
- ### 🎓 SAAP Thesis Integration
 
 
 
 
 
 
 
 
 
 
248
 
249
- #### 🏠 On-Premise Vorteile (Ihre CachyOS Daten):
250
- - **Datenschutz:** ✅ 100% DSGVO-konform, keine Datenübertragung
251
- - **Kosten:** 0€ pro Request nach Initial-Setup
252
- - **Kontrolle:** Volle Kontrolle über Models und Daten
253
- - **Offline:** Funktioniert ohne Internetverbindung
254
- - **Sicherheit:** Keine Abhängigkeit von externen Services
255
 
256
- #### ☁️ Cloud Vorteile (Diese HuggingFace Daten):
257
- - **Performance:** ⚡ Möglicherweise schneller durch GPU-Cluster
258
- - **Skalierung:** 📈 Automatische Skalierung bei Last
259
- - **Wartung:** 🔧 Keine lokale Infrastruktur-Wartung
260
- - **Updates:** 🚀 Automatische Model-Updates verfügbar
261
 
262
- #### 🎯 Für SAAP Multi-Agent Platform:
263
- **On-Premise ist ideal für:**
264
- - Krankenhäuser, Behörden, Finanzsektor
265
- - Datenschutz-kritische Anwendungen
266
- - Kostenkontrolle bei hohem Durchsatz
267
 
268
- **Cloud ist geeignet für:**
269
- - Prototyping und Entwicklung
270
- - Variable Workloads
271
- - Schnelle Experimente
 
272
 
273
- ### 📊 Ihre Thesis-Daten:
274
- Sammeln Sie beide Datensätze für aussagekräftige Vergleiche!
 
 
275
 
276
- **Lokale App:** http://127.0.0.1:7860 (CachyOS)
277
- **Cloud App:** Diese HuggingFace Space
 
 
278
  """)
279
 
280
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import requests
3
  import time
4
+ import os
5
  from datetime import datetime
6
+ from huggingface_hub import InferenceClient
 
7
 
8
+ class HuggingFaceInferenceBenchmark:
9
  def __init__(self):
10
+ # HuggingFace Inference API Client
11
+ self.client = InferenceClient()
12
+
13
+ # Verfügbare Models über Inference API
14
  self.available_models = [
15
  "microsoft/DialoGPT-small", # 117M - Sehr schnell
16
+ "gpt2", # 124M - Standard GPT-2
17
+ "distilgpt2", # 82M - Optimiert
18
  "microsoft/DialoGPT-medium", # 345M - Mittlere Größe
19
+ "google/flan-t5-small", # 80M - Instruction-tuned
20
+ "google/flan-t5-base", # 250M - Bessere Performance
21
  ]
22
 
23
+ # Model-Informationen für bessere Vergleiche
24
+ self.model_info = {
25
+ "distilgpt2": {"size": "82M", "type": "GPT-2 optimiert", "speed": "Sehr schnell"},
26
+ "gpt2": {"size": "124M", "type": "GPT-2 Standard", "speed": "Schnell"},
27
+ "microsoft/DialoGPT-small": {"size": "117M", "type": "Dialog-optimiert", "speed": "Schnell"},
28
+ "microsoft/DialoGPT-medium": {"size": "345M", "type": "Dialog-optimiert", "speed": "Mittel"},
29
+ "google/flan-t5-small": {"size": "80M", "type": "Instruction-tuned", "speed": "Sehr schnell"},
30
+ "google/flan-t5-base": {"size": "250M", "type": "Instruction-tuned", "speed": "Mittel"},
31
+ }
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def test_agent_response(self, prompt, model_name, agent_role="General"):
34
+ """HuggingFace Inference API Test"""
35
 
36
  # SAAP-spezifische Prompts
37
  saap_prompts = {
38
+ "Jane": f"Als KI-Architektin für Multi-Agent-Systeme: {prompt}\n\nAntwort:",
39
+ "John": f"Als Softwareentwickler für AGI-Architekturen: {prompt}\n\nAntwort:",
40
+ "Justus": f"Als Rechtsexperte für DSGVO und KI-Compliance: {prompt}\n\nAntwort:",
41
+ "Lara": f"Als medizinische KI-Expertin: {prompt}\n\nAntwort:",
42
+ "General": f"{prompt}\n\nAntwort:"
43
  }
44
 
45
  final_prompt = saap_prompts.get(agent_role, prompt)
 
 
 
 
 
 
 
 
 
 
46
  start_time = time.time()
47
 
48
  try:
49
+ # HuggingFace Inference API Call
50
+ response = self.client.text_generation(
51
+ prompt=final_prompt,
52
+ model=model_name,
53
+ max_new_tokens=150, # Optimiert für Vergleichbarkeit
54
  temperature=0.7,
 
55
  top_p=0.9,
56
+ do_sample=True,
57
+ return_full_text=False, # Nur neue Tokens zurückgeben
 
58
  )
59
 
60
  end_time = time.time()
61
  response_time = end_time - start_time
62
 
63
+ # Response bereinigen
64
+ response_text = response.strip() if isinstance(response, str) else str(response).strip()
 
 
65
 
66
  return {
67
  "response": response_text,
 
69
  "model": model_name,
70
  "agent_role": agent_role,
71
  "tokens": len(response_text.split()),
72
+ "status": "✅ Success (HuggingFace Inference API)",
73
+ "environment": "☁️ HuggingFace Cloud Inference",
74
+ "model_info": self.model_info.get(model_name, {"size": "Unknown", "type": "Unknown", "speed": "Unknown"})
75
  }
76
 
77
  except Exception as e:
78
  end_time = time.time()
79
+ response_time = end_time - start_time
80
+
81
  return {
82
+ "status": f"❌ API Error: {str(e)[:60]}...",
83
+ "time": f"{response_time:.2f}s",
84
+ "environment": "☁️ HuggingFace Cloud Inference",
85
+ "error_details": str(e) if len(str(e)) < 200 else str(e)[:200] + "..."
86
  }
87
 
 
 
 
 
 
 
 
 
 
 
88
  # Global benchmark instance
89
+ print("☁️ Initializing HuggingFace Inference API Benchmark...")
90
+ benchmark = HuggingFaceInferenceBenchmark()
91
 
92
  def run_cloud_benchmark(prompt, selected_models, agent_role):
93
+ """Cloud Performance Benchmark mit HuggingFace Inference API"""
94
  if not prompt.strip():
95
  return "⚠️ **Bitte Test-Prompt eingeben**"
96
 
 
99
 
100
  results = []
101
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
102
+ results.append("**Platform:** HuggingFace Inference API | **Environment:** Cloud GPU Cluster")
103
  results.append(f"**🤖 Agent Role:** {agent_role}")
104
  results.append(f"**📝 Test Prompt:** {prompt}")
105
  results.append(f"**🔧 Models:** {', '.join(selected_models)}")
 
112
  for model_name in selected_models:
113
  result = benchmark.test_agent_response(prompt, model_name, agent_role)
114
 
115
+ model_info = result.get('model_info', {})
116
+
117
  results.append(f"## ☁️ {model_name.upper()}")
118
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
119
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
120
+ results.append(f"**Model Size:** {model_info.get('size', 'Unknown')}")
121
+ results.append(f"**Model Type:** {model_info.get('type', 'Unknown')}")
122
+ results.append(f"**Expected Speed:** {model_info.get('speed', 'Unknown')}")
123
  results.append(f"**Environment:** {result.get('environment', '☁️ HuggingFace')}")
124
  results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
125
 
126
  if 'response' in result and result['response']:
127
  preview = result['response'][:120].replace('\n', ' ')
128
  results.append(f"**Response Preview:** {preview}...")
129
+
130
+ if 'error_details' in result:
131
+ results.append(f"**Debug Info:** {result['error_details']}")
132
 
133
  results.append("---")
134
 
 
147
  results.append(f"## 📊 Cloud Performance Summary")
148
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
149
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
150
+ results.append(f"**Infrastructure:** ☁️ HuggingFace Inference API (Managed GPU Cluster)")
151
 
152
  # SAAP Cloud Assessment
153
+ if avg_time < 2.0:
154
+ results.append(f"**☁️ Cloud Rating:** 🚀 Exzellent - Übertrifft lokale Hardware deutlich")
155
+ elif avg_time < 5.0:
156
+ results.append(f"**☁️ Cloud Rating:** ⚡ Sehr gut - Konkurrenzfähig mit lokaler Hardware")
157
+ elif avg_time < 10.0:
158
+ results.append(f"**☁️ Cloud Rating:** Gut - Ähnlich wie lokale Performance")
159
+ elif avg_time < 20.0:
160
+ results.append(f"**☁️ Cloud Rating:** ⚠️ Akzeptabel - Lokale Hardware möglicherweise besser")
161
  else:
162
+ results.append(f"**☁️ Cloud Rating:** 🐌 Langsam - On-Premise deutlich überlegen")
163
 
164
+ # Thesis Integration - Direkter Vergleich mit deinen lokalen Daten
165
+ results.append(f"\n## 🆚 **SAAP Thesis: Cloud vs. On-Premise Benchmark**")
 
 
 
 
 
 
 
166
 
167
+ results.append(f"\n### 🏠 **On-Premise (Deine CachyOS Daten):**")
168
+ results.append(f"- **qwen2:1.5b:** 25.94s (1.5B Parameter)")
169
+ results.append(f"- **tinyllama:** 17.96s (1B Parameter)")
170
+ results.append(f"- **Hardware:** Intel i7-5600U, 16GB RAM, keine GPU")
171
+ results.append(f"- **Kosten:** 0€ pro Request ")
172
+ results.append(f"- **DSGVO:** 100% konform, keine Datenübertragung ✅")
173
+ results.append(f"- **Verfügbarkeit:** Offline-fähig ✅")
174
+ results.append(f"- **Kontrolle:** Vollständige Datenkontrolle ✅")
175
 
176
+ results.append(f"\n### ☁️ **Cloud (HuggingFace Inference API):**")
177
+ results.append(f"- **Average Response Time:** {avg_time:.2f}s")
178
+ results.append(f"- **Hardware:** GPU-Cluster, optimierte Infrastruktur")
179
+ results.append(f"- **Kosten:** API-Gebühren pro Request 💰")
180
+ results.append(f"- **DSGVO:** Abhängig von Anbieter, Datenübertragung ⚠️")
181
+ results.append(f"- **Verfügbarkeit:** Internetverbindung erforderlich ❌")
182
+ results.append(f"- **Kontrolle:** Limitierte Kontrolle über Verarbeitung ⚠️")
183
+
184
+ # Dynamisches Fazit basierend auf Performance
185
+ performance_ratio = avg_time / 21.95 # Average deiner lokalen Tests
186
+
187
+ results.append(f"\n### 🎓 **SAAP Thesis-Fazit:**")
188
+
189
+ if performance_ratio < 0.3: # Cloud >3x schneller
190
+ results.append(f"**Performance-Vorteil Cloud:** ☁️ {1/performance_ratio:.1f}x schneller als On-Premise")
191
+ results.append(f"**Empfehlung:** Cloud für Performance-kritische Anwendungen, On-Premise für Datenschutz")
192
+ elif performance_ratio < 0.7: # Cloud schneller
193
+ results.append(f"**Performance-Vorteil Cloud:** ☁️ {1/performance_ratio:.1f}x schneller als On-Premise")
194
+ results.append(f"**Empfehlung:** Balanced Approach - je nach Priorität Performance vs. Datenschutz")
195
+ elif performance_ratio < 1.3: # Ähnliche Performance
196
+ results.append(f"**Performance:** Ähnlich (Cloud {performance_ratio:.1f}x vs. On-Premise)")
197
+ results.append(f"**Empfehlung:** 🏠 On-Premise vorzuziehen - gleiche Performance + besserer Datenschutz + keine Kosten")
198
+ else: # On-Premise schneller
199
+ results.append(f"**Performance-Vorteil On-Premise:** 🏠 {performance_ratio:.1f}x schneller als Cloud")
200
+ results.append(f"**Empfehlung:** 🏠 On-Premise deutlich überlegen - bessere Performance + Datenschutz + Kosteneffizienz")
201
+
202
+ results.append(f"\n**🎯 SAAP Multi-Agent Platform Strategie:**")
203
+ results.append(f"- **Entwicklung/Prototyping:** ☁️ Cloud für Flexibilität")
204
+ results.append(f"- **Produktion (DSGVO-kritisch):** 🏠 On-Premise für Compliance")
205
+ results.append(f"- **Hybrid-Ansatz:** Kritische Agenten On-Premise, Skalierung Cloud")
206
+
207
+ else:
208
+ results.append(f"## ❌ Cloud Performance Issues")
209
+ results.append(f"**Problem:** Keine erfolgreichen Tests")
210
+ results.append(f"**Mögliche Ursachen:** API-Limits, Model-Verfügbarkeit, Netzwerk")
211
+ results.append(f"\n**🎓 Thesis-Implikation:** On-Premise bietet höhere Zuverlässigkeit")
212
 
213
  return "\n".join(results)
214
 
215
  # Gradio Interface
216
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
217
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
218
+ gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **HuggingFace Inference API** | **Cloud vs. On-Premise Vergleich**")
219
 
220
  with gr.Row():
221
  with gr.Column(scale=2):
222
  prompt_input = gr.Textbox(
223
  label="SAAP Test Prompt",
224
+ placeholder="Test-Prompt für Agent Performance-Vergleich...",
225
  lines=3,
226
  value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
227
  )
 
251
  outputs=results_output
252
  )
253
 
254
+ # Enhanced System Info
255
+ with gr.Accordion("🎓 SAAP Thesis: Cloud vs. On-Premise Analyse", open=False):
256
  gr.Markdown("""
257
+ ### 📊 Benchmark-Strategie für Master-Thesis
258
+
259
+ #### 🏠 On-Premise Baseline (Ihre CachyOS Daten):
260
+ - **qwen2:1.5b:** 25.94s | **tinyllama:** 17.96s
261
+ - **Hardware:** Intel i7-5600U, 16GB RAM, keine GPU
262
+ - **Durchschnitt:** ~22s für komplexe Multi-Agent Prompts
263
+
264
+ #### ☁️ Cloud Vergleich (Diese App):
265
+ - **Direkte HuggingFace Inference API Calls**
266
+ - **GPU-optimierte Inferenz auf professioneller Cloud-Infrastruktur**
267
+ - **Verschiedene Model-Größen für faire Vergleiche**
268
 
269
+ ### 🎯 Thesis-Relevante Metriken:
270
+ 1. **Performance-Ratio:** Cloud-Zeit vs. On-Premise-Zeit
271
+ 2. **Kosteneffizienz:** 0€ (On-Premise) vs. API-Kosten (Cloud)
272
+ 3. **DSGVO-Compliance:** 100% (On-Premise) vs. Abhängig (Cloud)
273
+ 4. **Verfügbarkeit:** Offline (On-Premise) vs. Online-abhängig (Cloud)
274
+ 5. **Kontrolle:** Vollständig (On-Premise) vs. Limitiert (Cloud)
275
 
276
+ ### 🚀 Für SAAP Multi-Agent Platform:
 
 
 
 
277
 
278
+ **On-Premise Ideal für:**
279
+ - 🏥 Krankenhäuser (Patientendaten)
280
+ - 🏛️ Behörden (Bürgerdaten)
281
+ - 🏦 Finanzsektor (Transaktionsdaten)
282
+ - 🏭 Industrie 4.0 (Betriebsgeheimnisse)
283
 
284
+ **Cloud Geeignet für:**
285
+ - 🔬 Prototyping und Entwicklung
286
+ - 📈 Variable Workloads
287
+ - 🌐 Globale Skalierung
288
+ - ⚡ Performance-kritische Anwendungen
289
 
290
+ ### 📈 Erwartete Thesis-Ergebnisse:
291
+ - **Cloud:** Möglicherweise 2-10x schneller (GPU-Vorteil)
292
+ - **On-Premise:** Kosteneffizienter bei hohem Durchsatz
293
+ - **Hybrid-Ansatz:** Optimale Lösung für SAAP-Platform
294
 
295
+ ---
296
+ **🔬 Dual-Benchmark Setup:**
297
+ - **Lokale App:** http://127.0.0.1:7860 (On-Premise Daten sammeln)
298
+ - **Cloud App:** Diese HuggingFace Space (Cloud-Performance testen)
299
  """)
300
 
301
  if __name__ == "__main__":