Hwandji committed on
Commit
e0e1626
·
1 Parent(s): 8186494

adaptation for huggingface.

Browse files
Files changed (1) hide show
  1. app.py +38 -144
app.py CHANGED
@@ -17,18 +17,7 @@ class HuggingFaceInferenceBenchmark:
17
  "distilgpt2", # 82M - Optimiert
18
  "microsoft/DialoGPT-medium", # 345M - Mittlere Größe
19
  "google/flan-t5-small", # 80M - Instruction-tuned
20
- "google/flan-t5-base", # 250M - Bessere Performance
21
  ]
22
-
23
- # Model-Informationen für bessere Vergleiche
24
- self.model_info = {
25
- "distilgpt2": {"size": "82M", "type": "GPT-2 optimiert", "speed": "Sehr schnell"},
26
- "gpt2": {"size": "124M", "type": "GPT-2 Standard", "speed": "Schnell"},
27
- "microsoft/DialoGPT-small": {"size": "117M", "type": "Dialog-optimiert", "speed": "Schnell"},
28
- "microsoft/DialoGPT-medium": {"size": "345M", "type": "Dialog-optimiert", "speed": "Mittel"},
29
- "google/flan-t5-small": {"size": "80M", "type": "Instruction-tuned", "speed": "Sehr schnell"},
30
- "google/flan-t5-base": {"size": "250M", "type": "Instruction-tuned", "speed": "Mittel"},
31
- }
32
 
33
  def test_agent_response(self, prompt, model_name, agent_role="General"):
34
  """HuggingFace Inference API Test"""
@@ -50,11 +39,9 @@ class HuggingFaceInferenceBenchmark:
50
  response = self.client.text_generation(
51
  prompt=final_prompt,
52
  model=model_name,
53
- max_new_tokens=150, # Optimiert für Vergleichbarkeit
54
  temperature=0.7,
55
- top_p=0.9,
56
- do_sample=True,
57
- return_full_text=False, # Nur neue Tokens zurückgeben
58
  )
59
 
60
  end_time = time.time()
@@ -67,26 +54,20 @@ class HuggingFaceInferenceBenchmark:
67
  "response": response_text,
68
  "time": f"{response_time:.2f}s",
69
  "model": model_name,
70
- "agent_role": agent_role,
71
  "tokens": len(response_text.split()),
72
- "status": "✅ Success (HuggingFace Inference API)",
73
- "environment": "☁️ HuggingFace Cloud Inference",
74
- "model_info": self.model_info.get(model_name, {"size": "Unknown", "type": "Unknown", "speed": "Unknown"})
75
  }
76
 
77
  except Exception as e:
78
  end_time = time.time()
79
- response_time = end_time - start_time
80
-
81
  return {
82
- "status": f"❌ API Error: {str(e)[:60]}...",
83
- "time": f"{response_time:.2f}s",
84
- "environment": "☁️ HuggingFace Cloud Inference",
85
- "error_details": str(e) if len(str(e)) < 200 else str(e)[:200] + "..."
86
  }
87
 
88
  # Global benchmark instance
89
- print("☁️ Initializing HuggingFace Inference API Benchmark...")
90
  benchmark = HuggingFaceInferenceBenchmark()
91
 
92
  def run_cloud_benchmark(prompt, selected_models, agent_role):
@@ -99,7 +80,7 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
99
 
100
  results = []
101
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
102
- results.append("**Platform:** HuggingFace Inference API | **Environment:** Cloud GPU Cluster")
103
  results.append(f"**🤖 Agent Role:** {agent_role}")
104
  results.append(f"**📝 Test Prompt:** {prompt}")
105
  results.append(f"**🔧 Models:** {', '.join(selected_models)}")
@@ -112,27 +93,18 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
112
  for model_name in selected_models:
113
  result = benchmark.test_agent_response(prompt, model_name, agent_role)
114
 
115
- model_info = result.get('model_info', {})
116
-
117
- results.append(f"## ☁️ {model_name.upper()}")
118
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
119
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
120
- results.append(f"**Model Size:** {model_info.get('size', 'Unknown')}")
121
- results.append(f"**Model Type:** {model_info.get('type', 'Unknown')}")
122
- results.append(f"**Expected Speed:** {model_info.get('speed', 'Unknown')}")
123
- results.append(f"**Environment:** {result.get('environment', '☁️ HuggingFace')}")
124
  results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
125
 
126
  if 'response' in result and result['response']:
127
- preview = result['response'][:120].replace('\n', ' ')
128
  results.append(f"**Response Preview:** {preview}...")
129
-
130
- if 'error_details' in result:
131
- results.append(f"**Debug Info:** {result['error_details']}")
132
 
133
  results.append("---")
134
 
135
- # Statistics
136
  if result.get('status', '').startswith('✅'):
137
  successful_tests += 1
138
  try:
@@ -147,83 +119,34 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
147
  results.append(f"## 📊 Cloud Performance Summary")
148
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
149
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
150
- results.append(f"**Infrastructure:** ☁️ HuggingFace Inference API (Managed GPU Cluster)")
151
-
152
- # SAAP Cloud Assessment
153
- if avg_time < 2.0:
154
- results.append(f"**☁️ Cloud Rating:** 🚀 Exzellent - Übertrifft lokale Hardware deutlich")
155
- elif avg_time < 5.0:
156
- results.append(f"**☁️ Cloud Rating:** ⚡ Sehr gut - Konkurrenzfähig mit lokaler Hardware")
157
- elif avg_time < 10.0:
158
- results.append(f"**☁️ Cloud Rating:** ✅ Gut - Ähnlich wie lokale Performance")
159
- elif avg_time < 20.0:
160
- results.append(f"**☁️ Cloud Rating:** ⚠️ Akzeptabel - Lokale Hardware möglicherweise besser")
161
- else:
162
- results.append(f"**☁️ Cloud Rating:** 🐌 Langsam - On-Premise deutlich überlegen")
163
-
164
- # Thesis Integration - Direkter Vergleich mit deinen lokalen Daten
165
- results.append(f"\n## 🆚 **SAAP Thesis: Cloud vs. On-Premise Benchmark**")
166
 
167
- results.append(f"\n### 🏠 **On-Premise (Deine CachyOS Daten):**")
168
- results.append(f"- **qwen2:1.5b:** 25.94s (1.5B Parameter)")
169
- results.append(f"- **tinyllama:** 17.96s (1B Parameter)")
170
- results.append(f"- **Hardware:** Intel i7-5600U, 16GB RAM, keine GPU")
171
- results.append(f"- **Kosten:** 0€ pro Request ✅")
172
- results.append(f"- **DSGVO:** 100% konform, keine Datenübertragung ✅")
173
- results.append(f"- **Verfügbarkeit:** Offline-fähig ✅")
174
- results.append(f"- **Kontrolle:** Vollständige Datenkontrolle ✅")
175
 
176
- results.append(f"\n### ☁️ **Cloud (HuggingFace Inference API):**")
177
- results.append(f"- **Average Response Time:** {avg_time:.2f}s")
178
- results.append(f"- **Hardware:** GPU-Cluster, optimierte Infrastruktur")
179
- results.append(f"- **Kosten:** API-Gebühren pro Request 💰")
180
- results.append(f"- **DSGVO:** Abhängig von Anbieter, Datenübertragung ⚠️")
181
- results.append(f"- **Verfügbarkeit:** Internetverbindung erforderlich ❌")
182
- results.append(f"- **Kontrolle:** Limitierte Kontrolle über Verarbeitung ⚠️")
183
 
184
- # Dynamisches Fazit basierend auf Performance
185
- performance_ratio = avg_time / 21.95 # Average deiner lokalen Tests
186
-
187
- results.append(f"\n### 🎓 **SAAP Thesis-Fazit:**")
188
-
189
- if performance_ratio < 0.3: # Cloud >3x schneller
190
- results.append(f"**Performance-Vorteil Cloud:** ☁️ {1/performance_ratio:.1f}x schneller als On-Premise")
191
- results.append(f"**Empfehlung:** Cloud für Performance-kritische Anwendungen, On-Premise für Datenschutz")
192
- elif performance_ratio < 0.7: # Cloud schneller
193
- results.append(f"**Performance-Vorteil Cloud:** ☁️ {1/performance_ratio:.1f}x schneller als On-Premise")
194
- results.append(f"**Empfehlung:** Balanced Approach - je nach Priorität Performance vs. Datenschutz")
195
- elif performance_ratio < 1.3: # Ähnliche Performance
196
- results.append(f"**Performance:** Ähnlich (Cloud {performance_ratio:.1f}x vs. On-Premise)")
197
- results.append(f"**Empfehlung:** 🏠 On-Premise vorzuziehen - gleiche Performance + besserer Datenschutz + keine Kosten")
198
- else: # On-Premise schneller
199
- results.append(f"**Performance-Vorteil On-Premise:** 🏠 {performance_ratio:.1f}x schneller als Cloud")
200
- results.append(f"**Empfehlung:** 🏠 On-Premise deutlich überlegen - bessere Performance + Datenschutz + Kosteneffizienz")
201
-
202
- results.append(f"\n**🎯 SAAP Multi-Agent Platform Strategie:**")
203
- results.append(f"- **Entwicklung/Prototyping:** ☁️ Cloud für Flexibilität")
204
- results.append(f"- **Produktion (DSGVO-kritisch):** 🏠 On-Premise für Compliance")
205
- results.append(f"- **Hybrid-Ansatz:** Kritische Agenten On-Premise, Skalierung Cloud")
206
-
207
- else:
208
- results.append(f"## ❌ Cloud Performance Issues")
209
- results.append(f"**Problem:** Keine erfolgreichen Tests")
210
- results.append(f"**Mögliche Ursachen:** API-Limits, Model-Verfügbarkeit, Netzwerk")
211
- results.append(f"\n**🎓 Thesis-Implikation:** On-Premise bietet höhere Zuverlässigkeit")
212
 
213
  return "\n".join(results)
214
 
215
  # Gradio Interface
216
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
217
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
218
- gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **HuggingFace Inference API** | **Cloud vs. On-Premise Vergleich**")
219
 
220
  with gr.Row():
221
  with gr.Column(scale=2):
222
  prompt_input = gr.Textbox(
223
  label="SAAP Test Prompt",
224
- placeholder="Test-Prompt für Agent Performance-Vergleich...",
225
  lines=3,
226
- value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
227
  )
228
 
229
  agent_role = gr.Dropdown(
@@ -235,7 +158,7 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
235
  with gr.Column(scale=1):
236
  model_selection = gr.CheckboxGroup(
237
  choices=benchmark.available_models,
238
- label="Cloud Models to Benchmark",
239
  value=["distilgpt2", "gpt2"]
240
  )
241
 
@@ -244,58 +167,29 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
244
  # Results
245
  results_output = gr.Markdown(label="Cloud Benchmark Results")
246
 
247
- # Event handler
248
  benchmark_btn.click(
249
  run_cloud_benchmark,
250
  inputs=[prompt_input, model_selection, agent_role],
251
  outputs=results_output
252
  )
253
 
254
- # Enhanced System Info
255
- with gr.Accordion("🎓 SAAP Thesis: Cloud vs. On-Premise Analyse", open=False):
256
  gr.Markdown("""
257
- ### 📊 Benchmark-Strategie für Master-Thesis
258
-
259
- #### 🏠 On-Premise Baseline (Ihre CachyOS Daten):
260
- - **qwen2:1.5b:** 25.94s | **tinyllama:** 17.96s
261
- - **Hardware:** Intel i7-5600U, 16GB RAM, keine GPU
262
- - **Durchschnitt:** ~22s für komplexe Multi-Agent Prompts
263
-
264
- #### ☁️ Cloud Vergleich (Diese App):
265
- - **Direkte HuggingFace Inference API Calls**
266
- - **GPU-optimierte Inferenz auf professioneller Cloud-Infrastruktur**
267
- - **Verschiedene Model-Größen für faire Vergleiche**
268
-
269
- ### 🎯 Thesis-Relevante Metriken:
270
- 1. **Performance-Ratio:** Cloud-Zeit vs. On-Premise-Zeit
271
- 2. **Kosteneffizienz:** 0€ (On-Premise) vs. API-Kosten (Cloud)
272
- 3. **DSGVO-Compliance:** 100% (On-Premise) vs. Abhängig (Cloud)
273
- 4. **Verfügbarkeit:** Offline (On-Premise) vs. Online-abhängig (Cloud)
274
- 5. **Kontrolle:** Vollständig (On-Premise) vs. Limitiert (Cloud)
275
-
276
- ### 🚀 Für SAAP Multi-Agent Platform:
277
-
278
- **On-Premise Ideal für:**
279
- - 🏥 Krankenhäuser (Patientendaten)
280
- - 🏛️ Behörden (Bürgerdaten)
281
- - 🏦 Finanzsektor (Transaktionsdaten)
282
- - 🏭 Industrie 4.0 (Betriebsgeheimnisse)
283
 
284
- **Cloud Geeignet für:**
285
- - 🔬 Prototyping und Entwicklung
286
- - 📈 Variable Workloads
287
- - 🌐 Globale Skalierung
288
- - Performance-kritische Anwendungen
289
 
290
- ### 📈 Erwartete Thesis-Ergebnisse:
291
- - **Cloud:** Möglicherweise 2-10x schneller (GPU-Vorteil)
292
- - **On-Premise:** Kosteneffizienter bei hohem Durchsatz
293
- - **Hybrid-Ansatz:** Optimale Lösung für SAAP-Platform
 
294
 
295
- ---
296
- **🔬 Dual-Benchmark Setup:**
297
- - **Lokale App:** http://127.0.0.1:7860 (On-Premise Daten sammeln)
298
- - **Cloud App:** Diese HuggingFace Space (Cloud-Performance testen)
299
  """)
300
 
301
  if __name__ == "__main__":
 
17
  "distilgpt2", # 82M - Optimiert
18
  "microsoft/DialoGPT-medium", # 345M - Mittlere Größe
19
  "google/flan-t5-small", # 80M - Instruction-tuned
 
20
  ]
 
 
 
 
 
 
 
 
 
 
21
 
22
  def test_agent_response(self, prompt, model_name, agent_role="General"):
23
  """HuggingFace Inference API Test"""
 
39
  response = self.client.text_generation(
40
  prompt=final_prompt,
41
  model=model_name,
42
+ max_new_tokens=100,
43
  temperature=0.7,
44
+ return_full_text=False,
 
 
45
  )
46
 
47
  end_time = time.time()
 
54
  "response": response_text,
55
  "time": f"{response_time:.2f}s",
56
  "model": model_name,
 
57
  "tokens": len(response_text.split()),
58
+ "status": "✅ Success (HuggingFace Cloud)",
59
+ "environment": "☁️ HuggingFace Inference API"
 
60
  }
61
 
62
  except Exception as e:
63
  end_time = time.time()
 
 
64
  return {
65
+ "status": f"❌ API Error: {str(e)[:50]}...",
66
+ "time": f"{end_time - start_time:.2f}s",
67
+ "environment": "☁️ HuggingFace Inference API"
 
68
  }
69
 
70
  # Global benchmark instance
 
71
  benchmark = HuggingFaceInferenceBenchmark()
72
 
73
  def run_cloud_benchmark(prompt, selected_models, agent_role):
 
80
 
81
  results = []
82
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
83
+ results.append("**Platform:** HuggingFace Inference API | **Environment:** Cloud GPU")
84
  results.append(f"**🤖 Agent Role:** {agent_role}")
85
  results.append(f"**📝 Test Prompt:** {prompt}")
86
  results.append(f"**🔧 Models:** {', '.join(selected_models)}")
 
93
  for model_name in selected_models:
94
  result = benchmark.test_agent_response(prompt, model_name, agent_role)
95
 
96
+ results.append(f"## ☁️ {model_name}")
 
 
97
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
98
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
99
+ results.append(f"**Environment:** {result.get('environment', 'Unknown')}")
 
 
 
100
  results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
101
 
102
  if 'response' in result and result['response']:
103
+ preview = result['response'][:100].replace('\n', ' ')
104
  results.append(f"**Response Preview:** {preview}...")
 
 
 
105
 
106
  results.append("---")
107
 
 
108
  if result.get('status', '').startswith('✅'):
109
  successful_tests += 1
110
  try:
 
119
  results.append(f"## 📊 Cloud Performance Summary")
120
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
121
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ # Vergleich mit deinen lokalen Daten
124
+ results.append(f"\n## 🆚 On-Premise vs. Cloud Vergleich")
125
+ results.append(f"**🏠 On-Premise (CachyOS):** 17-25s (deine Baseline)")
126
+ results.append(f"**☁️ Cloud (HuggingFace):** {avg_time:.2f}s")
 
 
 
 
127
 
128
+ performance_ratio = avg_time / 21.5 # Deine durchschnittliche lokale Zeit
 
 
 
 
 
 
129
 
130
+ if performance_ratio < 0.5:
131
+ results.append(f"**🎓 Thesis-Fazit:** ☁️ Cloud deutlich schneller ({1/performance_ratio:.1f}x)")
132
+ elif performance_ratio < 1.0:
133
+ results.append(f"**🎓 Thesis-Fazit:** ☁️ Cloud schneller, On-Premise konkurrenzfähig")
134
+ else:
135
+ results.append(f"**🎓 Thesis-Fazit:** 🏠 On-Premise überlegen + Datenschutz-Vorteil")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  return "\n".join(results)
138
 
139
  # Gradio Interface
140
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
141
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
142
+ gr.Markdown("**HuggingFace Inference API** | **Cloud vs. On-Premise Vergleich**")
143
 
144
  with gr.Row():
145
  with gr.Column(scale=2):
146
  prompt_input = gr.Textbox(
147
  label="SAAP Test Prompt",
 
148
  lines=3,
149
+ value="Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform."
150
  )
151
 
152
  agent_role = gr.Dropdown(
 
158
  with gr.Column(scale=1):
159
  model_selection = gr.CheckboxGroup(
160
  choices=benchmark.available_models,
161
+ label="☁️ Cloud Models",
162
  value=["distilgpt2", "gpt2"]
163
  )
164
 
 
167
  # Results
168
  results_output = gr.Markdown(label="Cloud Benchmark Results")
169
 
 
170
  benchmark_btn.click(
171
  run_cloud_benchmark,
172
  inputs=[prompt_input, model_selection, agent_role],
173
  outputs=results_output
174
  )
175
 
176
+ with gr.Accordion("🎓 SAAP Thesis: Cloud vs. On-Premise", open=False):
 
177
  gr.Markdown("""
178
+ ### 📊 Benchmark-Strategie
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
+ **🏠 On-Premise Baseline:**
181
+ - qwen2:1.5b: 25.94s | tinyllama: 17.96s
182
+ - Hardware: Intel i7-5600U, 16GB RAM
183
+ - Kosten: 0€ pro Request ✅
184
+ - DSGVO: 100% konform ✅
185
 
186
+ **☁️ Cloud Vergleich:**
187
+ - HuggingFace Inference API
188
+ - GPU-optimierte Cloud-Infrastruktur
189
+ - API-Kosten pro Request 💰
190
+ - Internetabhängig ❌
191
 
192
+ **Lokale App:** http://127.0.0.1:7860
 
 
 
193
  """)
194
 
195
  if __name__ == "__main__":