Hwandji committed on
Commit
48305bd
·
1 Parent(s): b967f68

🎯 Add HuggingFace Cloud Benchmark for SAAP Thesis

Browse files
Files changed (2) hide show
  1. app_hf_cloud.py +281 -0
  2. requirements.txt +3 -0
app_hf_cloud.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ from datetime import datetime
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
5
+ import torch
6
+
7
class HuggingFaceCloudBenchmark:
    """Run small text-generation models through Transformers and time them.

    Pipelines are created on first use and kept in ``models_cache`` so that
    repeated benchmark runs of the same model skip the load step.
    """

    # Parameter counts shown in the report, keyed by Hub model id.
    _MODEL_SIZES = {
        "distilgpt2": "82M Parameter",
        "gpt2": "124M Parameter",
        "microsoft/DialoGPT-small": "117M Parameter",
        "microsoft/DialoGPT-medium": "345M Parameter",
    }

    def __init__(self):
        # model name -> loaded text-generation pipeline
        self.models_cache = {}
        self.available_models = [
            "microsoft/DialoGPT-small",   # 117M - very fast
            "gpt2",                       # 124M - standard GPT-2
            "distilgpt2",                 # 82M - optimized & fast
            "microsoft/DialoGPT-medium",  # 345M - medium size
        ]

    def load_model(self, model_name):
        """Return the text-generation pipeline for ``model_name``.

        The pipeline is built on first request and cached afterwards.

        Args:
            model_name: Hub model id passed to ``pipeline`` as both model and
                tokenizer.

        Returns:
            The cached or freshly loaded pipeline, or ``None`` if loading
            raised (the error is printed, not propagated). Failed loads are
            not cached, so a later call retries.
        """
        cached = self.models_cache.get(model_name)
        if cached is not None:
            return cached

        try:
            print(f"📥 Loading {model_name}...")
            # Configured for CPU inference.
            # NOTE(review): max_length is set here while inference calls pass
            # max_new_tokens; confirm which limit is intended to apply.
            loaded = pipeline(
                "text-generation",
                model=model_name,
                tokenizer=model_name,
                device=-1,  # CPU instead of GPU
                torch_dtype=torch.float32,
                max_length=512,
            )
        except Exception as e:
            print(f"❌ Failed to load {model_name}: {e}")
            return None

        self.models_cache[model_name] = loaded
        print(f"✅ {model_name} loaded successfully")
        return loaded

    def test_agent_response(self, prompt, model_name, agent_role="General"):
        """Generate a completion for ``prompt`` and report timing metadata.

        Args:
            prompt: User text to send to the model.
            model_name: Hub model id; loaded through :meth:`load_model`.
            agent_role: One of the SAAP roles ("Jane", "John", "Justus",
                "Lara", "General"). Known roles prepend a role preamble to
                the prompt; unknown roles use the prompt unchanged.

        Returns:
            A dict. On success it holds ``response``, ``time`` (seconds
            formatted as ``"1.23s"``), ``model``, ``agent_role``, ``tokens``,
            ``status``, ``environment`` and ``model_size``. On load or
            inference failure it holds only ``status``, ``time`` and
            ``environment``.
        """
        # SAAP-specific prompt preambles per agent role.
        saap_prompts = {
            "Jane": f"Als KI-Architektin für Multi-Agent-Systeme: {prompt}",
            "John": f"Als Softwareentwickler für AGI-Architekturen: {prompt}",
            "Justus": f"Als Rechtsexperte für DSGVO und KI-Compliance: {prompt}",
            "Lara": f"Als medizinische KI-Expertin: {prompt}",
            "General": prompt,
        }
        final_prompt = saap_prompts.get(agent_role, prompt)

        generator = self.load_model(model_name)
        if generator is None:
            return {
                "status": f"❌ Model {model_name} konnte nicht geladen werden",
                "time": "0.00s",
                "environment": "☁️ HuggingFace Transformers",
            }

        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by system clock adjustments during the run.
        start_time = time.perf_counter()

        try:
            result = generator(
                final_prompt,
                max_new_tokens=128,  # capped for performance
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                pad_token_id=generator.tokenizer.eos_token_id,
                num_return_sequences=1,
                truncation=True,
            )
            response_time = time.perf_counter() - start_time

            generated_text = result[0]['generated_text']
            # Drop only the echoed prompt at the start. Removing every
            # occurrence would also delete text the model produced itself
            # whenever it repeats the prompt.
            if generated_text.startswith(final_prompt):
                response_text = generated_text[len(final_prompt):]
            else:
                response_text = generated_text
            response_text = response_text.strip()

            return {
                "response": response_text,
                "time": f"{response_time:.2f}s",
                "model": model_name,
                "agent_role": agent_role,
                # Whitespace-separated word count, used as a cheap stand-in
                # for a tokenizer-based token count.
                "tokens": len(response_text.split()),
                "status": "✅ Success (HuggingFace Cloud)",
                "environment": "☁️ HuggingFace Transformers",
                "model_size": self.get_model_size(model_name),
            }

        except Exception as e:
            elapsed = time.perf_counter() - start_time
            return {
                "status": f"❌ Inference Error: {str(e)[:50]}...",
                "time": f"{elapsed:.2f}s",
                "environment": "☁️ HuggingFace Transformers",
            }

    def get_model_size(self, model_name):
        """Return the display size for ``model_name``, or "Unknown Size"."""
        return self._MODEL_SIZES.get(model_name, "Unknown Size")
112
+
113
+ # Global benchmark instance, created once at import time and shared by run_cloud_benchmark
114
+ print("☁️ Initializing HuggingFace Cloud Benchmark...")
115
+ benchmark = HuggingFaceCloudBenchmark()
116
+
117
def _cloud_rating_line(avg_time):
    """Return the Markdown rating line for an average latency in seconds."""
    if avg_time < 3.0:
        return "**☁️ Cloud Rating:** 🚀 Exzellent für Cloud-basierte Multi-Agent Systeme"
    if avg_time < 8.0:
        return "**☁️ Cloud Rating:** ⚡ Gut für interaktive Cloud-Anwendungen"
    if avg_time < 15.0:
        return "**☁️ Cloud Rating:** ⚠️ Akzeptabel für Batch Cloud-Processing"
    return "**☁️ Cloud Rating:** 🐌 Optimierung erforderlich"


def run_cloud_benchmark(prompt, selected_models, agent_role):
    """Benchmark the selected models on ``prompt`` and return a Markdown report.

    Args:
        prompt: Test prompt; ``None``, empty or whitespace-only input yields a
            warning string instead of a report.
        selected_models: Iterable of model names to run; an empty or ``None``
            selection yields a warning string.
        agent_role: Role name forwarded to ``benchmark.test_agent_response``.

    Returns:
        A Markdown string with one section per model. When at least one model
        succeeded, a summary (average latency, rating, on-premise comparison
        and thesis conclusion) is appended.
    """
    # Guard against None as well as blank input so the UI callback never
    # raises on a missing value.
    if not prompt or not prompt.strip():
        return "⚠️ **Bitte Test-Prompt eingeben**"

    if not selected_models:
        return "⚠️ **Bitte mindestens ein Model auswählen**"

    results = [
        "# ☁️ SAAP Cloud Performance Benchmark",
        "**Platform:** HuggingFace Transformers | **Environment:** Cloud GPU/CPU",
        f"**🤖 Agent Role:** {agent_role}",
        f"**📝 Test Prompt:** {prompt}",
        f"**🔧 Models:** {', '.join(selected_models)}",
        f"**⏰ Timestamp:** {datetime.now().strftime('%H:%M:%S')}",
        "---",
    ]

    total_time = 0.0
    successful_tests = 0

    for model_name in selected_models:
        result = benchmark.test_agent_response(prompt, model_name, agent_role)

        results.extend([
            f"## ☁️ {model_name.upper()}",
            f"**Status:** {result.get('status', '❌ Error')}",
            f"**Response Time:** {result.get('time', 'N/A')}",
            f"**Model Size:** {result.get('model_size', 'Unknown')}",
            f"**Environment:** {result.get('environment', '☁️ HuggingFace')}",
            f"**Tokens Generated:** {result.get('tokens', 0)}",
        ])

        response = result.get('response')
        if response:
            # Keep the preview on a single line so it cannot break the layout.
            preview = response[:120].replace('\n', ' ')
            results.append(f"**Response Preview:** {preview}...")

        results.append("---")

        # Only successful runs count towards the average.
        if result.get('status', '').startswith('✅'):
            successful_tests += 1
            try:
                total_time += float(str(result.get('time', '0')).rstrip('s'))
            except ValueError:
                # Best effort: an unparsable time string contributes 0s rather
                # than aborting the whole report.
                pass

    if successful_tests > 0:
        avg_time = total_time / successful_tests
        results.extend([
            "## 📊 Cloud Performance Summary",
            f"**Average Response Time:** {avg_time:.2f}s",
            f"**Successful Tests:** {successful_tests}/{len(selected_models)}",
            "**Infrastructure:** ☁️ HuggingFace Spaces (Shared CPU/GPU)",
            _cloud_rating_line(avg_time),
            # Fixed on-premise reference figures used for the thesis comparison.
            "\n## 🆚 On-Premise vs. Cloud Comparison",
            "**🏠 On-Premise (CachyOS + Ollama):**",
            "- qwen2:1.5b: 25.94s",
            "- tinyllama: 17.96s",
            "- Hardware: Intel i7-5600U, 16GB RAM",
            "- Kosten: 0€ pro Request ✅",
            "- DSGVO: Vollständig konform ✅",
            "- Offline: Funktioniert ohne Internet ✅",
            "\n**☁️ Cloud (HuggingFace):**",
            f"- Average: {avg_time:.2f}s",
            "- Hardware: Shared Cloud Infrastructure",
            "- Kosten: API-Gebühren pro Request 💰",
            "- DSGVO: Abhängig von Provider ⚠️",
            "- Offline: Internetverbindung erforderlich ❌",
        ])

        # Conclusion depends on whether the cloud average beats the 18s cutoff.
        if avg_time < 18:
            results.append("\n**🎓 Thesis-Fazit:** ☁️ Cloud hat Performance-Vorteil, aber On-Premise bietet Datenschutz und Kostenkontrolle")
        else:
            results.append("\n**🎓 Thesis-Fazit:** 🏠 On-Premise ist konkurrenzfähig und bietet zusätzlich Datenschutz-Compliance")

    return "\n".join(results)
204
+
205
# Gradio interface: prompt and role on the left, model picker on the right,
# run button and Markdown report below, static comparison notes at the bottom.
_DEFAULT_PROMPT = "Erkläre die Vorteile einer On-Premise Multi-Agent-Plattform gegenüber Cloud-Lösungen."
_AGENT_ROLE_CHOICES = ["General", "Jane", "John", "Justus", "Lara"]
_DEFAULT_MODELS = ["distilgpt2", "gpt2"]
_COMPARISON_NOTES = """
### 🎓 SAAP Thesis Integration

#### 🏠 On-Premise Vorteile (Ihre CachyOS Daten):
- **Datenschutz:** ✅ 100% DSGVO-konform, keine Datenübertragung
- **Kosten:** ✅ 0€ pro Request nach Initial-Setup
- **Kontrolle:** ✅ Volle Kontrolle über Models und Daten
- **Offline:** ✅ Funktioniert ohne Internetverbindung
- **Sicherheit:** ✅ Keine Abhängigkeit von externen Services

#### ☁️ Cloud Vorteile (Diese HuggingFace Daten):
- **Performance:** ⚡ Möglicherweise schneller durch GPU-Cluster
- **Skalierung:** 📈 Automatische Skalierung bei Last
- **Wartung:** 🔧 Keine lokale Infrastruktur-Wartung
- **Updates:** 🚀 Automatische Model-Updates verfügbar

#### 🎯 Für SAAP Multi-Agent Platform:
**On-Premise ist ideal für:**
- Krankenhäuser, Behörden, Finanzsektor
- Datenschutz-kritische Anwendungen
- Kostenkontrolle bei hohem Durchsatz

**Cloud ist geeignet für:**
- Prototyping und Entwicklung
- Variable Workloads
- Schnelle Experimente

### 📊 Ihre Thesis-Daten:
Sammeln Sie beide Datensätze für aussagekräftige Vergleiche!

**Lokale App:** http://127.0.0.1:7860 (CachyOS)
**Cloud App:** Diese HuggingFace Space
"""

with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
    gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **HuggingFace Transformers** | **Cloud vs. On-Premise**")

    with gr.Row():
        # Wide column: what to ask and in which agent role.
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="SAAP Test Prompt",
                placeholder="Test-Prompt für Agent Performance...",
                lines=3,
                value=_DEFAULT_PROMPT,
            )

            agent_role = gr.Dropdown(
                choices=_AGENT_ROLE_CHOICES,
                label="Agent Role Simulation",
                value="Jane",
            )

        # Narrow column: which models to run.
        with gr.Column(scale=1):
            model_selection = gr.CheckboxGroup(
                choices=benchmark.available_models,
                label="Cloud Models to Benchmark",
                value=_DEFAULT_MODELS,
            )

    benchmark_btn = gr.Button("☁️ Run Cloud Benchmark", variant="primary", size="lg")

    # The report is rendered as Markdown.
    results_output = gr.Markdown(label="Cloud Benchmark Results")

    # Clicking the button runs the benchmark and fills the report.
    benchmark_btn.click(
        fn=run_cloud_benchmark,
        inputs=[prompt_input, model_selection, agent_role],
        outputs=results_output,
    )

    # Collapsed-by-default background notes.
    with gr.Accordion("ℹ️ Cloud vs. On-Premise Vergleich", open=False):
        gr.Markdown(_COMPARISON_NOTES)

if __name__ == "__main__":
    demo.launch()
requirements.txt CHANGED
@@ -1,2 +1,5 @@
1
  gradio>=4.0.0
2
  requests>=2.31.0
 
 
 
 
1
  gradio>=4.0.0
2
  requests>=2.31.0
3
+ accelerate>=0.20.0
4
+ torch>=2.0.0
5
+ transformers>=4.30.0