Hwandji committed on
Commit
a9ed00a
·
1 Parent(s): 179fd93

🔒 Secure API token management via environment variables

Browse files
Files changed (1) hide show
  1. app.py +67 -87
app.py CHANGED
@@ -6,19 +6,25 @@ from datetime import datetime
6
 
7
  class HuggingFaceRealAPI:
8
  def __init__(self):
9
- # API-Token aus Environment oder direkt einsetzen
10
- self.api_token = os.getenv("HF_TOKEN", None) # ← Token hier einsetzen
11
  self.api_url = "https://api-inference.huggingface.co/models/"
12
 
13
- # Models die definitiv funktionieren
14
  self.available_models = [
15
  "gpt2",
16
  "distilgpt2",
17
  "microsoft/DialoGPT-small"
18
  ]
19
 
 
 
 
20
  def query_model(self, model_name, prompt):
21
  """Echter API Call mit Authentication"""
 
 
 
22
  url = f"{self.api_url}{model_name}"
23
 
24
  headers = {
@@ -35,7 +41,7 @@ class HuggingFaceRealAPI:
35
  "return_full_text": False
36
  },
37
  "options": {
38
- "wait_for_model": True # Wichtig: Warten bis Model geladen ist
39
  }
40
  }
41
 
@@ -45,6 +51,13 @@ class HuggingFaceRealAPI:
45
  def test_agent_response(self, prompt, model_name, agent_role="General"):
46
  """Echter HuggingFace API Test"""
47
 
 
 
 
 
 
 
 
48
  saap_prompts = {
49
  "Jane": f"Als KI-Architektin für Multi-Agent-Systeme:\nFrage: {prompt}\nAntwort:",
50
  "John": f"Als Softwareentwickler für AGI-Architekturen:\nFrage: {prompt}\nAntwort:",
@@ -63,7 +76,8 @@ class HuggingFaceRealAPI:
63
  if response.status_code == 200:
64
  result = response.json()
65
 
66
- # Handle verschiedene Response-Formate
 
67
  if isinstance(result, list) and len(result) > 0:
68
  if 'generated_text' in result[0]:
69
  response_text = result[0]['generated_text']
@@ -71,16 +85,9 @@ class HuggingFaceRealAPI:
71
  response_text = str(result[0])
72
  elif isinstance(result, dict):
73
  if 'generated_text' in result:
74
- response_text = result['generated_text']
75
- elif 'error' in result:
76
- return {
77
- "status": f"❌ API Error: {result['error']}",
78
- "time": f"{response_time:.2f}s"
79
- }
80
  else:
81
  response_text = str(result)
82
- else:
83
- response_text = str(result)
84
 
85
  return {
86
  "response": response_text,
@@ -103,11 +110,6 @@ class HuggingFaceRealAPI:
103
  "time": f"{response_time:.2f}s"
104
  }
105
 
106
- except requests.exceptions.Timeout:
107
- return {
108
- "status": "❌ Timeout - Model zu langsam",
109
- "time": f"{time.time() - start_time:.2f}s"
110
- }
111
  except Exception as e:
112
  return {
113
  "status": f"❌ Error: {str(e)[:50]}",
@@ -118,29 +120,32 @@ class HuggingFaceRealAPI:
118
  benchmark = HuggingFaceRealAPI()
119
 
120
  def run_cloud_benchmark(prompt, selected_models, agent_role):
121
- """Echter Cloud Benchmark mit HuggingFace API"""
122
  if not prompt.strip():
123
  return "⚠️ **Bitte Test-Prompt eingeben**"
124
 
125
  if not selected_models:
126
  return "⚠️ **Bitte mindestens ein Model auswählen**"
127
 
128
- # Token-Check
129
- if "YOUR_TOKEN_HERE" in benchmark.api_token:
130
  return """
131
- ## ❌ HuggingFace API Token benötigt
132
-
133
- **Für echte API-Calls:**
134
- 1. Gehe zu https://huggingface.co/settings/tokens
135
- 2. Erstelle neuen "Read" Token
136
- 3. Ersetze `hf_YOUR_TOKEN_HERE` in der app.py
137
- 4. Neu deployen
138
-
139
- **Ohne Token sind nur lokale Tests möglich.**
 
 
 
140
  """
141
 
142
  results = []
143
- results.append("# ☁️ SAAP Cloud Performance Benchmark (ECHT)")
144
  results.append("**Platform:** HuggingFace Inference API | **Echte GPU-Cluster**")
145
  results.append(f"**🤖 Agent Role:** {agent_role}")
146
  results.append(f"**📝 Test Prompt:** {prompt}")
@@ -157,8 +162,15 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
157
  results.append(f"## ☁️ {model_name}")
158
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
159
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
160
- results.append(f"**Environment:** {result.get('environment', '☁️ HuggingFace')}")
161
- results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
 
 
 
 
 
 
 
162
 
163
  if 'response' in result and result['response']:
164
  preview = result['response'][:120].replace('\n', ' ')
@@ -166,7 +178,7 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
166
 
167
  results.append("---")
168
 
169
- # Statistics nur bei Success
170
  if result.get('status', '').startswith('✅'):
171
  successful_tests += 1
172
  try:
@@ -175,47 +187,36 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
175
  except:
176
  pass
177
 
178
- # Performance Summary mit echten Daten
179
  if successful_tests > 0:
180
  avg_time = total_time / successful_tests
181
- results.append(f"## 📊 Echte Cloud Performance")
182
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
183
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
184
- results.append(f"**Authentisch:** ✅ Echte HuggingFace GPU-Inferenz")
185
-
186
- # Echter Vergleich mit deinen lokalen Daten
187
- results.append(f"\n## 🆚 **Authentischer Performance-Vergleich**")
188
 
189
- results.append(f"### 🏠 **On-Premise (Deine gemessenen Werte):**")
190
- results.append(f"- **qwen2:1.5b:** 25.94s")
191
- results.append(f"- **tinyllama:** 17.96s")
192
- results.append(f"- **Durchschnitt:** ~22s")
193
 
194
- results.append(f"### ☁️ **Cloud (Echte HuggingFace API):**")
195
- results.append(f"- **Durchschnitt:** {avg_time:.2f}s")
196
-
197
- # Echter Speedup-Vergleich
198
  speedup = 22 / avg_time if avg_time > 0 else 1
199
- results.append(f"\n**🎓 Authentische Thesis-Ergebnisse:**")
200
- results.append(f"**Performance-Faktor:** {speedup:.1f}x ({'Cloud schneller' if speedup > 1 else 'On-Premise schneller'})")
201
 
202
- if speedup > 5:
203
- results.append(f"**Fazit:** ☁️ Cloud deutlich überlegen ({speedup:.1f}x), aber Kosten/Datenschutz beachten")
204
- elif speedup > 2:
205
- results.append(f"**Fazit:** ☁️ Cloud schneller, On-Premise für Datenschutz/Kosten besser")
206
  else:
207
- results.append(f"**Fazit:** 🏠 On-Premise konkurrenzfähig + Datenschutz + Kostenvorteile")
208
-
209
- else:
210
- results.append("## ❌ Keine erfolgreichen API-Calls")
211
- results.append("**Mögliche Ursachen:** Token-Problem, Model-Loading, Rate-Limits")
212
 
213
  return "\n".join(results)
214
 
215
  # Gradio Interface
216
- with gr.Blocks(title="SAAP Real Cloud Benchmark", theme=gr.themes.Soft()) as demo:
217
- gr.Markdown("# ☁️ SAAP Echter Cloud Performance Benchmark")
218
- gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Echte HuggingFace API vs. On-Premise**")
 
 
 
 
219
 
220
  with gr.Row():
221
  with gr.Column(scale=2):
@@ -227,7 +228,7 @@ with gr.Blocks(title="SAAP Real Cloud Benchmark", theme=gr.themes.Soft()) as dem
227
 
228
  agent_role = gr.Dropdown(
229
  choices=["General", "Jane", "John", "Justus"],
230
- label="Agent Role Simulation",
231
  value="Jane"
232
  )
233
 
@@ -235,39 +236,18 @@ with gr.Blocks(title="SAAP Real Cloud Benchmark", theme=gr.themes.Soft()) as dem
235
  model_selection = gr.CheckboxGroup(
236
  choices=benchmark.available_models,
237
  label="☁️ Echte Cloud Models",
238
- value=["gpt2"] # Start mit einem Model
239
  )
240
 
241
- benchmark_btn = gr.Button("☁️ Run ECHTER Cloud Benchmark", variant="primary", size="lg")
242
 
243
- results_output = gr.Markdown(label="Echte Benchmark Results")
244
 
245
  benchmark_btn.click(
246
  run_cloud_benchmark,
247
  inputs=[prompt_input, model_selection, agent_role],
248
  outputs=results_output
249
  )
250
-
251
- with gr.Accordion("🎓 Authentische SAAP Thesis-Daten", open=False):
252
- gr.Markdown("""
253
- ### ⚡ Echter API vs. Simulation
254
-
255
- **Vorher:** Simulierte 1.5s (unrealistisch)
256
- **Jetzt:** Echte HuggingFace GPU-Cluster Performance
257
-
258
- ### 📊 Erwartete echte Ergebnisse:
259
- - **gpt2:** ~3-8s (abhängig von Server-Last)
260
- - **distilgpt2:** ~2-5s (kleineres Model)
261
- - **DialoGPT:** ~4-10s (Dialog-optimiert)
262
-
263
- ### 🎯 Authentische Thesis-Daten:
264
- - ✅ Echte Cloud-Performance-Messwerte
265
- - ✅ Vergleichbar mit deinen On-Premise Daten (17-26s)
266
- - ✅ Realistische Kostenabschätzung möglich
267
- - ✅ Echte API-Latenz und Zuverlässigkeit
268
-
269
- **Lokale App:** http://127.0.0.1:7860
270
- """)
271
 
272
  if __name__ == "__main__":
273
  demo.launch()
 
6
 
7
  class HuggingFaceRealAPI:
8
  def __init__(self):
9
+ # Token aus Environment Variable (sicher)
10
+ self.api_token = os.getenv("HF_TOKEN")
11
  self.api_url = "https://api-inference.huggingface.co/models/"
12
 
13
+ # Verfügbare Models
14
  self.available_models = [
15
  "gpt2",
16
  "distilgpt2",
17
  "microsoft/DialoGPT-small"
18
  ]
19
 
20
+ # Token-Status prüfen
21
+ self.token_available = self.api_token is not None
22
+
23
  def query_model(self, model_name, prompt):
24
  """Echter API Call mit Authentication"""
25
+ if not self.token_available:
26
+ raise Exception("HF_TOKEN nicht verfügbar - in Space Secrets konfigurieren")
27
+
28
  url = f"{self.api_url}{model_name}"
29
 
30
  headers = {
 
41
  "return_full_text": False
42
  },
43
  "options": {
44
+ "wait_for_model": True
45
  }
46
  }
47
 
 
51
  def test_agent_response(self, prompt, model_name, agent_role="General"):
52
  """Echter HuggingFace API Test"""
53
 
54
+ if not self.token_available:
55
+ return {
56
+ "status": "❌ HF_TOKEN nicht konfiguriert in Space Secrets",
57
+ "time": "0.00s",
58
+ "setup_instructions": "Gehe zu Settings → Repository secrets → Füge HF_TOKEN hinzu"
59
+ }
60
+
61
  saap_prompts = {
62
  "Jane": f"Als KI-Architektin für Multi-Agent-Systeme:\nFrage: {prompt}\nAntwort:",
63
  "John": f"Als Softwareentwickler für AGI-Architekturen:\nFrage: {prompt}\nAntwort:",
 
76
  if response.status_code == 200:
77
  result = response.json()
78
 
79
+ # Response-Format handling
80
+ response_text = ""
81
  if isinstance(result, list) and len(result) > 0:
82
  if 'generated_text' in result[0]:
83
  response_text = result[0]['generated_text']
 
85
  response_text = str(result[0])
86
  elif isinstance(result, dict):
87
  if 'generated_text' in result:
88
+ response_text = result['generated_text']
 
 
 
 
 
89
  else:
90
  response_text = str(result)
 
 
91
 
92
  return {
93
  "response": response_text,
 
110
  "time": f"{response_time:.2f}s"
111
  }
112
 
 
 
 
 
 
113
  except Exception as e:
114
  return {
115
  "status": f"❌ Error: {str(e)[:50]}",
 
120
  benchmark = HuggingFaceRealAPI()
121
 
122
  def run_cloud_benchmark(prompt, selected_models, agent_role):
123
+ """Echter Cloud Benchmark mit sicherer Token-Verwaltung"""
124
  if not prompt.strip():
125
  return "⚠️ **Bitte Test-Prompt eingeben**"
126
 
127
  if not selected_models:
128
  return "⚠️ **Bitte mindestens ein Model auswählen**"
129
 
130
+ # Token-Status prüfen
131
+ if not benchmark.token_available:
132
  return """
133
+ ## ❌ HuggingFace API Token Setup erforderlich
134
+
135
+ **Konfiguration in HuggingFace Space:**
136
+ 1. Gehe zu Space Settings ⚙️
137
+ 2. Scroll zu "Repository secrets"
138
+ 3. Füge Secret hinzu: Name: `HF_TOKEN`, Value: [dein Token]
139
+ 4. Space wird automatisch neu starten
140
+
141
+ **Token generieren:**
142
+ 1. https://huggingface.co/settings/tokens
143
+ 2. "New token" → "Read" permissions
144
+ 3. Token kopieren und in Space Secret einfügen
145
  """
146
 
147
  results = []
148
+ results.append("# ☁️ SAAP Authentischer Cloud Benchmark")
149
  results.append("**Platform:** HuggingFace Inference API | **Echte GPU-Cluster**")
150
  results.append(f"**🤖 Agent Role:** {agent_role}")
151
  results.append(f"**📝 Test Prompt:** {prompt}")
 
162
  results.append(f"## ☁️ {model_name}")
163
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
164
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
165
+
166
+ if 'setup_instructions' in result:
167
+ results.append(f"**Setup:** {result['setup_instructions']}")
168
+
169
+ if 'environment' in result:
170
+ results.append(f"**Environment:** {result['environment']}")
171
+
172
+ if 'tokens' in result:
173
+ results.append(f"**Tokens:** {result['tokens']}")
174
 
175
  if 'response' in result and result['response']:
176
  preview = result['response'][:120].replace('\n', ' ')
 
178
 
179
  results.append("---")
180
 
181
+ # Statistics
182
  if result.get('status', '').startswith('✅'):
183
  successful_tests += 1
184
  try:
 
187
  except:
188
  pass
189
 
190
+ # Performance Summary
191
  if successful_tests > 0:
192
  avg_time = total_time / successful_tests
193
+ results.append(f"## 📊 Authentische Cloud Performance")
194
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
195
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
 
 
 
 
196
 
197
+ # Echter Vergleich
198
+ results.append(f"\n## 🆚 **Echter Performance-Vergleich**")
199
+ results.append(f"**🏠 On-Premise:** ~22s (deine CachyOS Daten)")
200
+ results.append(f"**☁️ Cloud:** {avg_time:.2f}s (echte HuggingFace API)")
201
 
 
 
 
 
202
  speedup = 22 / avg_time if avg_time > 0 else 1
203
+ results.append(f"**Performance-Faktor:** {speedup:.1f}x")
 
204
 
205
+ if speedup > 3:
206
+ results.append(f"**🎓 Thesis-Fazit:** ☁️ Cloud deutlich schneller, aber On-Premise für Datenschutz/Kosten")
 
 
207
  else:
208
+ results.append(f"**🎓 Thesis-Fazit:** 🏠 On-Premise konkurrenzfähig mit Datenschutz-Vorteilen")
 
 
 
 
209
 
210
  return "\n".join(results)
211
 
212
  # Gradio Interface
213
+ with gr.Blocks(title="SAAP Authentischer Cloud Benchmark") as demo:
214
+ gr.Markdown("# ☁️ SAAP Authentischer Cloud Performance Benchmark")
215
+ gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Echte API vs. On-Premise**")
216
+
217
+ # Token Status anzeigen
218
+ token_status = "✅ HF_TOKEN konfiguriert" if benchmark.token_available else "❌ HF_TOKEN fehlt - Setup erforderlich"
219
+ gr.Markdown(f"**API Status:** {token_status}")
220
 
221
  with gr.Row():
222
  with gr.Column(scale=2):
 
228
 
229
  agent_role = gr.Dropdown(
230
  choices=["General", "Jane", "John", "Justus"],
231
+ label="Agent Role",
232
  value="Jane"
233
  )
234
 
 
236
  model_selection = gr.CheckboxGroup(
237
  choices=benchmark.available_models,
238
  label="☁️ Echte Cloud Models",
239
+ value=["gpt2"]
240
  )
241
 
242
+ benchmark_btn = gr.Button("☁️ Run Authentischen Benchmark", variant="primary")
243
 
244
+ results_output = gr.Markdown()
245
 
246
  benchmark_btn.click(
247
  run_cloud_benchmark,
248
  inputs=[prompt_input, model_selection, agent_role],
249
  outputs=results_output
250
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  if __name__ == "__main__":
253
  demo.launch()