Hwandji committed on
Commit
60986cb
Β·
1 Parent(s): e0e1626

πŸ”§ Simplified HuggingFace Public API (no auth token required)

Browse files
Files changed (1) hide show
  1. app.py +121 -74
app.py CHANGED
@@ -1,33 +1,46 @@
1
  import gradio as gr
2
  import requests
3
  import time
4
- import os
5
  from datetime import datetime
6
- from huggingface_hub import InferenceClient
7
 
8
- class HuggingFaceInferenceBenchmark:
9
  def __init__(self):
10
- # HuggingFace Inference API Client
11
- self.client = InferenceClient()
12
-
13
- # VerfΓΌgbare Models ΓΌber Inference API
14
  self.available_models = [
15
- "microsoft/DialoGPT-small", # 117M - Sehr schnell
16
- "gpt2", # 124M - Standard GPT-2
17
- "distilgpt2", # 82M - Optimiert
18
- "microsoft/DialoGPT-medium", # 345M - Mittlere Grâße
19
- "google/flan-t5-small", # 80M - Instruction-tuned
20
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def test_agent_response(self, prompt, model_name, agent_role="General"):
23
- """HuggingFace Inference API Test"""
24
 
25
  # SAAP-spezifische Prompts
26
  saap_prompts = {
27
- "Jane": f"Als KI-Architektin fΓΌr Multi-Agent-Systeme: {prompt}\n\nAntwort:",
28
- "John": f"Als Softwareentwickler fΓΌr AGI-Architekturen: {prompt}\n\nAntwort:",
29
- "Justus": f"Als Rechtsexperte fΓΌr DSGVO und KI-Compliance: {prompt}\n\nAntwort:",
30
- "Lara": f"Als medizinische KI-Expertin: {prompt}\n\nAntwort:",
31
  "General": f"{prompt}\n\nAntwort:"
32
  }
33
 
@@ -35,43 +48,58 @@ class HuggingFaceInferenceBenchmark:
35
  start_time = time.time()
36
 
37
  try:
38
- # HuggingFace Inference API Call
39
- response = self.client.text_generation(
40
- prompt=final_prompt,
41
- model=model_name,
42
- max_new_tokens=100,
43
- temperature=0.7,
44
- return_full_text=False,
45
- )
46
-
47
  end_time = time.time()
48
  response_time = end_time - start_time
49
 
50
- # Response bereinigen
51
- response_text = response.strip() if isinstance(response, str) else str(response).strip()
52
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  return {
54
- "response": response_text,
55
- "time": f"{response_time:.2f}s",
56
- "model": model_name,
57
- "tokens": len(response_text.split()),
58
- "status": "βœ… Success (HuggingFace Cloud)",
59
- "environment": "☁️ HuggingFace Inference API"
60
  }
61
-
62
  except Exception as e:
63
- end_time = time.time()
64
  return {
65
- "status": f"❌ API Error: {str(e)[:50]}...",
66
- "time": f"{end_time - start_time:.2f}s",
67
- "environment": "☁️ HuggingFace Inference API"
68
  }
69
 
70
  # Global benchmark instance
71
- benchmark = HuggingFaceInferenceBenchmark()
72
 
73
  def run_cloud_benchmark(prompt, selected_models, agent_role):
74
- """Cloud Performance Benchmark mit HuggingFace Inference API"""
75
  if not prompt.strip():
76
  return "⚠️ **Bitte Test-Prompt eingeben**"
77
 
@@ -80,7 +108,7 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
80
 
81
  results = []
82
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
83
- results.append("**Platform:** HuggingFace Inference API | **Environment:** Cloud GPU")
84
  results.append(f"**πŸ€– Agent Role:** {agent_role}")
85
  results.append(f"**πŸ“ Test Prompt:** {prompt}")
86
  results.append(f"**πŸ”§ Models:** {', '.join(selected_models)}")
@@ -97,14 +125,15 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
97
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
98
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
99
  results.append(f"**Environment:** {result.get('environment', 'Unknown')}")
100
- results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
101
 
102
  if 'response' in result and result['response']:
103
  preview = result['response'][:100].replace('\n', ' ')
104
- results.append(f"**Response Preview:** {preview}...")
105
 
106
  results.append("---")
107
 
 
108
  if result.get('status', '').startswith('βœ…'):
109
  successful_tests += 1
110
  try:
@@ -120,26 +149,42 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
120
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
121
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
122
 
123
- # Vergleich mit deinen lokalen Daten
124
- results.append(f"\n## πŸ†š On-Premise vs. Cloud Vergleich")
125
- results.append(f"**🏠 On-Premise (CachyOS):** 17-25s (deine Baseline)")
126
- results.append(f"**☁️ Cloud (HuggingFace):** {avg_time:.2f}s")
 
 
 
 
 
127
 
128
- performance_ratio = avg_time / 21.5 # Deine durchschnittliche lokale Zeit
 
 
 
 
129
 
130
- if performance_ratio < 0.5:
131
- results.append(f"**πŸŽ“ Thesis-Fazit:** ☁️ Cloud deutlich schneller ({1/performance_ratio:.1f}x)")
132
- elif performance_ratio < 1.0:
133
- results.append(f"**πŸŽ“ Thesis-Fazit:** ☁️ Cloud schneller, On-Premise konkurrenzfΓ€hig")
 
 
134
  else:
135
- results.append(f"**πŸŽ“ Thesis-Fazit:** 🏠 On-Premise ΓΌberlegen + Datenschutz-Vorteil")
 
 
 
 
 
136
 
137
  return "\n".join(results)
138
 
139
  # Gradio Interface
140
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
141
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
142
- gr.Markdown("**HuggingFace Inference API** | **Cloud vs. On-Premise Vergleich**")
143
 
144
  with gr.Row():
145
  with gr.Column(scale=2):
@@ -150,22 +195,21 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
150
  )
151
 
152
  agent_role = gr.Dropdown(
153
- choices=["General", "Jane", "John", "Justus", "Lara"],
154
- label="Agent Role Simulation",
155
  value="Jane"
156
  )
157
 
158
  with gr.Column(scale=1):
159
  model_selection = gr.CheckboxGroup(
160
  choices=benchmark.available_models,
161
- label="☁️ Cloud Models",
162
- value=["distilgpt2", "gpt2"]
163
  )
164
 
165
  benchmark_btn = gr.Button("☁️ Run Cloud Benchmark", variant="primary", size="lg")
166
 
167
- # Results
168
- results_output = gr.Markdown(label="Cloud Benchmark Results")
169
 
170
  benchmark_btn.click(
171
  run_cloud_benchmark,
@@ -173,23 +217,26 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
173
  outputs=results_output
174
  )
175
 
176
- with gr.Accordion("πŸŽ“ SAAP Thesis: Cloud vs. On-Premise", open=False):
177
  gr.Markdown("""
178
- ### πŸ“Š Benchmark-Strategie
179
 
180
- **🏠 On-Premise Baseline:**
 
181
  - qwen2:1.5b: 25.94s | tinyllama: 17.96s
182
- - Hardware: Intel i7-5600U, 16GB RAM
183
- - Kosten: 0€ pro Request βœ…
184
- - DSGVO: 100% konform βœ…
 
 
 
185
 
186
- **☁️ Cloud Vergleich:**
187
- - HuggingFace Inference API
188
- - GPU-optimierte Cloud-Infrastruktur
189
- - API-Kosten pro Request πŸ’°
190
- - InternetabhÀngig ❌
191
 
192
- **Lokale App:** http://127.0.0.1:7860
193
  """)
194
 
195
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import requests
3
  import time
 
4
  from datetime import datetime
 
5
 
6
+ class HuggingFacePublicAPI:
7
  def __init__(self):
8
+ self.api_url = "https://api-inference.huggingface.co/models/"
9
+ # Public Models die ohne Token funktionieren
 
 
10
  self.available_models = [
11
+ "gpt2",
12
+ "distilgpt2",
13
+ "microsoft/DialoGPT-small"
 
 
14
  ]
15
+
16
+ def query_model(self, model_name, prompt):
17
+ """Direct API call ohne HuggingFace Client"""
18
+ url = f"{self.api_url}{model_name}"
19
+
20
+ payload = {
21
+ "inputs": prompt,
22
+ "parameters": {
23
+ "max_new_tokens": 100,
24
+ "temperature": 0.7,
25
+ "return_full_text": False
26
+ }
27
+ }
28
+
29
+ headers = {
30
+ "Content-Type": "application/json"
31
+ }
32
+
33
+ response = requests.post(url, headers=headers, json=payload, timeout=30)
34
+ return response
35
 
36
  def test_agent_response(self, prompt, model_name, agent_role="General"):
37
+ """Simplified HuggingFace API Test"""
38
 
39
  # SAAP-spezifische Prompts
40
  saap_prompts = {
41
+ "Jane": f"Als KI-Architektin fΓΌr Multi-Agent-Systeme:\n{prompt}\n\nAntwort:",
42
+ "John": f"Als Softwareentwickler fΓΌr AGI-Architekturen:\n{prompt}\n\nAntwort:",
43
+ "Justus": f"Als Rechtsexperte fΓΌr DSGVO:\n{prompt}\n\nAntwort:",
 
44
  "General": f"{prompt}\n\nAntwort:"
45
  }
46
 
 
48
  start_time = time.time()
49
 
50
  try:
51
+ response = self.query_model(model_name, final_prompt)
 
 
 
 
 
 
 
 
52
  end_time = time.time()
53
  response_time = end_time - start_time
54
 
55
+ if response.status_code == 200:
56
+ result = response.json()
57
+
58
+ # Handle different response formats
59
+ if isinstance(result, list) and len(result) > 0:
60
+ if isinstance(result[0], dict) and 'generated_text' in result[0]:
61
+ response_text = result[0]['generated_text']
62
+ else:
63
+ response_text = str(result[0])
64
+ elif isinstance(result, dict) and 'generated_text' in result:
65
+ response_text = result['generated_text']
66
+ else:
67
+ response_text = str(result)
68
+
69
+ return {
70
+ "response": response_text[:200], # Limit length
71
+ "time": f"{response_time:.2f}s",
72
+ "model": model_name,
73
+ "tokens": len(response_text.split()),
74
+ "status": "βœ… Success (HuggingFace Public API)",
75
+ "environment": "☁️ HuggingFace Inference"
76
+ }
77
+ else:
78
+ error_msg = response.text if response.text else f"HTTP {response.status_code}"
79
+ return {
80
+ "status": f"❌ API Error: {error_msg[:50]}",
81
+ "time": f"{response_time:.2f}s",
82
+ "environment": "☁️ HuggingFace Inference"
83
+ }
84
+
85
+ except requests.exceptions.Timeout:
86
  return {
87
+ "status": "❌ Timeout - Model loading too slow",
88
+ "time": f"{time.time() - start_time:.2f}s",
89
+ "environment": "☁️ HuggingFace Inference"
 
 
 
90
  }
 
91
  except Exception as e:
 
92
  return {
93
+ "status": f"❌ Error: {str(e)[:50]}",
94
+ "time": f"{time.time() - start_time:.2f}s",
95
+ "environment": "☁️ HuggingFace Inference"
96
  }
97
 
98
  # Global benchmark instance
99
+ benchmark = HuggingFacePublicAPI()
100
 
101
  def run_cloud_benchmark(prompt, selected_models, agent_role):
102
+ """Simplified Cloud Benchmark"""
103
  if not prompt.strip():
104
  return "⚠️ **Bitte Test-Prompt eingeben**"
105
 
 
108
 
109
  results = []
110
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
111
+ results.append("**Platform:** HuggingFace Public Inference API")
112
  results.append(f"**πŸ€– Agent Role:** {agent_role}")
113
  results.append(f"**πŸ“ Test Prompt:** {prompt}")
114
  results.append(f"**πŸ”§ Models:** {', '.join(selected_models)}")
 
125
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
126
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
127
  results.append(f"**Environment:** {result.get('environment', 'Unknown')}")
128
+ results.append(f"**Tokens:** {result.get('tokens', 0)}")
129
 
130
  if 'response' in result and result['response']:
131
  preview = result['response'][:100].replace('\n', ' ')
132
+ results.append(f"**Preview:** {preview}...")
133
 
134
  results.append("---")
135
 
136
+ # Statistics
137
  if result.get('status', '').startswith('βœ…'):
138
  successful_tests += 1
139
  try:
 
149
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
150
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
151
 
152
+ # Direct comparison with your local data
153
+ results.append(f"\n## πŸ†š **SAAP Thesis: Performance Comparison**")
154
+ results.append(f"### 🏠 **On-Premise (Your CachyOS Data):**")
155
+ results.append(f"- **qwen2:1.5b:** 25.94s")
156
+ results.append(f"- **tinyllama:** 17.96s")
157
+ results.append(f"- **Average:** ~22s")
158
+ results.append(f"- **Cost:** 0€ per request βœ…")
159
+ results.append(f"- **Privacy:** 100% GDPR compliant βœ…")
160
+ results.append(f"- **Offline:** Works without internet βœ…")
161
 
162
+ results.append(f"### ☁️ **Cloud (HuggingFace API):**")
163
+ results.append(f"- **Average:** {avg_time:.2f}s")
164
+ results.append(f"- **Cost:** API fees per request πŸ’°")
165
+ results.append(f"- **Privacy:** Data sent to cloud ⚠️")
166
+ results.append(f"- **Offline:** Internet required ❌")
167
 
168
+ # Dynamic conclusion
169
+ speedup = 22 / avg_time if avg_time > 0 else 0
170
+ if speedup > 2:
171
+ results.append(f"\n**πŸŽ“ Thesis Result:** ☁️ Cloud is {speedup:.1f}x faster, but On-Premise offers better privacy & cost control")
172
+ elif speedup > 1.2:
173
+ results.append(f"\n**πŸŽ“ Thesis Result:** ☁️ Cloud slightly faster ({speedup:.1f}x), On-Premise competitive with privacy advantages")
174
  else:
175
+ results.append(f"\n**πŸŽ“ Thesis Result:** 🏠 On-Premise performance competitive or better, plus privacy & cost benefits")
176
+
177
+ else:
178
+ results.append(f"## ❌ All API calls failed")
179
+ results.append(f"**Possible causes:** Rate limiting, model loading, network issues")
180
+ results.append(f"\n**πŸŽ“ Thesis Implication:** On-Premise provides more reliable availability")
181
 
182
  return "\n".join(results)
183
 
184
  # Gradio Interface
185
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
186
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
187
+ gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Cloud vs. On-Premise Comparison**")
188
 
189
  with gr.Row():
190
  with gr.Column(scale=2):
 
195
  )
196
 
197
  agent_role = gr.Dropdown(
198
+ choices=["General", "Jane", "John", "Justus"],
199
+ label="Agent Role Simulation",
200
  value="Jane"
201
  )
202
 
203
  with gr.Column(scale=1):
204
  model_selection = gr.CheckboxGroup(
205
  choices=benchmark.available_models,
206
+ label="☁️ Public Cloud Models",
207
+ value=["gpt2", "distilgpt2"]
208
  )
209
 
210
  benchmark_btn = gr.Button("☁️ Run Cloud Benchmark", variant="primary", size="lg")
211
 
212
+ results_output = gr.Markdown(label="Benchmark Results")
 
213
 
214
  benchmark_btn.click(
215
  run_cloud_benchmark,
 
217
  outputs=results_output
218
  )
219
 
220
+ with gr.Accordion("πŸ“Š SAAP Thesis Data", open=False):
221
  gr.Markdown("""
222
+ ### 🎯 Performance Comparison Strategy
223
 
224
+ **🏠 Your On-Premise Data (CachyOS):**
225
+ - Intel i7-5600U, 16GB RAM
226
  - qwen2:1.5b: 25.94s | tinyllama: 17.96s
227
+ - Average: ~22s for complex prompts
228
+
229
+ **☁️ Cloud Benchmark (This App):**
230
+ - HuggingFace Public Inference API
231
+ - GPU-optimized cloud infrastructure
232
+ - Direct performance comparison
233
 
234
+ **πŸŽ“ Expected Thesis Results:**
235
+ - Cloud: Potentially faster due to GPUs
236
+ - On-Premise: Better privacy, cost control
237
+ - Hybrid approach: Best of both worlds
 
238
 
239
+ **Local App:** http://127.0.0.1:7860
240
  """)
241
 
242
  if __name__ == "__main__":