Hwandji committed on
Commit
60986cb
Β·
1 Parent(s): e0e1626

πŸ”§ Simplified HuggingFace Public API (no auth token required)

Browse files
Files changed (1) hide show
  1. app.py +121 -74
app.py CHANGED
@@ -1,33 +1,46 @@
1
  import gradio as gr
2
  import requests
3
  import time
4
- import os
5
  from datetime import datetime
6
- from huggingface_hub import InferenceClient
7
 
8
- class HuggingFaceInferenceBenchmark:
9
  def __init__(self):
10
- # HuggingFace Inference API Client
11
- self.client = InferenceClient()
12
-
13
- # VerfΓΌgbare Models ΓΌber Inference API
14
  self.available_models = [
15
- "microsoft/DialoGPT-small", # 117M - Sehr schnell
16
- "gpt2", # 124M - Standard GPT-2
17
- "distilgpt2", # 82M - Optimiert
18
- "microsoft/DialoGPT-medium", # 345M - Mittlere Grâße
19
- "google/flan-t5-small", # 80M - Instruction-tuned
20
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def test_agent_response(self, prompt, model_name, agent_role="General"):
23
- """HuggingFace Inference API Test"""
24
 
25
  # SAAP-spezifische Prompts
26
  saap_prompts = {
27
- "Jane": f"Als KI-Architektin fΓΌr Multi-Agent-Systeme: {prompt}\n\nAntwort:",
28
- "John": f"Als Softwareentwickler fΓΌr AGI-Architekturen: {prompt}\n\nAntwort:",
29
- "Justus": f"Als Rechtsexperte fΓΌr DSGVO und KI-Compliance: {prompt}\n\nAntwort:",
30
- "Lara": f"Als medizinische KI-Expertin: {prompt}\n\nAntwort:",
31
  "General": f"{prompt}\n\nAntwort:"
32
  }
33
 
@@ -35,43 +48,58 @@ class HuggingFaceInferenceBenchmark:
35
  start_time = time.time()
36
 
37
  try:
38
- # HuggingFace Inference API Call
39
- response = self.client.text_generation(
40
- prompt=final_prompt,
41
- model=model_name,
42
- max_new_tokens=100,
43
- temperature=0.7,
44
- return_full_text=False,
45
- )
46
-
47
  end_time = time.time()
48
  response_time = end_time - start_time
49
 
50
- # Response bereinigen
51
- response_text = response.strip() if isinstance(response, str) else str(response).strip()
52
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  return {
54
- "response": response_text,
55
- "time": f"{response_time:.2f}s",
56
- "model": model_name,
57
- "tokens": len(response_text.split()),
58
- "status": "βœ… Success (HuggingFace Cloud)",
59
- "environment": "☁️ HuggingFace Inference API"
60
  }
61
-
62
  except Exception as e:
63
- end_time = time.time()
64
  return {
65
- "status": f"❌ API Error: {str(e)[:50]}...",
66
- "time": f"{end_time - start_time:.2f}s",
67
- "environment": "☁️ HuggingFace Inference API"
68
  }
69
 
70
  # Global benchmark instance
71
- benchmark = HuggingFaceInferenceBenchmark()
72
 
73
  def run_cloud_benchmark(prompt, selected_models, agent_role):
74
- """Cloud Performance Benchmark mit HuggingFace Inference API"""
75
  if not prompt.strip():
76
  return "⚠️ **Bitte Test-Prompt eingeben**"
77
 
@@ -80,7 +108,7 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
80
 
81
  results = []
82
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
83
- results.append("**Platform:** HuggingFace Inference API | **Environment:** Cloud GPU")
84
  results.append(f"**πŸ€– Agent Role:** {agent_role}")
85
  results.append(f"**πŸ“ Test Prompt:** {prompt}")
86
  results.append(f"**πŸ”§ Models:** {', '.join(selected_models)}")
@@ -97,14 +125,15 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
97
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
98
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
99
  results.append(f"**Environment:** {result.get('environment', 'Unknown')}")
100
- results.append(f"**Tokens Generated:** {result.get('tokens', 0)}")
101
 
102
  if 'response' in result and result['response']:
103
  preview = result['response'][:100].replace('\n', ' ')
104
- results.append(f"**Response Preview:** {preview}...")
105
 
106
  results.append("---")
107
 
 
108
  if result.get('status', '').startswith('βœ…'):
109
  successful_tests += 1
110
  try:
@@ -120,26 +149,42 @@ def run_cloud_benchmark(prompt, selected_models, agent_role):
120
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
121
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
122
 
123
- # Vergleich mit deinen lokalen Daten
124
- results.append(f"\n## πŸ†š On-Premise vs. Cloud Vergleich")
125
- results.append(f"**🏠 On-Premise (CachyOS):** 17-25s (deine Baseline)")
126
- results.append(f"**☁️ Cloud (HuggingFace):** {avg_time:.2f}s")
 
 
 
 
 
127
 
128
- performance_ratio = avg_time / 21.5 # Deine durchschnittliche lokale Zeit
 
 
 
 
129
 
130
- if performance_ratio < 0.5:
131
- results.append(f"**πŸŽ“ Thesis-Fazit:** ☁️ Cloud deutlich schneller ({1/performance_ratio:.1f}x)")
132
- elif performance_ratio < 1.0:
133
- results.append(f"**πŸŽ“ Thesis-Fazit:** ☁️ Cloud schneller, On-Premise konkurrenzfΓ€hig")
 
 
134
  else:
135
- results.append(f"**πŸŽ“ Thesis-Fazit:** 🏠 On-Premise ΓΌberlegen + Datenschutz-Vorteil")
 
 
 
 
 
136
 
137
  return "\n".join(results)
138
 
139
  # Gradio Interface
140
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
141
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
142
- gr.Markdown("**HuggingFace Inference API** | **Cloud vs. On-Premise Vergleich**")
143
 
144
  with gr.Row():
145
  with gr.Column(scale=2):
@@ -150,22 +195,21 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
150
  )
151
 
152
  agent_role = gr.Dropdown(
153
- choices=["General", "Jane", "John", "Justus", "Lara"],
154
- label="Agent Role Simulation",
155
  value="Jane"
156
  )
157
 
158
  with gr.Column(scale=1):
159
  model_selection = gr.CheckboxGroup(
160
  choices=benchmark.available_models,
161
- label="☁️ Cloud Models",
162
- value=["distilgpt2", "gpt2"]
163
  )
164
 
165
  benchmark_btn = gr.Button("☁️ Run Cloud Benchmark", variant="primary", size="lg")
166
 
167
- # Results
168
- results_output = gr.Markdown(label="Cloud Benchmark Results")
169
 
170
  benchmark_btn.click(
171
  run_cloud_benchmark,
@@ -173,23 +217,26 @@ with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
173
  outputs=results_output
174
  )
175
 
176
- with gr.Accordion("πŸŽ“ SAAP Thesis: Cloud vs. On-Premise", open=False):
177
  gr.Markdown("""
178
- ### πŸ“Š Benchmark-Strategie
179
 
180
- **🏠 On-Premise Baseline:**
 
181
  - qwen2:1.5b: 25.94s | tinyllama: 17.96s
182
- - Hardware: Intel i7-5600U, 16GB RAM
183
- - Kosten: 0€ pro Request βœ…
184
- - DSGVO: 100% konform βœ…
 
 
 
185
 
186
- **☁️ Cloud Vergleich:**
187
- - HuggingFace Inference API
188
- - GPU-optimierte Cloud-Infrastruktur
189
- - API-Kosten pro Request πŸ’°
190
- - InternetabhÀngig ❌
191
 
192
- **Lokale App:** http://127.0.0.1:7860
193
  """)
194
 
195
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import requests
3
  import time
 
4
  from datetime import datetime
 
5
 
6
+ class HuggingFacePublicAPI:
7
  def __init__(self):
8
+ self.api_url = "https://api-inference.huggingface.co/models/"
9
+ # Public Models die ohne Token funktionieren
 
 
10
  self.available_models = [
11
+ "gpt2",
12
+ "distilgpt2",
13
+ "microsoft/DialoGPT-small"
 
 
14
  ]
15
+
16
+ def query_model(self, model_name, prompt):
17
+ """Direct API call ohne HuggingFace Client"""
18
+ url = f"{self.api_url}{model_name}"
19
+
20
+ payload = {
21
+ "inputs": prompt,
22
+ "parameters": {
23
+ "max_new_tokens": 100,
24
+ "temperature": 0.7,
25
+ "return_full_text": False
26
+ }
27
+ }
28
+
29
+ headers = {
30
+ "Content-Type": "application/json"
31
+ }
32
+
33
+ response = requests.post(url, headers=headers, json=payload, timeout=30)
34
+ return response
35
 
36
  def test_agent_response(self, prompt, model_name, agent_role="General"):
37
+ """Simplified HuggingFace API Test"""
38
 
39
  # SAAP-spezifische Prompts
40
  saap_prompts = {
41
+ "Jane": f"Als KI-Architektin fΓΌr Multi-Agent-Systeme:\n{prompt}\n\nAntwort:",
42
+ "John": f"Als Softwareentwickler fΓΌr AGI-Architekturen:\n{prompt}\n\nAntwort:",
43
+ "Justus": f"Als Rechtsexperte fΓΌr DSGVO:\n{prompt}\n\nAntwort:",
 
44
  "General": f"{prompt}\n\nAntwort:"
45
  }
46
 
 
48
  start_time = time.time()
49
 
50
  try:
51
+ response = self.query_model(model_name, final_prompt)
 
 
 
 
 
 
 
 
52
  end_time = time.time()
53
  response_time = end_time - start_time
54
 
55
+ if response.status_code == 200:
56
+ result = response.json()
57
+
58
+ # Handle different response formats
59
+ if isinstance(result, list) and len(result) > 0:
60
+ if isinstance(result[0], dict) and 'generated_text' in result[0]:
61
+ response_text = result[0]['generated_text']
62
+ else:
63
+ response_text = str(result[0])
64
+ elif isinstance(result, dict) and 'generated_text' in result:
65
+ response_text = result['generated_text']
66
+ else:
67
+ response_text = str(result)
68
+
69
+ return {
70
+ "response": response_text[:200], # Limit length
71
+ "time": f"{response_time:.2f}s",
72
+ "model": model_name,
73
+ "tokens": len(response_text.split()),
74
+ "status": "βœ… Success (HuggingFace Public API)",
75
+ "environment": "☁️ HuggingFace Inference"
76
+ }
77
+ else:
78
+ error_msg = response.text if response.text else f"HTTP {response.status_code}"
79
+ return {
80
+ "status": f"❌ API Error: {error_msg[:50]}",
81
+ "time": f"{response_time:.2f}s",
82
+ "environment": "☁️ HuggingFace Inference"
83
+ }
84
+
85
+ except requests.exceptions.Timeout:
86
  return {
87
+ "status": "❌ Timeout - Model loading too slow",
88
+ "time": f"{time.time() - start_time:.2f}s",
89
+ "environment": "☁️ HuggingFace Inference"
 
 
 
90
  }
 
91
  except Exception as e:
 
92
  return {
93
+ "status": f"❌ Error: {str(e)[:50]}",
94
+ "time": f"{time.time() - start_time:.2f}s",
95
+ "environment": "☁️ HuggingFace Inference"
96
  }
97
 
98
  # Global benchmark instance
99
+ benchmark = HuggingFacePublicAPI()
100
 
101
  def run_cloud_benchmark(prompt, selected_models, agent_role):
102
+ """Simplified Cloud Benchmark"""
103
  if not prompt.strip():
104
  return "⚠️ **Bitte Test-Prompt eingeben**"
105
 
 
108
 
109
  results = []
110
  results.append("# ☁️ SAAP Cloud Performance Benchmark")
111
+ results.append("**Platform:** HuggingFace Public Inference API")
112
  results.append(f"**πŸ€– Agent Role:** {agent_role}")
113
  results.append(f"**πŸ“ Test Prompt:** {prompt}")
114
  results.append(f"**πŸ”§ Models:** {', '.join(selected_models)}")
 
125
  results.append(f"**Status:** {result.get('status', '❌ Error')}")
126
  results.append(f"**Response Time:** {result.get('time', 'N/A')}")
127
  results.append(f"**Environment:** {result.get('environment', 'Unknown')}")
128
+ results.append(f"**Tokens:** {result.get('tokens', 0)}")
129
 
130
  if 'response' in result and result['response']:
131
  preview = result['response'][:100].replace('\n', ' ')
132
+ results.append(f"**Preview:** {preview}...")
133
 
134
  results.append("---")
135
 
136
+ # Statistics
137
  if result.get('status', '').startswith('βœ…'):
138
  successful_tests += 1
139
  try:
 
149
  results.append(f"**Average Response Time:** {avg_time:.2f}s")
150
  results.append(f"**Successful Tests:** {successful_tests}/{len(selected_models)}")
151
 
152
+ # Direct comparison with your local data
153
+ results.append(f"\n## πŸ†š **SAAP Thesis: Performance Comparison**")
154
+ results.append(f"### 🏠 **On-Premise (Your CachyOS Data):**")
155
+ results.append(f"- **qwen2:1.5b:** 25.94s")
156
+ results.append(f"- **tinyllama:** 17.96s")
157
+ results.append(f"- **Average:** ~22s")
158
+ results.append(f"- **Cost:** 0€ per request βœ…")
159
+ results.append(f"- **Privacy:** 100% GDPR compliant βœ…")
160
+ results.append(f"- **Offline:** Works without internet βœ…")
161
 
162
+ results.append(f"### ☁️ **Cloud (HuggingFace API):**")
163
+ results.append(f"- **Average:** {avg_time:.2f}s")
164
+ results.append(f"- **Cost:** API fees per request πŸ’°")
165
+ results.append(f"- **Privacy:** Data sent to cloud ⚠️")
166
+ results.append(f"- **Offline:** Internet required ❌")
167
 
168
+ # Dynamic conclusion
169
+ speedup = 22 / avg_time if avg_time > 0 else 0
170
+ if speedup > 2:
171
+ results.append(f"\n**πŸŽ“ Thesis Result:** ☁️ Cloud is {speedup:.1f}x faster, but On-Premise offers better privacy & cost control")
172
+ elif speedup > 1.2:
173
+ results.append(f"\n**πŸŽ“ Thesis Result:** ☁️ Cloud slightly faster ({speedup:.1f}x), On-Premise competitive with privacy advantages")
174
  else:
175
+ results.append(f"\n**πŸŽ“ Thesis Result:** 🏠 On-Premise performance competitive or better, plus privacy & cost benefits")
176
+
177
+ else:
178
+ results.append(f"## ❌ All API calls failed")
179
+ results.append(f"**Possible causes:** Rate limiting, model loading, network issues")
180
+ results.append(f"\n**πŸŽ“ Thesis Implication:** On-Premise provides more reliable availability")
181
 
182
  return "\n".join(results)
183
 
184
  # Gradio Interface
185
  with gr.Blocks(title="SAAP Cloud Benchmark", theme=gr.themes.Soft()) as demo:
186
  gr.Markdown("# ☁️ SAAP Cloud Performance Benchmark")
187
+ gr.Markdown("**Master Thesis:** Hanan Wandji Danga | **Cloud vs. On-Premise Comparison**")
188
 
189
  with gr.Row():
190
  with gr.Column(scale=2):
 
195
  )
196
 
197
  agent_role = gr.Dropdown(
198
+ choices=["General", "Jane", "John", "Justus"],
199
+ label="Agent Role Simulation",
200
  value="Jane"
201
  )
202
 
203
  with gr.Column(scale=1):
204
  model_selection = gr.CheckboxGroup(
205
  choices=benchmark.available_models,
206
+ label="☁️ Public Cloud Models",
207
+ value=["gpt2", "distilgpt2"]
208
  )
209
 
210
  benchmark_btn = gr.Button("☁️ Run Cloud Benchmark", variant="primary", size="lg")
211
 
212
+ results_output = gr.Markdown(label="Benchmark Results")
 
213
 
214
  benchmark_btn.click(
215
  run_cloud_benchmark,
 
217
  outputs=results_output
218
  )
219
 
220
+ with gr.Accordion("πŸ“Š SAAP Thesis Data", open=False):
221
  gr.Markdown("""
222
+ ### 🎯 Performance Comparison Strategy
223
 
224
+ **🏠 Your On-Premise Data (CachyOS):**
225
+ - Intel i7-5600U, 16GB RAM
226
  - qwen2:1.5b: 25.94s | tinyllama: 17.96s
227
+ - Average: ~22s for complex prompts
228
+
229
+ **☁️ Cloud Benchmark (This App):**
230
+ - HuggingFace Public Inference API
231
+ - GPU-optimized cloud infrastructure
232
+ - Direct performance comparison
233
 
234
+ **πŸŽ“ Expected Thesis Results:**
235
+ - Cloud: Potentially faster due to GPUs
236
+ - On-Premise: Better privacy, cost control
237
+ - Hybrid approach: Best of both worlds
 
238
 
239
+ **Local App:** http://127.0.0.1:7860
240
  """)
241
 
242
  if __name__ == "__main__":