Spaces:

LejobuildYT
/

AI_Chatbot

Running

App Files Files Community

LejobuildYT committed 13 days ago

Commit

70b7b2b

verified ·

1 Parent(s): bdea4b9

Upload 18 files

Browse files

Files changed (7) hide show

Dockerfile +5 -32
Dockerfile2 +44 -0
app.py +15 -53
app_simple.py +187 -0
frontend.html +445 -0
index.html +1 -1
vite.config.js +5 -4

Dockerfile CHANGED Viewed

@@ -1,45 +1,18 @@
-# --- STAGE 1: Frontend Build ---
-FROM node:18-alpine AS frontend-builder
-WORKDIR /app
-# Copy package files (package-lock.json* = optional wenn nicht vorhanden)
-COPY package.json package-lock.json* ./
-# Use npm install (npm ci braucht package-lock.json und würde sonst fehlschlagen)
-RUN npm install
-# Copy nur notwendige Dateien für Frontend-Build
-COPY src ./src
-#COPY public ./public
-COPY vite.config.js index.html ./
-# Build React
-RUN npm run build
-# --- STAGE 2: Backend ---
 FROM python:3.10-slim
 WORKDIR /app
-# Minimal system deps - optional wenn torch/numpy C-Extensions braucht:
-# RUN apt-get update && apt-get install -y --no-install-recommends \
-#     build-essential \
-#     && rm -rf /var/lib/apt/lists/*
 # Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Backend & Plugins
-COPY app_fastapi.py ./app.py
-COPY serve_frontend.py .
 COPY plugins ./plugins/
-# Built Frontend von Stage 1
-COPY --from=frontend-builder /app/dist ./dist
 EXPOSE 7860
 # Start Backend
-CMD ["python", "app.py"]

+# --- STAGE: Backend ---
 FROM python:3.10-slim
 WORKDIR /app
 # Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Backend
+COPY app_simple.py ./app.py
+COPY frontend.html .
 COPY plugins ./plugins/
 EXPOSE 7860
 # Start Backend
+CMD ["python", "app.py"]

Dockerfile2 ADDED Viewed

	@@ -0,0 +1,44 @@

+# --- STAGE 1: Frontend Build ---
+FROM node:18-alpine AS frontend-builder
+WORKDIR /app
+# Copy package files (package-lock.json* = optional, matched only if present)
+COPY package.json package-lock.json* ./
+# Use npm install (npm ci requires package-lock.json and would fail without it)
+RUN npm install
+# Copy only the files needed for the frontend build
+COPY src ./src
+COPY vite.config.js index.html ./
+# Build React
+RUN npm run build
+# --- STAGE 2: Backend ---
+FROM python:3.10-slim
+WORKDIR /app
+# Minimal system deps - optional, only if torch/numpy need C extensions built:
+# RUN apt-get update && apt-get install -y --no-install-recommends \
+#     build-essential \
+#     && rm -rf /var/lib/apt/lists/*
+# Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Backend & Plugins
+COPY app_fastapi.py ./app.py
+COPY serve_frontend.py .
+COPY plugins ./plugins/
+# Built frontend from stage 1
+COPY --from=frontend-builder /app/dist ./dist
+EXPOSE 7860
+# Start Backend
+CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-Hugging Face Spaces Backend - Zephyr-7B Inference Server
-Optimiert für Memory-Limited Environments mit Quantization
 """
 import os
@@ -13,7 +13,6 @@ import torch
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
-    BitsAndBytesConfig,
     pipeline
 )
 import time
@@ -24,30 +23,14 @@ logger = logging.getLogger(__name__)
 # Configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-USE_QUANTIZATION = True
 MAX_TOKENS = 512
 TEMPERATURE = 0.7
 TOP_P = 0.9
 # Auto-Select best model for available memory
 def select_model():
-    """Wählt bestes Modell für verfügbares Memory"""
-    try:
-        if torch.cuda.is_available():
-            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
-            logger.info(f"🔍 Detected GPU Memory: {gpu_memory:.1f}GB")
-            if gpu_memory >= 20:
-                return "HuggingFaceH4/zephyr-7b-beta"  # fp16
-            elif gpu_memory >= 10:
-                return "TheBloke/zephyr-7B-beta-AWQ"   # 4-bit AWQ
-            else:
-                return "TheBloke/zephyr-7B-beta-GGUF"  # 4-bit GGUF
-        else:
-            logger.info("💻 Using CPU - loading lighter model")
-            return "HuggingFaceH4/zephyr-7b-alpha"
-    except Exception as e:
-        logger.warning(f"⚠️ Memory detection failed: {e}, using AWQ")
         return "TheBloke/zephyr-7B-beta-AWQ"
 MODEL_NAME = os.getenv("MODEL_NAME", select_model())
@@ -86,37 +69,16 @@ def call_plugin_hook(hook_name, *args, **kwargs):
 logger.info(f"⏳ Loading model {MODEL_NAME} on {DEVICE}...")
 def load_model_optimized():
-    """Laden mit optimaler Quantization"""
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # 8-bit Quantization für GPU (spart ~50% Memory!)
-    if USE_QUANTIZATION and DEVICE == "cuda":
-        try:
-            bnb_config = BitsAndBytesConfig(
-                load_in_8bit=True,
-                bnb_8bit_compute_dtype=torch.float16,
-                bnb_8bit_use_double_quant=True,
-            )
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                quantization_config=bnb_config,
-                device_map="auto",
-            )
-            logger.info("✅ Model loaded with 8-bit quantization")
-        except Exception as e:
-            logger.warning(f"⚠️ 8-bit failed: {e}, trying standard load")
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                device_map="auto" if DEVICE == "cuda" else None,
-                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
-            )
-    else:
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME,
-            device_map="auto" if DEVICE == "cuda" else None,
-            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
-        )
     return tokenizer, model
 try:
@@ -156,11 +118,11 @@ def generate_response(prompt: str, system_prompt: str = None) -> dict:
     try:
         start_time = time.time()
-        # Format prompt if system prompt provided
         if system_prompt:
-            messages = f"<|system|>\n{system_prompt}\n<|user|>\n{prompt}\n<|assistant|>\n"
-        else:
-            messages = f"<|user|>\n{prompt}\n<|assistant|>\n"
         # Generate
         outputs = pipe(

 #!/usr/bin/env python3
 """
+Hugging Face Spaces Backend - Qwen 1.5B Instruct
+Leicht, schnell und speichereffizient
 """
 import os
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     pipeline
 )
 import time
 # Configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MAX_TOKENS = 512
 TEMPERATURE = 0.7
 TOP_P = 0.9
 # Auto-Select best model for available memory
 def select_model():
+    """Nutze Qwen 1.5B - klein und schnell!"""
+    return "Qwen/Qwen2.5-1.5B-Instruct"
         return "TheBloke/zephyr-7B-beta-AWQ"
 MODEL_NAME = os.getenv("MODEL_NAME", select_model())
 logger.info(f"⏳ Loading model {MODEL_NAME} on {DEVICE}...")
 def load_model_optimized():
+    """Qwen 1.5B - kein Quantization nötig, ist schon klein!"""
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto" if DEVICE == "cuda" else None,
+        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+    )
+    logger.info(f"✅ {MODEL_NAME} loaded successfully")
     return tokenizer, model
 try:
     try:
         start_time = time.time()
+        # Qwen message format
+        messages = []
         if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
         # Generate
         outputs = pipe(

app_simple.py ADDED Viewed

	@@ -0,0 +1,187 @@

+#!/usr/bin/env python3
+"""
+Qwen2.5-1.5B-Instruct backend for HF Spaces (model can be overridden via MODEL_NAME)
+Frontend + backend in ONE container (no Vite build required)
+"""
+from fastapi import FastAPI, HTTPException
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    pipeline
+)
+import logging
+import time
+from pathlib import Path
+import os
+# Module-wide logging at INFO level; `logger` is used by everything below.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# NOTE(review): the title still says "Zephyr-7B" although select_model()
+# returns a Qwen model by default - confirm whether it should be renamed.
+app = FastAPI(title="Zephyr-7B - HF Spaces")
+# CORS
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive. The bundled frontend calls the API same-origin
+# (fetch('/api/generate')), so this can probably be tightened - confirm that
+# no external client depends on it.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ========== MODEL LOADING ==========
+# "cuda" when a CUDA device is visible to torch, otherwise "cpu".
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Only takes effect on CUDA (see load_model_optimized).
+USE_QUANTIZATION = True
+def select_model():
+    """Return the default model id (currently a fixed value, no hardware probing)."""
+    # Qwen 1.5B is small and fast - always use it.
+    return "Qwen/Qwen2.5-1.5B-Instruct"
+# The MODEL_NAME environment variable, when set, overrides the default above.
+MODEL_NAME = os.getenv("MODEL_NAME", select_model())
+logger.info(f"📌 Using model: {MODEL_NAME}")
+def load_model_optimized():
+    """Load the tokenizer and model for MODEL_NAME.
+
+    On CUDA with USE_QUANTIZATION enabled, an 8-bit bitsandbytes load is
+    attempted first. If that attempt raises, or in any other configuration,
+    the model is loaded normally (float16 on CUDA, float32 on CPU).
+
+    Returns:
+        A ``(tokenizer, model)`` tuple.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    on_gpu = DEVICE == "cuda"
+    if USE_QUANTIZATION and on_gpu:
+        try:
+            # Only load_in_8bit is passed. The previous bnb_8bit_compute_dtype /
+            # bnb_8bit_use_double_quant arguments are not documented
+            # BitsAndBytesConfig options (compute dtype and double quant are
+            # 4-bit settings), so they never configured anything.
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
+                device_map="auto",
+            )
+            logger.info("✅ Model loaded with 8-bit quantization")
+            return tokenizer, model
+        except Exception as e:
+            # Deliberate best effort: any failure falls through to the
+            # standard load below instead of aborting startup.
+            logger.warning(f"⚠️ 8-bit failed: {e}, trying standard")
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto" if on_gpu else None,
+        torch_dtype=torch.float16 if on_gpu else torch.float32,
+    )
+    return tokenizer, model
+# Load the model once at import time and wrap it in a text-generation
+# pipeline; any failure is logged and re-raised so the server does not start
+# without a model.
+try:
+    logger.info(f"⏳ Loading {MODEL_NAME}...")
+    tokenizer, model = load_model_optimized()
+    pipeline_kwargs = {"model": model, "tokenizer": tokenizer}
+    # A model loaded with device_map=... has already been placed by accelerate
+    # and exposes `hf_device_map`; transformers rejects (or warns about) an
+    # explicit `device` for such models, so only pass it for plain loads.
+    if getattr(model, "hf_device_map", None) is None:
+        pipeline_kwargs["device"] = 0 if DEVICE == "cuda" else -1
+    pipe = pipeline("text-generation", **pipeline_kwargs)
+    logger.info("✅ Model ready!")
+except Exception as e:
+    logger.error(f"❌ Model loading failed: {e}")
+    raise
+# ========== API ENDPOINTS ==========
+class GenerateRequest(BaseModel):
+    """Request body for POST /api/generate."""
+    # The user's message; the only field without a default.
+    prompt: str
+    # Optional system message; skipped by generate() when empty or None.
+    # NOTE(review): annotated as `str` but defaults to None - an explicit JSON
+    # null may be rejected by validation; confirm and consider Optional[str].
+    system_prompt: str = None
+    # Forwarded to the pipeline as max_new_tokens.
+    max_tokens: int = 512
+    # Sampling temperature forwarded to the pipeline.
+    temperature: float = 0.7
+    # Nucleus-sampling threshold forwarded to the pipeline.
+    top_p: float = 0.9
+@app.post("/api/generate")
+async def generate(request: GenerateRequest):
+    """Generate a reply for a single user prompt.
+
+    Builds a chat-style message list (optional system prompt followed by the
+    user prompt), runs it through the text-generation pipeline and returns
+    the generated text together with a token count, the elapsed time in
+    seconds and the model name.
+
+    Raises:
+        HTTPException: status 500 carrying the error text if anything fails.
+    """
+    try:
+        start = time.time()
+        # Role/content dicts; the pipeline is expected to apply the model's
+        # own chat template to them.
+        messages = []
+        if request.system_prompt:
+            messages.append({"role": "system", "content": request.system_prompt})
+        messages.append({"role": "user", "content": request.prompt})
+        generation_kwargs = {
+            "max_new_tokens": request.max_tokens,
+            "return_full_text": False,
+        }
+        if request.temperature > 0:
+            generation_kwargs.update(
+                do_sample=True,
+                temperature=request.temperature,
+                top_p=request.top_p,
+            )
+        else:
+            # The frontend slider goes down to 0, but sampling requires a
+            # strictly positive temperature; treat 0 as greedy decoding
+            # instead of failing the request with a 500.
+            generation_kwargs["do_sample"] = False
+        # NOTE(review): this call is synchronous inside an async handler, so
+        # other requests wait while a generation is running.
+        outputs = pipe(messages, **generation_kwargs)
+        response_text = outputs[0]["generated_text"].strip()
+        elapsed = time.time() - start
+        return {
+            "response": response_text,
+            "tokens": len(tokenizer.encode(response_text)),
+            "time_seconds": round(elapsed, 2),
+            "model": MODEL_NAME,
+        }
+    except Exception as e:
+        logger.error(f"Generation error: {e}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+@app.get("/api/health")
+async def health():
+    """Liveness probe that also reports the configured model and device."""
+    payload = {"status": "ok"}
+    payload["model"] = MODEL_NAME
+    payload["device"] = DEVICE
+    return payload
+@app.get("/api/info")
+async def info():
+    """Describe the runtime: model, device, GPU memory and quantization flag."""
+    # Total memory of CUDA device 0 in GB (bytes / 1e9), or None without CUDA.
+    total_gb = (
+        torch.cuda.get_device_properties(0).total_memory / 1e9
+        if torch.cuda.is_available()
+        else None
+    )
+    # "quantization" echoes the USE_QUANTIZATION flag, not whether the loaded
+    # model actually ended up quantized.
+    return dict(
+        model=MODEL_NAME,
+        device=DEVICE,
+        gpu_memory_gb=total_gb,
+        quantization=USE_QUANTIZATION,
+    )
+# ========== STATIC FILES & FRONTEND ==========
+@app.get("/")
+async def serve_frontend():
+    """Return the chat UI page."""
+    index_page = "frontend.html"
+    return FileResponse(index_page, media_type="text/html")
+@app.get("/{full_path:path}")
+async def fallback(full_path: str):
+    """Serve a file from the working directory, else the frontend page.
+
+    The requested path is resolved against the current working directory and
+    served only when it is a regular file located inside that directory.
+    Everything else (unknown routes, directories, ``..`` traversal, absolute
+    paths) gets ``frontend.html`` so client-side routing keeps working.
+    """
+    base_dir = Path.cwd().resolve()
+    # resolve() collapses ".." segments and symlinks; joining an absolute
+    # full_path replaces base_dir entirely, which the containment check
+    # below then rejects.
+    candidate = (base_dir / full_path).resolve()
+    if candidate.is_file() and base_dir in candidate.parents:
+        # NOTE(review): this still exposes every file in the working
+        # directory (e.g. app.py); consider a dedicated static folder.
+        return FileResponse(candidate)
+    # Otherwise serve frontend (SPA routing)
+    return FileResponse("frontend.html", media_type="text/html")
+# Script entry point: start the ASGI server when run as `python app.py`.
+if __name__ == "__main__":
+    # Imported here so uvicorn is only needed when the file is run directly.
+    import uvicorn
+    # Listen on all interfaces; 7860 matches the port EXPOSEd by the Dockerfile.
+    uvicorn.run(app, host="0.0.0.0", port=7860)

frontend.html ADDED Viewed

	@@ -0,0 +1,445 @@

+<!DOCTYPE html>
+<html lang="de">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>🤖 Zephyr-7B Chatbot</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+        /* Design tokens shared by all rules below. */
+        :root {
+            --primary: #667eea;
+            --secondary: #764ba2;
+            --gray-50: #f9fafb;
+            --gray-100: #f3f4f6;
+            --gray-300: #d1d5db;
+            /* Used by .typing-indicator span; was referenced but never defined. */
+            --gray-400: #9ca3af;
+            --gray-500: #6b7280;
+            --gray-600: #4b5563;
+            --gray-700: #374151;
+            --gray-800: #1f2937;
+            --gray-900: #111827;
+        }
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
+            min-height: 100vh;
+            color: var(--gray-900);
+        }
+        .app-container {
+            display: flex;
+            flex-direction: column;
+            height: 100vh;
+            background: var(--gray-50);
+        }
+        .app-header {
+            background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
+            color: white;
+            padding: 20px;
+            text-align: center;
+            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+        }
+        .app-header h1 {
+            font-size: 28px;
+            margin-bottom: 5px;
+        }
+        .app-content {
+            display: flex;
+            flex: 1;
+            gap: 20px;
+            padding: 20px;
+            max-width: 1200px;
+            margin: 0 auto;
+            width: 100%;
+        }
+        .sidebar {
+            width: 280px;
+            background: white;
+            border-radius: 8px;
+            padding: 20px;
+            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+            overflow-y: auto;
+        }
+        .sidebar h3 {
+            color: var(--gray-900);
+            font-size: 14px;
+            margin-bottom: 12px;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+        }
+        .sidebar label {
+            display: block;
+            font-size: 12px;
+            color: var(--gray-600);
+            font-weight: 600;
+            margin-top: 12px;
+            margin-bottom: 6px;
+        }
+        .sidebar textarea {
+            width: 100%;
+            min-height: 80px;
+            padding: 8px;
+            border: 1px solid var(--gray-300);
+            border-radius: 6px;
+            font-size: 12px;
+            resize: vertical;
+        }
+        .sidebar input[type="range"] {
+            width: 100%;
+            margin-top: 6px;
+        }
+        .chat-container {
+            flex: 1;
+            display: flex;
+            flex-direction: column;
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+            overflow: hidden;
+        }
+        .messages {
+            flex: 1;
+            overflow-y: auto;
+            padding: 20px;
+            display: flex;
+            flex-direction: column;
+            gap: 12px;
+        }
+        .empty-state {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+            gap: 20px;
+            text-align: center;
+            padding: 40px;
+            color: var(--gray-600);
+        }
+        .empty-icon {
+            font-size: 48px;
+            opacity: 0.6;
+        }
+        .message {
+            display: flex;
+            gap: 10px;
+            animation: slideIn 0.3s ease;
+        }
+        @keyframes slideIn {
+            from { opacity: 0; transform: translateY(10px); }
+            to { opacity: 1; transform: translateY(0); }
+        }
+        .message.user {
+            justify-content: flex-end;
+        }
+        .message-avatar {
+            font-size: 18px;
+            flex-shrink: 0;
+        }
+        .message-content {
+            display: flex;
+            flex-direction: column;
+            gap: 4px;
+            max-width: 70%;
+        }
+        .message.user .message-content {
+            align-items: flex-end;
+        }
+        .message-text {
+            padding: 10px 14px;
+            border-radius: 12px;
+            word-wrap: break-word;
+            font-size: 13px;
+            line-height: 1.5;
+        }
+        .message.user .message-text {
+            background: var(--primary);
+            color: white;
+        }
+        .message.assistant .message-text {
+            background: var(--gray-100);
+            color: var(--gray-900);
+        }
+        .message-stats {
+            font-size: 11px;
+            color: var(--gray-500);
+            padding: 0 14px;
+        }
+        .typing-indicator {
+            display: flex;
+            gap: 4px;
+            padding: 10px 14px;
+        }
+        .typing-indicator span {
+            width: 6px;
+            height: 6px;
+            border-radius: 50%;
+            background: var(--gray-400);
+            animation: typing 1.4s infinite;
+        }
+        .typing-indicator span:nth-child(2) { animation-delay: 0.2s; }
+        .typing-indicator span:nth-child(3) { animation-delay: 0.4s; }
+        @keyframes typing {
+            0%, 60%, 100% { opacity: 0.5; transform: translateY(0); }
+            30% { opacity: 1; transform: translateY(-8px); }
+        }
+        .input-area {
+            display: flex;
+            gap: 10px;
+            padding: 15px 20px;
+            border-top: 1px solid var(--gray-300);
+            background: var(--gray-50);
+        }
+        .input-area textarea {
+            flex: 1;
+            padding: 10px;
+            border: 1px solid var(--gray-300);
+            border-radius: 6px;
+            font-size: 13px;
+            resize: none;
+            max-height: 80px;
+            font-family: inherit;
+        }
+        .send-btn {
+            background: var(--primary);
+            color: white;
+            border: none;
+            padding: 10px 20px;
+            border-radius: 6px;
+            font-size: 12px;
+            font-weight: 600;
+            cursor: pointer;
+            align-self: flex-end;
+            white-space: nowrap;
+            transition: all 0.2s;
+        }
+        .send-btn:hover:not(:disabled) {
+            background: var(--secondary);
+            transform: translateY(-2px);
+            box-shadow: 0 10px 15px rgba(0, 0, 0, 0.1);
+        }
+        .send-btn:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+        }
+        @media (max-width: 768px) {
+            .app-content {
+                flex-direction: column;
+                gap: 10px;
+            }
+            .sidebar {
+                width: 100%;
+            }
+            .message-content {
+                max-width: 85%;
+            }
+            .input-area {
+                flex-direction: column;
+            }
+            .send-btn {
+                align-self: stretch;
+            }
+        }
+    </style>
+</head>
+<body>
+    <div class="app-container">
+        <header class="app-header">
+            <h1>🤖 Zephyr-7B Chatbot</h1>
+            <p>Powered by Hugging Face Spaces</p>
+        </header>
+        <div class="app-content">
+            <aside class="sidebar">
+                <h3>⚙️ Settings</h3>
+                <label>System Prompt</label>
+                <textarea id="systemPrompt" placeholder="Define assistant role...">Du bist ein hilfsbereiter KI-Assistent.</textarea>
+                <label>Temperature: <span id="tempValue">0.70</span></label>
+                <input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7">
+                <label>Top P: <span id="topPValue">0.90</span></label>
+                <input type="range" id="topP" min="0" max="1" step="0.05" value="0.9">
+                <div id="stats" style="margin-top: 20px; padding: 15px; background: #f0f9ff; border-radius: 6px; display: none;">
+                    <h4 style="font-size: 12px; margin-bottom: 8px;">📊 Last Response</h4>
+                    <div style="font-size: 11px; color: var(--gray-600);">
+                        <div>Tokens: <strong id="statsTokens">-</strong></div>
+                        <div>Time: <strong id="statsTime">-</strong>s</div>
+                    </div>
+                </div>
+            </aside>
+            <main class="chat-container">
+                <div class="messages" id="messages">
+                    <div class="empty-state">
+                        <div class="empty-icon">🤖</div>
+                        <h2 style="color: var(--gray-900);">Welcome!</h2>
+                        <p>Start a conversation with Zephyr-7B</p>
+                    </div>
+                </div>
+                <div class="input-area">
+                    <textarea id="messageInput" placeholder="Type your message... (Shift+Enter for new line)" rows="3"></textarea>
+                    <button class="send-btn" id="sendBtn" onclick="sendMessage()">➤ Send</button>
+                </div>
+            </main>
+        </div>
+    </div>
+    <script>
+        const messagesDiv = document.getElementById('messages');
+        const messageInput = document.getElementById('messageInput');
+        const sendBtn = document.getElementById('sendBtn');
+        const systemPromptInput = document.getElementById('systemPrompt');
+        const tempSlider = document.getElementById('temperature');
+        const topPSlider = document.getElementById('topP');
+        const statsDiv = document.getElementById('stats');
+        let isLoading = false;
+        let messages = [];
+        // Update display values
+        tempSlider.addEventListener('input', (e) => {
+            document.getElementById('tempValue').textContent = parseFloat(e.target.value).toFixed(2);
+        });
+        topPSlider.addEventListener('input', (e) => {
+            document.getElementById('topPValue').textContent = parseFloat(e.target.value).toFixed(2);
+        });
+        messageInput.addEventListener('keypress', (e) => {
+            if (e.key === 'Enter' && !e.shiftKey && !isLoading) {
+                e.preventDefault();
+                sendMessage();
+            }
+        });
+        // Send the current input to POST /api/generate and render the reply.
+        // Only the latest message is sent; no earlier turns are included.
+        async function sendMessage() {
+            const message = messageInput.value.trim();
+            // Ignore empty input and re-entrant calls while a request is pending.
+            if (!message || isLoading) return;
+            isLoading = true;
+            sendBtn.disabled = true;
+            messageInput.value = '';
+            // Clear empty state
+            if (messagesDiv.querySelector('.empty-state')) {
+                messagesDiv.innerHTML = '';
+            }
+            // Add user message
+            addMessage('user', message);
+            try {
+                const response = await fetch('/api/generate', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({
+                        prompt: message,
+                        system_prompt: systemPromptInput.value,
+                        temperature: parseFloat(tempSlider.value),
+                        top_p: parseFloat(topPSlider.value),
+                        max_tokens: 512
+                    })
+                });
+                // Non-2xx responses are surfaced through the catch block below.
+                if (!response.ok) {
+                    throw new Error(`HTTP error! status: ${response.status}`);
+                }
+                const data = await response.json();
+                addMessage('assistant', data.response, {
+                    tokens: data.tokens,
+                    time: data.time_seconds
+                });
+                // Show stats
+                document.getElementById('statsTokens').textContent = data.tokens;
+                document.getElementById('statsTime').textContent = data.time_seconds;
+                statsDiv.style.display = 'block';
+            } catch (error) {
+                // Errors are shown in the chat as an assistant message.
+                addMessage('assistant', `❌ Error: ${error.message}`);
+            } finally {
+                // Always re-enable the input, whether the request succeeded or not.
+                isLoading = false;
+                sendBtn.disabled = false;
+                messageInput.focus();
+            }
+        }
+        // Append one chat bubble to the message list and scroll it into view.
+        //   role:    'user' or 'assistant' (also used as a CSS class)
+        //   content: message text, HTML-escaped before insertion
+        //   stats:   optional { time, tokens } shown beneath the bubble
+        function addMessage(role, content, stats = null) {
+            const messageEl = document.createElement('div');
+            messageEl.className = `message ${role}`;
+            const avatar = role === 'user' ? '👤' : '🤖';
+            let html = `
+                <div class="message-avatar">${avatar}</div>
+                <div class="message-content">
+                    <div class="message-text">${escapeHtml(content)}</div>
+            `;
+            if (stats) {
+                // Stats come from the server response, so escape them like the
+                // message text instead of interpolating them raw into innerHTML.
+                html += `<div class="message-stats">⏱️ ${escapeHtml(stats.time)}s • 📊 ${escapeHtml(stats.tokens)} tokens</div>`;
+            }
+            html += '</div>';
+            messageEl.innerHTML = html;
+            messagesDiv.appendChild(messageEl);
+            // Keep the newest message visible.
+            messagesDiv.scrollTop = messagesDiv.scrollHeight;
+        }
+        // Escape text for safe use in innerHTML: assign it as plain text to a
+        // detached element and read the serialized markup back.
+        function escapeHtml(text) {
+            const scratch = document.createElement('div');
+            scratch.textContent = text;
+            return scratch.innerHTML;
+        }
+        // Initial focus
+        messageInput.focus();
+    </script>
+</body>
+</html>

index.html CHANGED Viewed

@@ -29,6 +29,6 @@
 </head>
 <body>
     <div id="root"></div>
-    <script type="module" src="../src/main.jsx"></script>
 </body>
 </html>

 </head>
 <body>
     <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
 </body>
 </html>

vite.config.js CHANGED Viewed

@@ -1,11 +1,9 @@
 import { defineConfig } from 'vite'
 import react from '@vitejs/plugin-react'
-import { resolve } from 'path'
 export default defineConfig({
   plugins: [react()],
-  // WICHTIG: Zeigt Vite, dass deine index.html im public-Ordner wohnt
-  root: '',
   server: {
     port: 5173,
     proxy: {
@@ -17,7 +15,10 @@ export default defineConfig({
     }
   },
   build: {
-    // Schiebt das fertige Build-Ergebnis wieder hoch ins Hauptverzeichnis nach /dist
     outDir: resolve(__dirname, 'dist'),
     emptyOutDir: true,
     sourcemap: true,

 import { defineConfig } from 'vite'
 import react from '@vitejs/plugin-react'
 export default defineConfig({
   plugins: [react()],
+  root: '.',  // Root ist das aktuelle Verzeichnis
   server: {
     port: 5173,
     proxy: {
     }
   },
   build: {
+    outDir: 'dist',
+    sourcemap: true,
+  }
+})
     outDir: resolve(__dirname, 'dist'),
     emptyOutDir: true,
     sourcemap: true,