LejobuildYT committed on
Commit
70b7b2b
·
verified ·
1 Parent(s): bdea4b9

Upload 18 files

Browse files
Files changed (7) hide show
  1. Dockerfile +5 -32
  2. Dockerfile2 +44 -0
  3. app.py +15 -53
  4. app_simple.py +187 -0
  5. frontend.html +445 -0
  6. index.html +1 -1
  7. vite.config.js +5 -4
Dockerfile CHANGED
@@ -1,45 +1,18 @@
1
- # --- STAGE 1: Frontend Build ---
2
- FROM node:18-alpine AS frontend-builder
3
-
4
- WORKDIR /app
5
-
6
- # Copy package files (package-lock.json* = optional wenn nicht vorhanden)
7
- COPY package.json package-lock.json* ./
8
-
9
- # Use npm install (npm ci braucht package-lock.json und würde sonst fehlschlagen)
10
- RUN npm install
11
-
12
- # Copy nur notwendige Dateien für Frontend-Build
13
- COPY src ./src
14
- #COPY public ./public
15
- COPY vite.config.js index.html ./
16
-
17
- # Build React
18
- RUN npm run build
19
-
20
- # --- STAGE 2: Backend ---
21
  FROM python:3.10-slim
22
 
23
  WORKDIR /app
24
 
25
- # Minimal system deps - optional wenn torch/numpy C-Extensions braucht:
26
- # RUN apt-get update && apt-get install -y --no-install-recommends \
27
- # build-essential \
28
- # && rm -rf /var/lib/apt/lists/*
29
-
30
  # Python dependencies
31
  COPY requirements.txt .
32
  RUN pip install --no-cache-dir -r requirements.txt
33
 
34
- # Backend & Plugins
35
- COPY app_fastapi.py ./app.py
36
- COPY serve_frontend.py .
37
  COPY plugins ./plugins/
38
 
39
- # Built Frontend von Stage 1
40
- COPY --from=frontend-builder /app/dist ./dist
41
-
42
  EXPOSE 7860
43
 
44
  # Start Backend
45
- CMD ["python", "app.py"]
 
1
+ # --- STAGE: Backend ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  FROM python:3.10-slim
3
 
4
  WORKDIR /app
5
 
 
 
 
 
 
6
  # Python dependencies
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
+ # Backend
11
+ COPY app_simple.py ./app.py
12
+ COPY frontend.html .
13
  COPY plugins ./plugins/
14
 
 
 
 
15
  EXPOSE 7860
16
 
17
  # Start Backend
18
+ CMD ["python", "app.py"]
Dockerfile2 ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- STAGE 1: Frontend Build ---
2
+ FROM node:18-alpine AS frontend-builder
3
+
4
+ WORKDIR /app
5
+
6
+ # Copy package files (package-lock.json* = optional wenn nicht vorhanden)
7
+ COPY package.json package-lock.json* ./
8
+
9
+ # Use npm install (npm ci braucht package-lock.json und würde sonst fehlschlagen)
10
+ RUN npm install
11
+
12
+ # Copy nur notwendige Dateien für Frontend-Build
13
+ COPY src ./src
14
+ COPY vite.config.js index.html ./
15
+
16
+ # Build React
17
+ RUN npm run build
18
+
19
+ # --- STAGE 2: Backend ---
20
+ FROM python:3.10-slim
21
+
22
+ WORKDIR /app
23
+
24
+ # Minimal system deps - optional wenn torch/numpy C-Extensions braucht:
25
+ # RUN apt-get update && apt-get install -y --no-install-recommends \
26
+ # build-essential \
27
+ # && rm -rf /var/lib/apt/lists/*
28
+
29
+ # Python dependencies
30
+ COPY requirements.txt .
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # Backend & Plugins
34
+ COPY app_fastapi.py ./app.py
35
+ COPY serve_frontend.py .
36
+ COPY plugins ./plugins/
37
+
38
+ # Built Frontend von Stage 1
39
+ COPY --from=frontend-builder /app/dist ./dist
40
+
41
+ EXPOSE 7860
42
+
43
+ # Start Backend
44
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -1,7 +1,7 @@
1
  #!/usr/bin/env python3
2
  """
3
- Hugging Face Spaces Backend - Zephyr-7B Inference Server
4
- Optimiert für Memory-Limited Environments mit Quantization
5
  """
6
 
7
  import os
@@ -13,7 +13,6 @@ import torch
13
  from transformers import (
14
  AutoModelForCausalLM,
15
  AutoTokenizer,
16
- BitsAndBytesConfig,
17
  pipeline
18
  )
19
  import time
@@ -24,30 +23,14 @@ logger = logging.getLogger(__name__)
24
 
25
  # Configuration
26
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
27
- USE_QUANTIZATION = True
28
  MAX_TOKENS = 512
29
  TEMPERATURE = 0.7
30
  TOP_P = 0.9
31
 
32
  # Auto-Select best model for available memory
33
  def select_model():
34
- """Wählt bestes Modell für verfügbares Memory"""
35
- try:
36
- if torch.cuda.is_available():
37
- gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
38
- logger.info(f"🔍 Detected GPU Memory: {gpu_memory:.1f}GB")
39
-
40
- if gpu_memory >= 20:
41
- return "HuggingFaceH4/zephyr-7b-beta" # fp16
42
- elif gpu_memory >= 10:
43
- return "TheBloke/zephyr-7B-beta-AWQ" # 4-bit AWQ
44
- else:
45
- return "TheBloke/zephyr-7B-beta-GGUF" # 4-bit GGUF
46
- else:
47
- logger.info("💻 Using CPU - loading lighter model")
48
- return "HuggingFaceH4/zephyr-7b-alpha"
49
- except Exception as e:
50
- logger.warning(f"⚠️ Memory detection failed: {e}, using AWQ")
51
  return "TheBloke/zephyr-7B-beta-AWQ"
52
 
53
  MODEL_NAME = os.getenv("MODEL_NAME", select_model())
@@ -86,37 +69,16 @@ def call_plugin_hook(hook_name, *args, **kwargs):
86
  logger.info(f"⏳ Loading model {MODEL_NAME} on {DEVICE}...")
87
 
88
  def load_model_optimized():
89
- """Laden mit optimaler Quantization"""
90
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
91
 
92
- # 8-bit Quantization für GPU (spart ~50% Memory!)
93
- if USE_QUANTIZATION and DEVICE == "cuda":
94
- try:
95
- bnb_config = BitsAndBytesConfig(
96
- load_in_8bit=True,
97
- bnb_8bit_compute_dtype=torch.float16,
98
- bnb_8bit_use_double_quant=True,
99
- )
100
- model = AutoModelForCausalLM.from_pretrained(
101
- MODEL_NAME,
102
- quantization_config=bnb_config,
103
- device_map="auto",
104
- )
105
- logger.info("✅ Model loaded with 8-bit quantization")
106
- except Exception as e:
107
- logger.warning(f"⚠️ 8-bit failed: {e}, trying standard load")
108
- model = AutoModelForCausalLM.from_pretrained(
109
- MODEL_NAME,
110
- device_map="auto" if DEVICE == "cuda" else None,
111
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
112
- )
113
- else:
114
- model = AutoModelForCausalLM.from_pretrained(
115
- MODEL_NAME,
116
- device_map="auto" if DEVICE == "cuda" else None,
117
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
118
- )
119
 
 
120
  return tokenizer, model
121
 
122
  try:
@@ -156,11 +118,11 @@ def generate_response(prompt: str, system_prompt: str = None) -> dict:
156
  try:
157
  start_time = time.time()
158
 
159
- # Format prompt if system prompt provided
 
160
  if system_prompt:
161
- messages = f"<|system|>\n{system_prompt}\n<|user|>\n{prompt}\n<|assistant|>\n"
162
- else:
163
- messages = f"<|user|>\n{prompt}\n<|assistant|>\n"
164
 
165
  # Generate
166
  outputs = pipe(
 
1
  #!/usr/bin/env python3
2
  """
3
+ Hugging Face Spaces Backend - Qwen 1.5B Instruct
4
+ Leicht, schnell und speichereffizient
5
  """
6
 
7
  import os
 
13
  from transformers import (
14
  AutoModelForCausalLM,
15
  AutoTokenizer,
 
16
  pipeline
17
  )
18
  import time
 
23
 
24
  # Configuration
25
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
26
  MAX_TOKENS = 512
27
  TEMPERATURE = 0.7
28
  TOP_P = 0.9
29
 
30
  # Auto-Select best model for available memory
31
def select_model():
    """Return the Hugging Face model id to load.

    Always returns the Qwen 2.5 1.5B Instruct checkpoint (small and fast).
    The second, unreachable ``return`` left over from the previous
    Zephyr-based implementation has been removed.
    """
    return "Qwen/Qwen2.5-1.5B-Instruct"
35
 
36
  MODEL_NAME = os.getenv("MODEL_NAME", select_model())
 
69
  logger.info(f"⏳ Loading model {MODEL_NAME} on {DEVICE}...")
70
 
71
  def load_model_optimized():
72
+ """Qwen 1.5B - kein Quantization nötig, ist schon klein!"""
73
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
74
 
75
+ model = AutoModelForCausalLM.from_pretrained(
76
+ MODEL_NAME,
77
+ device_map="auto" if DEVICE == "cuda" else None,
78
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
79
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
+ logger.info(f"✅ {MODEL_NAME} loaded successfully")
82
  return tokenizer, model
83
 
84
  try:
 
118
  try:
119
  start_time = time.time()
120
 
121
+ # Qwen message format
122
+ messages = []
123
  if system_prompt:
124
+ messages.append({"role": "system", "content": system_prompt})
125
+ messages.append({"role": "user", "content": prompt})
 
126
 
127
  # Generate
128
  outputs = pipe(
app_simple.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Zephyr-7B Backend für HF Spaces
4
+ Frontend + Backend in EINEM Container (kein Vite-Drama!)
5
+ """
6
+
7
import logging
import os
import time
from pathlib import Path
from typing import Optional

import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
23
+
24
+ logging.basicConfig(level=logging.INFO)
25
+ logger = logging.getLogger(__name__)
26
+
27
+ app = FastAPI(title="Zephyr-7B - HF Spaces")
28
+
29
+ # CORS
30
+ app.add_middleware(
31
+ CORSMiddleware,
32
+ allow_origins=["*"],
33
+ allow_credentials=True,
34
+ allow_methods=["*"],
35
+ allow_headers=["*"],
36
+ )
37
+
38
+ # ========== MODEL LOADING ==========
39
+
40
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
41
+ USE_QUANTIZATION = True
42
+
43
def select_model():
    """Return the default Hugging Face model id served by this app.

    No hardware detection is performed: the same small instruct model is
    returned unconditionally.
    """
    # Qwen 1.5B is small and fast, so it is always used.
    return "Qwen/Qwen2.5-1.5B-Instruct"
47
+
48
+ MODEL_NAME = os.getenv("MODEL_NAME", select_model())
49
+ logger.info(f"📌 Using model: {MODEL_NAME}")
50
+
51
def load_model_optimized():
    """Load the tokenizer and causal-LM weights for ``MODEL_NAME``.

    On CUDA with ``USE_QUANTIZATION`` enabled, an 8-bit bitsandbytes load is
    attempted first. If that attempt raises, the failure is logged and the
    model is loaded unquantized instead. Without CUDA, or with quantization
    disabled, the unquantized load is used directly.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    on_gpu = DEVICE == "cuda"

    def load_unquantized():
        # fp16 with automatic device placement on GPU; plain fp32 on CPU.
        return AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto" if on_gpu else None,
            torch_dtype=torch.float16 if on_gpu else torch.float32,
        )

    if not (USE_QUANTIZATION and on_gpu):
        return tokenizer, load_unquantized()

    try:
        # Only ``load_in_8bit`` is set: compute-dtype and double-quantization
        # options exist for 4-bit loading only (``bnb_4bit_*``), so there is
        # no 8-bit equivalent to pass here.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            device_map="auto",
        )
        logger.info("✅ Model loaded with 8-bit quantization")
    except Exception as e:
        # Deliberate best-effort: any quantization failure falls back to the
        # standard load rather than aborting startup.
        logger.warning(f"⚠️ 8-bit failed: {e}, trying standard")
        model = load_unquantized()

    return tokenizer, model
83
+
84
# Load the model once at import time and build the shared generation pipeline.
# Any failure is logged and re-raised so the process does not start without a model.
try:
    logger.info(f"⏳ Loading {MODEL_NAME}...")
    tokenizer, model = load_model_optimized()

    pipeline_kwargs = {}
    # A model loaded with device_map="auto" carries ``hf_device_map`` and is
    # already placed on its device(s). Some transformers releases raise a
    # ValueError when such a model is combined with an explicit ``device``,
    # so ``device`` is only passed for models without a device map.
    if getattr(model, "hf_device_map", None) is None:
        pipeline_kwargs["device"] = 0 if DEVICE == "cuda" else -1

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **pipeline_kwargs,
    )
    logger.info("✅ Model ready!")
except Exception as e:
    logger.error(f"❌ Model loading failed: {e}")
    raise
97
+
98
+ # ========== API ENDPOINTS ==========
99
+
100
class GenerateRequest(BaseModel):
    """JSON body accepted by ``POST /api/generate``."""

    # User message to answer (required).
    prompt: str
    # Optional system message; ``None`` or an empty string means "no system
    # message". Declared Optional so an explicit JSON ``null`` validates.
    system_prompt: Optional[str] = None
    # Upper bound on the number of newly generated tokens.
    max_tokens: int = 512
    # Sampling temperature forwarded to the generation pipeline.
    temperature: float = 0.7
    # Nucleus-sampling probability mass forwarded to the generation pipeline.
    top_p: float = 0.9
106
+
107
@app.post("/api/generate")
async def generate(request: GenerateRequest):
    """Generate a chat reply for the submitted prompt.

    Builds a chat-style message list (optional system message followed by the
    user prompt), runs it through the module-level ``pipe`` and returns the
    generated text with a token count, the elapsed wall time and the model id.

    Args:
        request: Validated request body.

    Returns:
        dict: ``response``, ``tokens``, ``time_seconds`` and ``model``.

    Raises:
        HTTPException: Status 500 carrying the error text if generation fails.
    """
    try:
        start = time.time()

        # Chat messages as role/content dicts; presumably the pipeline applies
        # the tokenizer's chat template (<|im_start|>role ... <|im_end|> for Qwen).
        messages = []
        if request.system_prompt:
            messages.append({"role": "system", "content": request.system_prompt})
        messages.append({"role": "user", "content": request.prompt})

        generation_kwargs = {
            "max_new_tokens": request.max_tokens,
            "return_full_text": False,
        }
        if request.temperature > 0:
            generation_kwargs.update(
                do_sample=True,
                temperature=request.temperature,
                top_p=request.top_p,
            )
        else:
            # The UI slider allows temperature 0, which is not a valid sampling
            # temperature; treat it as a request for greedy decoding instead
            # of failing the request with a 500.
            generation_kwargs["do_sample"] = False

        outputs = pipe(messages, **generation_kwargs)

        response_text = outputs[0]["generated_text"].strip()
        elapsed = time.time() - start

        return {
            "response": response_text,
            # Count only the reply's own tokens, without tokenizer-added specials.
            "tokens": len(tokenizer.encode(response_text, add_special_tokens=False)),
            "time_seconds": round(elapsed, 2),
            "model": MODEL_NAME,
        }
    except Exception as e:
        # logger.exception keeps the traceback alongside the original message.
        logger.exception(f"Generation error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
142
+
143
@app.get("/api/health")
async def health():
    """Report liveness together with the active model id and device."""
    payload = {"status": "ok"}
    payload["model"] = MODEL_NAME
    payload["device"] = DEVICE
    return payload
151
+
152
@app.get("/api/info")
async def info():
    """Describe the running model: id, device, GPU memory and quantization flag.

    ``gpu_memory_gb`` is the total memory of CUDA device 0 divided by 1e9,
    or ``None`` when CUDA is unavailable.
    """
    total_gb = (
        torch.cuda.get_device_properties(0).total_memory / 1e9
        if torch.cuda.is_available()
        else None
    )
    return {
        "model": MODEL_NAME,
        "device": DEVICE,
        "gpu_memory_gb": total_gb,
        "quantization": USE_QUANTIZATION,
    }
165
+
166
+ # ========== STATIC FILES & FRONTEND ==========
167
+
168
@app.get("/")
async def serve_frontend():
    """Return the single-page frontend (``frontend.html``) for the site root."""
    index_page = FileResponse("frontend.html", media_type="text/html")
    return index_page
172
+
173
@app.get("/{full_path:path}")
async def fallback(full_path: str):
    """Serve a file from the working directory, else the SPA entry page.

    The requested path is resolved against the current working directory and
    is only served when it is a regular file located inside that directory.
    Everything else (unknown routes, directories, paths escaping the
    directory) receives ``frontend.html`` so client-side routing keeps working.

    Args:
        full_path: URL path captured after the leading slash.

    Returns:
        FileResponse: The matching file, or ``frontend.html``.
    """
    base_dir = Path.cwd().resolve()
    # resolve() collapses ".." segments and follows symlinks. Joining with an
    # absolute ``full_path`` (e.g. a request for "//etc/passwd") discards
    # ``base_dir`` entirely. The containment check below rejects both cases.
    candidate = (base_dir / full_path).resolve()

    # is_file() also keeps directories away from FileResponse, which cannot
    # send them.
    if base_dir in candidate.parents and candidate.is_file():
        return FileResponse(candidate)

    # Otherwise serve the frontend (SPA routing).
    return FileResponse("frontend.html", media_type="text/html")
184
+
185
+ if __name__ == "__main__":
186
+ import uvicorn
187
+ uvicorn.run(app, host="0.0.0.0", port=7860)
frontend.html ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="de">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>🤖 Zephyr-7B Chatbot</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
/* Design tokens shared by the whole page. */
:root {
    --primary: #667eea;
    --secondary: #764ba2;
    --gray-50: #f9fafb;
    --gray-100: #f3f4f6;
    --gray-300: #d1d5db;
    /* Used by `.typing-indicator span`; without this definition the dots
       have no background colour. */
    --gray-400: #9ca3af;
    --gray-500: #6b7280;
    --gray-600: #4b5563;
    --gray-700: #374151;
    --gray-800: #1f2937;
    --gray-900: #111827;
}
26
+
27
+ body {
28
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
29
+ background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
30
+ min-height: 100vh;
31
+ color: var(--gray-900);
32
+ }
33
+
34
+ .app-container {
35
+ display: flex;
36
+ flex-direction: column;
37
+ height: 100vh;
38
+ background: var(--gray-50);
39
+ }
40
+
41
+ .app-header {
42
+ background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%);
43
+ color: white;
44
+ padding: 20px;
45
+ text-align: center;
46
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
47
+ }
48
+
49
+ .app-header h1 {
50
+ font-size: 28px;
51
+ margin-bottom: 5px;
52
+ }
53
+
54
+ .app-content {
55
+ display: flex;
56
+ flex: 1;
57
+ gap: 20px;
58
+ padding: 20px;
59
+ max-width: 1200px;
60
+ margin: 0 auto;
61
+ width: 100%;
62
+ }
63
+
64
+ .sidebar {
65
+ width: 280px;
66
+ background: white;
67
+ border-radius: 8px;
68
+ padding: 20px;
69
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
70
+ overflow-y: auto;
71
+ }
72
+
73
+ .sidebar h3 {
74
+ color: var(--gray-900);
75
+ font-size: 14px;
76
+ margin-bottom: 12px;
77
+ text-transform: uppercase;
78
+ letter-spacing: 0.5px;
79
+ }
80
+
81
+ .sidebar label {
82
+ display: block;
83
+ font-size: 12px;
84
+ color: var(--gray-600);
85
+ font-weight: 600;
86
+ margin-top: 12px;
87
+ margin-bottom: 6px;
88
+ }
89
+
90
+ .sidebar textarea {
91
+ width: 100%;
92
+ min-height: 80px;
93
+ padding: 8px;
94
+ border: 1px solid var(--gray-300);
95
+ border-radius: 6px;
96
+ font-size: 12px;
97
+ resize: vertical;
98
+ }
99
+
100
+ .sidebar input[type="range"] {
101
+ width: 100%;
102
+ margin-top: 6px;
103
+ }
104
+
105
+ .chat-container {
106
+ flex: 1;
107
+ display: flex;
108
+ flex-direction: column;
109
+ background: white;
110
+ border-radius: 8px;
111
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
112
+ overflow: hidden;
113
+ }
114
+
115
+ .messages {
116
+ flex: 1;
117
+ overflow-y: auto;
118
+ padding: 20px;
119
+ display: flex;
120
+ flex-direction: column;
121
+ gap: 12px;
122
+ }
123
+
124
+ .empty-state {
125
+ display: flex;
126
+ flex-direction: column;
127
+ align-items: center;
128
+ justify-content: center;
129
+ gap: 20px;
130
+ text-align: center;
131
+ padding: 40px;
132
+ color: var(--gray-600);
133
+ }
134
+
135
+ .empty-icon {
136
+ font-size: 48px;
137
+ opacity: 0.6;
138
+ }
139
+
140
+ .message {
141
+ display: flex;
142
+ gap: 10px;
143
+ animation: slideIn 0.3s ease;
144
+ }
145
+
146
+ @keyframes slideIn {
147
+ from { opacity: 0; transform: translateY(10px); }
148
+ to { opacity: 1; transform: translateY(0); }
149
+ }
150
+
151
+ .message.user {
152
+ justify-content: flex-end;
153
+ }
154
+
155
+ .message-avatar {
156
+ font-size: 18px;
157
+ flex-shrink: 0;
158
+ }
159
+
160
+ .message-content {
161
+ display: flex;
162
+ flex-direction: column;
163
+ gap: 4px;
164
+ max-width: 70%;
165
+ }
166
+
167
+ .message.user .message-content {
168
+ align-items: flex-end;
169
+ }
170
+
171
+ .message-text {
172
+ padding: 10px 14px;
173
+ border-radius: 12px;
174
+ word-wrap: break-word;
175
+ font-size: 13px;
176
+ line-height: 1.5;
177
+ }
178
+
179
+ .message.user .message-text {
180
+ background: var(--primary);
181
+ color: white;
182
+ }
183
+
184
+ .message.assistant .message-text {
185
+ background: var(--gray-100);
186
+ color: var(--gray-900);
187
+ }
188
+
189
+ .message-stats {
190
+ font-size: 11px;
191
+ color: var(--gray-500);
192
+ padding: 0 14px;
193
+ }
194
+
195
+ .typing-indicator {
196
+ display: flex;
197
+ gap: 4px;
198
+ padding: 10px 14px;
199
+ }
200
+
201
+ .typing-indicator span {
202
+ width: 6px;
203
+ height: 6px;
204
+ border-radius: 50%;
205
+ background: var(--gray-400);
206
+ animation: typing 1.4s infinite;
207
+ }
208
+
209
+ .typing-indicator span:nth-child(2) { animation-delay: 0.2s; }
210
+ .typing-indicator span:nth-child(3) { animation-delay: 0.4s; }
211
+
212
+ @keyframes typing {
213
+ 0%, 60%, 100% { opacity: 0.5; transform: translateY(0); }
214
+ 30% { opacity: 1; transform: translateY(-8px); }
215
+ }
216
+
217
+ .input-area {
218
+ display: flex;
219
+ gap: 10px;
220
+ padding: 15px 20px;
221
+ border-top: 1px solid var(--gray-300);
222
+ background: var(--gray-50);
223
+ }
224
+
225
+ .input-area textarea {
226
+ flex: 1;
227
+ padding: 10px;
228
+ border: 1px solid var(--gray-300);
229
+ border-radius: 6px;
230
+ font-size: 13px;
231
+ resize: none;
232
+ max-height: 80px;
233
+ font-family: inherit;
234
+ }
235
+
236
+ .send-btn {
237
+ background: var(--primary);
238
+ color: white;
239
+ border: none;
240
+ padding: 10px 20px;
241
+ border-radius: 6px;
242
+ font-size: 12px;
243
+ font-weight: 600;
244
+ cursor: pointer;
245
+ align-self: flex-end;
246
+ white-space: nowrap;
247
+ transition: all 0.2s;
248
+ }
249
+
250
+ .send-btn:hover:not(:disabled) {
251
+ background: var(--secondary);
252
+ transform: translateY(-2px);
253
+ box-shadow: 0 10px 15px rgba(0, 0, 0, 0.1);
254
+ }
255
+
256
+ .send-btn:disabled {
257
+ opacity: 0.5;
258
+ cursor: not-allowed;
259
+ }
260
+
261
+ @media (max-width: 768px) {
262
+ .app-content {
263
+ flex-direction: column;
264
+ gap: 10px;
265
+ }
266
+
267
+ .sidebar {
268
+ width: 100%;
269
+ }
270
+
271
+ .message-content {
272
+ max-width: 85%;
273
+ }
274
+
275
+ .input-area {
276
+ flex-direction: column;
277
+ }
278
+
279
+ .send-btn {
280
+ align-self: stretch;
281
+ }
282
+ }
283
+ </style>
284
+ </head>
285
+ <body>
286
+ <div class="app-container">
287
+ <header class="app-header">
288
+ <h1>🤖 Zephyr-7B Chatbot</h1>
289
+ <p>Powered by Hugging Face Spaces</p>
290
+ </header>
291
+
292
+ <div class="app-content">
293
+ <aside class="sidebar">
294
+ <h3>⚙️ Settings</h3>
295
+
296
+ <label>System Prompt</label>
297
+ <textarea id="systemPrompt" placeholder="Define assistant role...">Du bist ein hilfsbereiter KI-Assistent.</textarea>
298
+
299
+ <label>Temperature: <span id="tempValue">0.70</span></label>
300
+ <input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7">
301
+
302
+ <label>Top P: <span id="topPValue">0.90</span></label>
303
+ <input type="range" id="topP" min="0" max="1" step="0.05" value="0.9">
304
+
305
+ <div id="stats" style="margin-top: 20px; padding: 15px; background: #f0f9ff; border-radius: 6px; display: none;">
306
+ <h4 style="font-size: 12px; margin-bottom: 8px;">📊 Last Response</h4>
307
+ <div style="font-size: 11px; color: var(--gray-600);">
308
+ <div>Tokens: <strong id="statsTokens">-</strong></div>
309
+ <div>Time: <strong id="statsTime">-</strong>s</div>
310
+ </div>
311
+ </div>
312
+ </aside>
313
+
314
+ <main class="chat-container">
315
+ <div class="messages" id="messages">
316
+ <div class="empty-state">
317
+ <div class="empty-icon">🤖</div>
318
+ <h2 style="color: var(--gray-900);">Welcome!</h2>
319
+ <p>Start a conversation with Zephyr-7B</p>
320
+ </div>
321
+ </div>
322
+
323
+ <div class="input-area">
324
+ <textarea id="messageInput" placeholder="Type your message... (Shift+Enter for new line)" rows="3"></textarea>
325
+ <button class="send-btn" id="sendBtn" onclick="sendMessage()">➤ Send</button>
326
+ </div>
327
+ </main>
328
+ </div>
329
+ </div>
330
+
331
+ <script>
332
+ const messagesDiv = document.getElementById('messages');
333
+ const messageInput = document.getElementById('messageInput');
334
+ const sendBtn = document.getElementById('sendBtn');
335
+ const systemPromptInput = document.getElementById('systemPrompt');
336
+ const tempSlider = document.getElementById('temperature');
337
+ const topPSlider = document.getElementById('topP');
338
+ const statsDiv = document.getElementById('stats');
339
+
340
+ let isLoading = false;
341
+ let messages = [];
342
+
343
+ // Update display values
344
+ tempSlider.addEventListener('input', (e) => {
345
+ document.getElementById('tempValue').textContent = parseFloat(e.target.value).toFixed(2);
346
+ });
347
+
348
+ topPSlider.addEventListener('input', (e) => {
349
+ document.getElementById('topPValue').textContent = parseFloat(e.target.value).toFixed(2);
350
+ });
351
+
352
+ messageInput.addEventListener('keypress', (e) => {
353
+ if (e.key === 'Enter' && !e.shiftKey && !isLoading) {
354
+ e.preventDefault();
355
+ sendMessage();
356
+ }
357
+ });
358
+
359
/**
 * Send the text in the input box to POST /api/generate and render the reply.
 *
 * Does nothing while a request is in flight or when the input is blank.
 * On failure, an error bubble is rendered; when the server answers with a
 * JSON body containing a string `detail`, that text is included so the user
 * sees why the request failed rather than only the status code.
 */
async function sendMessage() {
    const message = messageInput.value.trim();
    if (!message || isLoading) return;

    isLoading = true;
    sendBtn.disabled = true;
    messageInput.value = '';

    // Remove the welcome placeholder before the first message is shown.
    if (messagesDiv.querySelector('.empty-state')) {
        messagesDiv.innerHTML = '';
    }

    addMessage('user', message);

    try {
        const response = await fetch('/api/generate', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                prompt: message,
                system_prompt: systemPromptInput.value,
                temperature: parseFloat(tempSlider.value),
                top_p: parseFloat(topPSlider.value),
                max_tokens: 512
            })
        });

        if (!response.ok) {
            let detail = '';
            try {
                const errorBody = await response.json();
                if (errorBody && typeof errorBody.detail === 'string') {
                    detail = errorBody.detail;
                }
            } catch (parseError) {
                // Error body was not JSON; fall back to the status code only.
            }
            const base = `HTTP error! status: ${response.status}`;
            throw new Error(detail ? `${base} (${detail})` : base);
        }

        const data = await response.json();

        addMessage('assistant', data.response, {
            tokens: data.tokens,
            time: data.time_seconds
        });

        // Mirror the latest response statistics in the sidebar.
        document.getElementById('statsTokens').textContent = data.tokens;
        document.getElementById('statsTime').textContent = data.time_seconds;
        statsDiv.style.display = 'block';

    } catch (error) {
        addMessage('assistant', `❌ Error: ${error.message}`);
    } finally {
        // Always re-enable the UI, whether the request succeeded or failed.
        isLoading = false;
        sendBtn.disabled = false;
        messageInput.focus();
    }
}
412
+
413
/**
 * Append one chat bubble to the message list and scroll it into view.
 *
 * All dynamic values (message text and statistics) are written through
 * `textContent`, so nothing received from the user or the server is ever
 * parsed as HTML.
 *
 * @param {string} role - 'user' or 'assistant'; also used as a CSS class.
 * @param {string} content - Message text to display.
 * @param {{time: *, tokens: *}|null} [stats] - Optional timing/token figures
 *     shown beneath the bubble.
 */
function addMessage(role, content, stats = null) {
    const messageEl = document.createElement('div');
    messageEl.className = `message ${role}`;

    const avatarEl = document.createElement('div');
    avatarEl.className = 'message-avatar';
    avatarEl.textContent = role === 'user' ? '👤' : '🤖';

    const contentEl = document.createElement('div');
    contentEl.className = 'message-content';

    const textEl = document.createElement('div');
    textEl.className = 'message-text';
    textEl.textContent = content;
    contentEl.appendChild(textEl);

    if (stats) {
        const statsEl = document.createElement('div');
        statsEl.className = 'message-stats';
        statsEl.textContent = `⏱️ ${stats.time}s • 📊 ${stats.tokens} tokens`;
        contentEl.appendChild(statsEl);
    }

    messageEl.appendChild(avatarEl);
    messageEl.appendChild(contentEl);
    messagesDiv.appendChild(messageEl);

    // Keep the newest message visible.
    messagesDiv.scrollTop = messagesDiv.scrollHeight;
}
434
+
435
/**
 * Escape text for safe insertion into HTML markup.
 *
 * Assigns the text to a detached element's `textContent` and reads the
 * browser-serialised `innerHTML` back.
 *
 * @param {string} text - Raw text.
 * @returns {string} HTML-escaped text.
 */
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    const escaped = scratch.innerHTML;
    return escaped;
}
440
+
441
+ // Initial focus
442
+ messageInput.focus();
443
+ </script>
444
+ </body>
445
+ </html>
index.html CHANGED
@@ -29,6 +29,6 @@
29
  </head>
30
  <body>
31
  <div id="root"></div>
32
- <script type="module" src="../src/main.jsx"></script>
33
  </body>
34
  </html>
 
29
  </head>
30
  <body>
31
  <div id="root"></div>
32
+ <script type="module" src="/src/main.jsx"></script>
33
  </body>
34
  </html>
vite.config.js CHANGED
@@ -1,11 +1,9 @@
1
  import { defineConfig } from 'vite'
2
  import react from '@vitejs/plugin-react'
3
- import { resolve } from 'path'
4
 
5
  export default defineConfig({
6
  plugins: [react()],
7
- // WICHTIG: Zeigt Vite, dass deine index.html im public-Ordner wohnt
8
- root: '',
9
  server: {
10
  port: 5173,
11
  proxy: {
@@ -17,7 +15,10 @@ export default defineConfig({
17
  }
18
  },
19
  build: {
20
- // Schiebt das fertige Build-Ergebnis wieder hoch ins Hauptverzeichnis nach /dist
 
 
 
21
  outDir: resolve(__dirname, 'dist'),
22
  emptyOutDir: true,
23
  sourcemap: true,
 
1
  import { defineConfig } from 'vite'
2
  import react from '@vitejs/plugin-react'
 
3
 
4
  export default defineConfig({
5
  plugins: [react()],
6
+ root: '.', // Root ist das aktuelle Verzeichnis
 
7
  server: {
8
  port: 5173,
9
  proxy: {
 
15
  }
16
  },
17
  build: {
18
+ outDir: 'dist',
19
+ sourcemap: true,
20
+ }
21
+ })
22
  outDir: resolve(__dirname, 'dist'),
23
  emptyOutDir: true,
24
  sourcemap: true,