OrbitMC commited on
Commit
4db9045
·
verified ·
1 Parent(s): db1556d

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +872 -339
Dockerfile CHANGED
@@ -1,375 +1,908 @@
1
- # ============================================================
2
- # Dockerfile — Fast Anime-English TTS Server (Piper-based)
3
- # ============================================================
4
- # Build: docker build -t anime-tts .
5
- # Run: docker run -p 5000:5000 anime-tts
6
- # Usage: curl -X POST http://localhost:5000/tts \
7
- # -H "Content-Type: application/json" \
8
- # -d '{"text":"Hello senpai! Welcome to the anime world!"}' \
9
- # --output speech.wav
10
- # ============================================================
11
 
12
- FROM python:3.11-slim
13
 
14
- # Install system deps
15
- RUN apt-get update && apt-get install -y --no-install-recommends \
16
- wget \
17
- curl \
18
- libsndfile1 \
19
  ffmpeg \
 
 
 
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
- WORKDIR /app
 
 
23
 
24
- # Install Python dependencies
25
  RUN pip install --no-cache-dir \
 
26
  flask \
 
27
  piper-tts \
28
  numpy \
29
  scipy
30
 
31
- # --------------------------------------------------------------------------
32
- # Download a fast, high-quality anime-style English voice
33
- # We use "lessac" (medium quality, very expressive/bright) as the base
34
- # and also download an anime-adjacent voice.
35
- #
36
- # Available voices: https://huggingface.co/rhasspy/piper-voices/tree/main
37
- #
38
- # Voice options (pick ONE pair — model + config):
39
- # 1) en_US-lessac-medium — bright, expressive female (anime-adjacent)
40
- # 2) en_US-libritts_r-medium multiple speakers, some sound anime-like
41
- # 3) en_GB-jenny_dioco-medium — young British female
42
- #
43
- # We'll download TWO voices so users can pick via the API.
44
- # --------------------------------------------------------------------------
45
-
46
- RUN mkdir -p /app/voices
47
-
48
- # Voice 1: Lessac (bright, expressive, anime-adjacent female)
49
- RUN wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx" \
50
- -O /app/voices/lessac.onnx && \
51
- wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json" \
52
- -O /app/voices/lessac.onnx.json
53
-
54
- # Voice 2: Jenny Dioco (young, bright British female anime dub style)
55
- RUN wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_GB/jenny_dioco/medium/en_GB-jenny_dioco-medium.onnx" \
56
- -O /app/voices/jenny.onnx && \
57
- wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_GB/jenny_dioco/medium/en_GB-jenny_dioco-medium.onnx.json" \
58
- -O /app/voices/jenny.onnx.json
59
-
60
- # Voice 3: Amy (medium, clear North-American — works well sped up)
61
- RUN wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx" \
62
- -O /app/voices/amy.onnx && \
63
- wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx.json" \
64
- -O /app/voices/amy.onnx.json
65
-
66
- # --------------------------------------------------------------------------
67
- # Create the TTS API server
68
- # --------------------------------------------------------------------------
69
- RUN cat > /app/server.py << 'PYTHON_SERVER'
70
- #!/usr/bin/env python3
71
- """
72
- Fast Anime-Voice TTS Server using Piper.
73
-
74
- Endpoints:
75
- POST /tts — Generate speech, return WAV
76
- POST /tts/stream — Generate speech, return streaming WAV
77
- GET /voices — List available voices
78
- GET /health — Health check
79
-
80
- JSON body for /tts:
81
- {
82
- "text": "Hello world!",
83
- "voice": "lessac", // optional: lessac, jenny, amy (default: lessac)
84
- "speed": 1.0, // optional: 0.5-2.0 (default: 1.0)
85
- "pitch_shift": 0, // optional: semitones to shift pitch (for anime effect, try 2-4)
86
- "output_format": "wav" // optional: wav, mp3 (default: wav)
87
- }
88
- """
89
-
90
- import io
91
- import os
92
  import time
 
 
 
93
  import wave
94
  import struct
95
- import subprocess
96
- import tempfile
97
- import logging
98
- from pathlib import Path
99
- from typing import Optional
100
 
101
- import numpy as np
102
- from flask import Flask, request, jsonify, send_file, Response
103
- from piper import PiperVoice
104
 
105
- logging.basicConfig(level=logging.INFO)
106
- logger = logging.getLogger("anime-tts")
 
107
 
108
- app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- # ---- Voice Registry ----
111
- VOICES_DIR = Path("/app/voices")
112
- VOICE_MAP = {
113
- "lessac": VOICES_DIR / "lessac.onnx",
114
- "jenny": VOICES_DIR / "jenny.onnx",
115
- "amy": VOICES_DIR / "amy.onnx",
116
  }
117
 
118
- # Cache loaded voices for speed
119
- _voice_cache: dict[str, PiperVoice] = {}
120
-
121
-
122
- def get_voice(name: str) -> PiperVoice:
123
- """Load and cache a Piper voice."""
124
- if name not in _voice_cache:
125
- model_path = VOICE_MAP.get(name)
126
- if not model_path or not model_path.exists():
127
- raise ValueError(f"Voice '{name}' not found. Available: {list(VOICE_MAP.keys())}")
128
- logger.info(f"Loading voice: {name} from {model_path}")
129
- _voice_cache[name] = PiperVoice.load(str(model_path))
130
- logger.info(f"Voice '{name}' loaded successfully")
131
- return _voice_cache[name]
132
-
133
-
134
- def synthesize_speech(
135
- text: str,
136
- voice_name: str = "lessac",
137
- speed: float = 1.0,
138
- pitch_shift: int = 0,
139
- output_format: str = "wav",
140
- ) -> io.BytesIO:
141
- """Synthesize text to speech and return audio bytes."""
142
-
143
- voice = get_voice(voice_name)
144
-
145
- # Synthesize to WAV in memory
146
- wav_buffer = io.BytesIO()
147
-
148
- # Piper uses length_scale for speed (inverse: lower = faster)
149
- length_scale = 1.0 / max(0.25, min(speed, 4.0))
150
-
151
- with wave.open(wav_buffer, "wb") as wav_file:
152
- voice.synthesize(
153
- text,
154
- wav_file,
155
- length_scale=length_scale,
156
- sentence_silence=0.15,
157
- )
158
-
159
- wav_buffer.seek(0)
160
-
161
- # Apply pitch shift if requested (for anime effect)
162
- if pitch_shift != 0 or output_format == "mp3":
163
- wav_buffer = post_process_audio(wav_buffer, pitch_shift, output_format)
164
-
165
- return wav_buffer
166
-
167
-
168
- def post_process_audio(
169
- wav_buffer: io.BytesIO,
170
- pitch_shift: int = 0,
171
- output_format: str = "wav",
172
- ) -> io.BytesIO:
173
- """Apply pitch shifting and format conversion using ffmpeg."""
174
-
175
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_in:
176
- tmp_in.write(wav_buffer.read())
177
- tmp_in_path = tmp_in.name
178
-
179
- suffix = f".{output_format}"
180
- with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_out:
181
- tmp_out_path = tmp_out.name
182
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  try:
184
- # Build ffmpeg command
185
- cmd = ["ffmpeg", "-y", "-i", tmp_in_path]
186
-
187
- filters = []
188
-
189
- # Pitch shift using asetrate + aresample (fast method)
190
- if pitch_shift != 0:
191
- # Calculate rate multiplier from semitones
192
- rate_mult = 2 ** (pitch_shift / 12.0)
193
- # Read original sample rate
194
- with wave.open(tmp_in_path, "rb") as wf:
195
- orig_sr = wf.getframerate()
196
- new_sr = int(orig_sr * rate_mult)
197
- filters.append(f"asetrate={new_sr}")
198
- filters.append(f"aresample={orig_sr}")
199
- # Compensate tempo change from pitch shift
200
- tempo = 1.0 / rate_mult
201
- if 0.5 <= tempo <= 2.0:
202
- filters.append(f"atempo={tempo}")
203
- elif tempo < 0.5:
204
- # Chain atempo filters for extreme values
205
- filters.append(f"atempo=0.5,atempo={tempo/0.5}")
206
-
207
- if filters:
208
- cmd.extend(["-af", ",".join(filters)])
209
-
210
- if output_format == "mp3":
211
- cmd.extend(["-codec:a", "libmp3lame", "-q:a", "2"])
212
-
213
- cmd.append(tmp_out_path)
214
-
215
- result = subprocess.run(
216
- cmd, capture_output=True, timeout=30
217
  )
218
-
219
- if result.returncode != 0:
220
- logger.error(f"ffmpeg error: {result.stderr.decode()}")
221
- # Fall back to original
222
- wav_buffer.seek(0)
223
- return wav_buffer
224
-
225
- output_buffer = io.BytesIO()
226
- with open(tmp_out_path, "rb") as f:
227
- output_buffer.write(f.read())
228
- output_buffer.seek(0)
229
- return output_buffer
230
-
231
- finally:
232
- os.unlink(tmp_in_path)
233
- if os.path.exists(tmp_out_path):
234
- os.unlink(tmp_out_path)
235
-
236
-
237
- # ---- Pre-warm default voice on startup ----
238
- @app.before_request
239
- def _warmup():
240
- """Lazy warmup — load default voice on first request."""
241
- app.before_request_funcs[None].remove(_warmup)
242
- try:
243
- get_voice("lessac")
244
  except Exception as e:
245
- logger.warning(f"Warmup failed: {e}")
 
246
 
 
247
 
248
- # ---- API Routes ----
249
-
250
- @app.route("/health", methods=["GET"])
251
- def health():
252
- return jsonify({"status": "ok", "engine": "piper-tts", "cached_voices": list(_voice_cache.keys())})
253
-
254
-
255
- @app.route("/voices", methods=["GET"])
256
- def list_voices():
257
- voices = []
258
- for name, path in VOICE_MAP.items():
259
- voices.append({
260
- "name": name,
261
- "available": path.exists(),
262
- "description": {
263
- "lessac": "Bright expressive US female — anime-adjacent, great default",
264
- "jenny": "Young bright British female — anime dub style",
265
- "amy": "Clear US female — works well with pitch shift for anime effect",
266
- }.get(name, ""),
267
- "tip": "Try pitch_shift=2 or pitch_shift=3 for more anime-like sound",
268
- })
269
- return jsonify({"voices": voices})
270
-
271
-
272
- @app.route("/tts", methods=["POST"])
273
  def tts():
274
- """Main TTS endpoint."""
275
- start = time.time()
276
-
277
- data = request.get_json(force=True, silent=True) or {}
278
-
279
- text = data.get("text", "").strip()
280
  if not text:
281
- return jsonify({"error": "No text provided"}), 400
282
-
283
- if len(text) > 10000:
284
- return jsonify({"error": "Text too long (max 10000 chars)"}), 400
285
-
286
- voice_name = data.get("voice", "lessac")
287
- speed = float(data.get("speed", 1.0))
288
- pitch_shift = int(data.get("pitch_shift", 0))
289
- output_format = data.get("output_format", "wav").lower()
290
-
291
- if output_format not in ("wav", "mp3"):
292
- return jsonify({"error": "output_format must be 'wav' or 'mp3'"}), 400
293
-
294
- if voice_name not in VOICE_MAP:
295
- return jsonify({
296
- "error": f"Unknown voice '{voice_name}'",
297
- "available": list(VOICE_MAP.keys())
298
- }), 400
299
-
300
- try:
301
- audio_buffer = synthesize_speech(
302
- text=text,
303
- voice_name=voice_name,
304
- speed=speed,
305
- pitch_shift=pitch_shift,
306
- output_format=output_format,
307
- )
308
- except Exception as e:
309
- logger.exception("Synthesis failed")
310
- return jsonify({"error": str(e)}), 500
311
-
312
- elapsed = time.time() - start
313
- logger.info(f"TTS: {len(text)} chars, voice={voice_name}, speed={speed}, "
314
- f"pitch={pitch_shift}, format={output_format}, time={elapsed:.3f}s")
315
-
316
- mimetype = "audio/wav" if output_format == "wav" else "audio/mpeg"
317
-
318
- return send_file(
319
- audio_buffer,
320
- mimetype=mimetype,
321
- as_attachment=True,
322
- download_name=f"speech.{output_format}",
323
- )
324
-
325
-
326
- @app.route("/tts/batch", methods=["POST"])
327
- def tts_batch():
328
- """Batch TTS — synthesize multiple texts."""
329
- data = request.get_json(force=True, silent=True) or {}
330
- texts = data.get("texts", [])
331
-
332
- if not texts or not isinstance(texts, list):
333
- return jsonify({"error": "Provide 'texts' as a list of strings"}), 400
334
-
335
- voice_name = data.get("voice", "lessac")
336
- speed = float(data.get("speed", 1.0))
337
- pitch_shift = int(data.get("pitch_shift", 0))
338
-
339
- # Concatenate all texts with pauses
340
- combined = ". ".join(texts)
341
-
342
  try:
343
- audio_buffer = synthesize_speech(
344
- text=combined,
345
- voice_name=voice_name,
346
- speed=speed,
347
- pitch_shift=pitch_shift,
 
 
 
 
 
 
 
 
348
  )
349
- except Exception as e:
350
- return jsonify({"error": str(e)}), 500
351
-
352
- return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="batch.wav")
353
 
 
 
 
354
 
355
- if __name__ == "__main__":
356
- # Pre-load default voice
357
- logger.info("Pre-loading default voice...")
358
- try:
359
- get_voice("lessac")
360
- logger.info("Default voice ready!")
 
 
 
361
  except Exception as e:
362
- logger.error(f"Failed to pre-load voice: {e}")
363
-
364
- app.run(host="0.0.0.0", port=7860, threaded=True)
365
- PYTHON_SERVER
366
 
367
- # Expose port
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  EXPOSE 7860
369
 
370
- # Health check
371
- HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
372
- CMD curl -f http://localhost:5000/health || exit 1
373
-
374
- # Run the server
375
- CMD ["python", "/app/server.py"]
 
1
+ FROM ghcr.io/ggml-org/llama.cpp:full
 
 
 
 
 
 
 
 
 
2
 
3
+ WORKDIR /app
4
 
5
+ # Install all dependencies
6
+ RUN apt update && apt install -y \
7
+ python3-pip \
8
+ python3-venv \
 
9
  ffmpeg \
10
+ libsndfile1 \
11
+ espeak-ng \
12
+ curl \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
+ # Create venv to avoid pip externally-managed error
16
+ RUN python3 -m venv /opt/venv
17
+ ENV PATH="/opt/venv/bin:$PATH"
18
 
19
+ # Install Python packages - using piper-tts (tiny, fast, high quality, CPU-friendly)
20
  RUN pip install --no-cache-dir \
21
+ huggingface_hub \
22
  flask \
23
+ requests \
24
  piper-tts \
25
  numpy \
26
  scipy
27
 
28
+ # Download LLM model
29
+ RUN python3 -c 'from huggingface_hub import hf_hub_download; \
30
+ repo="unsloth/Qwen3.5-0.8B-GGUF"; \
31
+ hf_hub_download(repo_id=repo, filename="Qwen3.5-0.8B-UD-Q5_K_XL.gguf", local_dir="/app")'
32
+
33
+ # Download Piper voice model - using "lessac" high quality (fast + good quality anime-ish tone)
34
+ # Using medium quality for speed on CPU - sounds clean and crisp
35
+ RUN mkdir -p /app/voices && \
36
+ python3 -c ' \
37
+ from huggingface_hub import hf_hub_download; \
38
+ import os; \
39
+ # Using a high-quality fast English voice \
40
+ hf_hub_download( \
41
+ repo_id="rhasspy/piper-voices", \
42
+ filename="en/en_US/lessac/high/en_US-lessac-high.onnx", \
43
+ local_dir="/app/voices" \
44
+ ); \
45
+ hf_hub_download( \
46
+ repo_id="rhasspy/piper-voices", \
47
+ filename="en/en_US/lessac/high/en_US-lessac-high.onnx.json", \
48
+ local_dir="/app/voices" \
49
+ )'
50
+
51
+ # Create the entire Jarvis app as a single Python file embedded in Dockerfile
52
+ RUN cat << 'PYTHON_APP' > /app/jarvis.py
53
+ import subprocess
54
+ import threading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  import time
56
+ import os
57
+ import io
58
+ import json
59
  import wave
60
  import struct
61
+ import requests
62
+ from flask import Flask, request, jsonify, send_file, Response, render_template_string
 
 
 
63
 
64
+ app = Flask(__name__)
 
 
65
 
66
+ LLAMA_URL = "http://127.0.0.1:8080"
67
+ VOICE_MODEL = "/app/voices/en/en_US/lessac/high/en_US-lessac-high.onnx"
68
+ VOICE_CONFIG = VOICE_MODEL + ".json"
69
 
70
+ # ============================================================
71
+ # HTML/CSS/JS - Full Jarvis UI embedded
72
+ # ============================================================
73
+ HTML_PAGE = """
74
+ <!DOCTYPE html>
75
+ <html lang="en">
76
+ <head>
77
+ <meta charset="UTF-8">
78
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
79
+ <title>J.A.R.V.I.S. - AI Assistant</title>
80
+ <style>
81
+ @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Rajdhani:wght@300;400;500;600;700&display=swap');
82
+
83
+ * { margin: 0; padding: 0; box-sizing: border-box; }
84
+
85
+ body {
86
+ background: #0a0a0f;
87
+ color: #00d4ff;
88
+ font-family: 'Rajdhani', sans-serif;
89
+ min-height: 100vh;
90
+ overflow-x: hidden;
91
+ }
92
+
93
+ /* Animated background */
94
+ body::before {
95
+ content: '';
96
+ position: fixed;
97
+ top: 0; left: 0; right: 0; bottom: 0;
98
+ background:
99
+ radial-gradient(ellipse at 20% 50%, rgba(0, 212, 255, 0.03) 0%, transparent 50%),
100
+ radial-gradient(ellipse at 80% 50%, rgba(0, 100, 255, 0.03) 0%, transparent 50%),
101
+ radial-gradient(ellipse at 50% 0%, rgba(0, 212, 255, 0.05) 0%, transparent 40%);
102
+ z-index: -1;
103
+ animation: bgPulse 8s ease-in-out infinite;
104
+ }
105
+
106
+ @keyframes bgPulse {
107
+ 0%, 100% { opacity: 0.5; }
108
+ 50% { opacity: 1; }
109
+ }
110
+
111
+ /* Grid lines background */
112
+ body::after {
113
+ content: '';
114
+ position: fixed;
115
+ top: 0; left: 0; right: 0; bottom: 0;
116
+ background-image:
117
+ linear-gradient(rgba(0, 212, 255, 0.03) 1px, transparent 1px),
118
+ linear-gradient(90deg, rgba(0, 212, 255, 0.03) 1px, transparent 1px);
119
+ background-size: 50px 50px;
120
+ z-index: -1;
121
+ }
122
+
123
+ .container {
124
+ max-width: 900px;
125
+ margin: 0 auto;
126
+ padding: 20px;
127
+ min-height: 100vh;
128
+ display: flex;
129
+ flex-direction: column;
130
+ }
131
+
132
+ /* Header */
133
+ .header {
134
+ text-align: center;
135
+ padding: 30px 0 20px;
136
+ position: relative;
137
+ }
138
+
139
+ .header h1 {
140
+ font-family: 'Orbitron', sans-serif;
141
+ font-size: 2.5em;
142
+ font-weight: 900;
143
+ letter-spacing: 15px;
144
+ background: linear-gradient(135deg, #00d4ff, #0088ff, #00d4ff);
145
+ background-size: 200% 200%;
146
+ -webkit-background-clip: text;
147
+ -webkit-text-fill-color: transparent;
148
+ animation: gradientShift 3s ease-in-out infinite;
149
+ text-shadow: 0 0 30px rgba(0, 212, 255, 0.3);
150
+ }
151
+
152
+ @keyframes gradientShift {
153
+ 0%, 100% { background-position: 0% 50%; }
154
+ 50% { background-position: 100% 50%; }
155
+ }
156
+
157
+ .header .subtitle {
158
+ font-family: 'Rajdhani', sans-serif;
159
+ font-size: 0.85em;
160
+ color: rgba(0, 212, 255, 0.4);
161
+ letter-spacing: 8px;
162
+ margin-top: 5px;
163
+ font-weight: 300;
164
+ }
165
+
166
+ .status-bar {
167
+ display: flex;
168
+ justify-content: center;
169
+ gap: 30px;
170
+ margin-top: 15px;
171
+ font-size: 0.75em;
172
+ letter-spacing: 2px;
173
+ color: rgba(0, 212, 255, 0.3);
174
+ }
175
+
176
+ .status-item {
177
+ display: flex;
178
+ align-items: center;
179
+ gap: 6px;
180
+ }
181
+
182
+ .status-dot {
183
+ width: 6px;
184
+ height: 6px;
185
+ border-radius: 50%;
186
+ background: #00ff88;
187
+ box-shadow: 0 0 10px #00ff88;
188
+ animation: dotPulse 2s ease-in-out infinite;
189
+ }
190
+
191
+ .status-dot.processing {
192
+ background: #ffaa00;
193
+ box-shadow: 0 0 10px #ffaa00;
194
+ animation: dotPulse 0.5s ease-in-out infinite;
195
+ }
196
+
197
+ @keyframes dotPulse {
198
+ 0%, 100% { opacity: 1; }
199
+ 50% { opacity: 0.3; }
200
+ }
201
+
202
+ /* Arc Reactor Animation */
203
+ .reactor-container {
204
+ display: flex;
205
+ justify-content: center;
206
+ margin: 10px 0;
207
+ }
208
+
209
+ .reactor {
210
+ width: 80px;
211
+ height: 80px;
212
+ position: relative;
213
+ }
214
+
215
+ .reactor-ring {
216
+ position: absolute;
217
+ border: 2px solid rgba(0, 212, 255, 0.3);
218
+ border-radius: 50%;
219
+ border-top-color: #00d4ff;
220
+ }
221
+
222
+ .reactor-ring:nth-child(1) {
223
+ width: 80px; height: 80px; top: 0; left: 0;
224
+ animation: spin 3s linear infinite;
225
+ }
226
+ .reactor-ring:nth-child(2) {
227
+ width: 60px; height: 60px; top: 10px; left: 10px;
228
+ animation: spin 2s linear infinite reverse;
229
+ }
230
+ .reactor-ring:nth-child(3) {
231
+ width: 40px; height: 40px; top: 20px; left: 20px;
232
+ animation: spin 1.5s linear infinite;
233
+ }
234
+ .reactor-core {
235
+ position: absolute;
236
+ width: 20px; height: 20px;
237
+ top: 30px; left: 30px;
238
+ background: radial-gradient(circle, #00d4ff, #0066ff);
239
+ border-radius: 50%;
240
+ box-shadow: 0 0 20px #00d4ff, 0 0 40px rgba(0, 212, 255, 0.3);
241
+ animation: corePulse 2s ease-in-out infinite;
242
+ }
243
+
244
+ .reactor.active .reactor-ring { border-top-color: #00ff88; }
245
+ .reactor.active .reactor-core {
246
+ background: radial-gradient(circle, #00ff88, #00d4ff);
247
+ box-shadow: 0 0 30px #00ff88, 0 0 60px rgba(0, 255, 136, 0.3);
248
+ }
249
+
250
+ @keyframes spin { to { transform: rotate(360deg); } }
251
+ @keyframes corePulse {
252
+ 0%, 100% { transform: scale(1); opacity: 1; }
253
+ 50% { transform: scale(1.2); opacity: 0.8; }
254
+ }
255
+
256
+ /* Chat area */
257
+ .chat-area {
258
+ flex: 1;
259
+ overflow-y: auto;
260
+ padding: 20px 0;
261
+ margin: 10px 0;
262
+ max-height: 55vh;
263
+ scrollbar-width: thin;
264
+ scrollbar-color: rgba(0, 212, 255, 0.3) transparent;
265
+ }
266
+
267
+ .chat-area::-webkit-scrollbar { width: 4px; }
268
+ .chat-area::-webkit-scrollbar-track { background: transparent; }
269
+ .chat-area::-webkit-scrollbar-thumb {
270
+ background: rgba(0, 212, 255, 0.3);
271
+ border-radius: 2px;
272
+ }
273
+
274
+ .message {
275
+ margin: 12px 0;
276
+ padding: 15px 20px;
277
+ border-radius: 12px;
278
+ animation: messageIn 0.4s cubic-bezier(0.22, 1, 0.36, 1);
279
+ position: relative;
280
+ line-height: 1.6;
281
+ font-size: 1.05em;
282
+ }
283
+
284
+ @keyframes messageIn {
285
+ from { opacity: 0; transform: translateY(15px); }
286
+ to { opacity: 1; transform: translateY(0); }
287
+ }
288
+
289
+ .message.user {
290
+ background: linear-gradient(135deg, rgba(0, 212, 255, 0.1), rgba(0, 100, 255, 0.05));
291
+ border: 1px solid rgba(0, 212, 255, 0.15);
292
+ margin-left: 60px;
293
+ color: #e0f0ff;
294
+ }
295
+
296
+ .message.user::before {
297
+ content: 'YOU';
298
+ position: absolute;
299
+ top: -8px;
300
+ right: 15px;
301
+ font-family: 'Orbitron', sans-serif;
302
+ font-size: 0.6em;
303
+ color: rgba(0, 212, 255, 0.5);
304
+ letter-spacing: 3px;
305
+ }
306
+
307
+ .message.jarvis {
308
+ background: linear-gradient(135deg, rgba(0, 255, 136, 0.05), rgba(0, 212, 255, 0.03));
309
+ border: 1px solid rgba(0, 255, 136, 0.1);
310
+ margin-right: 60px;
311
+ color: #d0ffe0;
312
+ }
313
+
314
+ .message.jarvis::before {
315
+ content: 'JARVIS';
316
+ position: absolute;
317
+ top: -8px;
318
+ left: 15px;
319
+ font-family: 'Orbitron', sans-serif;
320
+ font-size: 0.6em;
321
+ color: rgba(0, 255, 136, 0.5);
322
+ letter-spacing: 3px;
323
+ }
324
+
325
+ .message.system {
326
+ text-align: center;
327
+ color: rgba(0, 212, 255, 0.3);
328
+ font-size: 0.8em;
329
+ border: none;
330
+ background: none;
331
+ letter-spacing: 2px;
332
+ }
333
+
334
+ /* Audio player in message */
335
+ .audio-btn {
336
+ display: inline-flex;
337
+ align-items: center;
338
+ gap: 8px;
339
+ margin-top: 10px;
340
+ padding: 6px 14px;
341
+ background: rgba(0, 212, 255, 0.1);
342
+ border: 1px solid rgba(0, 212, 255, 0.3);
343
+ border-radius: 20px;
344
+ color: #00d4ff;
345
+ cursor: pointer;
346
+ font-family: 'Rajdhani', sans-serif;
347
+ font-size: 0.85em;
348
+ letter-spacing: 1px;
349
+ transition: all 0.3s;
350
+ }
351
+
352
+ .audio-btn:hover {
353
+ background: rgba(0, 212, 255, 0.2);
354
+ box-shadow: 0 0 15px rgba(0, 212, 255, 0.2);
355
+ }
356
+
357
+ .audio-btn.playing {
358
+ background: rgba(0, 255, 136, 0.15);
359
+ border-color: rgba(0, 255, 136, 0.4);
360
+ color: #00ff88;
361
+ }
362
+
363
+ .audio-btn svg { width: 14px; height: 14px; fill: currentColor; }
364
+
365
+ /* Thinking indicator */
366
+ .thinking {
367
+ display: flex;
368
+ align-items: center;
369
+ gap: 8px;
370
+ padding: 15px 20px;
371
+ color: rgba(0, 212, 255, 0.5);
372
+ font-size: 0.9em;
373
+ }
374
+
375
+ .thinking-dots span {
376
+ display: inline-block;
377
+ width: 4px; height: 4px;
378
+ background: #00d4ff;
379
+ border-radius: 50%;
380
+ animation: thinkBounce 1.4s ease-in-out infinite;
381
+ margin: 0 2px;
382
+ }
383
+ .thinking-dots span:nth-child(2) { animation-delay: 0.2s; }
384
+ .thinking-dots span:nth-child(3) { animation-delay: 0.4s; }
385
+
386
+ @keyframes thinkBounce {
387
+ 0%, 80%, 100% { transform: scale(0.6); opacity: 0.3; }
388
+ 40% { transform: scale(1); opacity: 1; }
389
+ }
390
+
391
+ /* Input area */
392
+ .input-area {
393
+ padding: 15px 0;
394
+ position: relative;
395
+ }
396
+
397
+ .input-wrapper {
398
+ display: flex;
399
+ gap: 10px;
400
+ align-items: flex-end;
401
+ background: rgba(0, 212, 255, 0.03);
402
+ border: 1px solid rgba(0, 212, 255, 0.15);
403
+ border-radius: 16px;
404
+ padding: 8px;
405
+ transition: all 0.3s;
406
+ }
407
+
408
+ .input-wrapper:focus-within {
409
+ border-color: rgba(0, 212, 255, 0.4);
410
+ box-shadow: 0 0 20px rgba(0, 212, 255, 0.1);
411
+ }
412
+
413
+ #userInput {
414
+ flex: 1;
415
+ background: transparent;
416
+ border: none;
417
+ color: #e0f0ff;
418
+ font-family: 'Rajdhani', sans-serif;
419
+ font-size: 1.1em;
420
+ padding: 10px 15px;
421
+ outline: none;
422
+ resize: none;
423
+ max-height: 120px;
424
+ line-height: 1.5;
425
+ }
426
+
427
+ #userInput::placeholder {
428
+ color: rgba(0, 212, 255, 0.25);
429
+ letter-spacing: 1px;
430
+ }
431
+
432
+ .send-btn {
433
+ width: 48px;
434
+ height: 48px;
435
+ border-radius: 12px;
436
+ border: 1px solid rgba(0, 212, 255, 0.3);
437
+ background: linear-gradient(135deg, rgba(0, 212, 255, 0.15), rgba(0, 100, 255, 0.1));
438
+ color: #00d4ff;
439
+ cursor: pointer;
440
+ display: flex;
441
+ align-items: center;
442
+ justify-content: center;
443
+ transition: all 0.3s;
444
+ flex-shrink: 0;
445
+ }
446
+
447
+ .send-btn:hover:not(:disabled) {
448
+ background: linear-gradient(135deg, rgba(0, 212, 255, 0.3), rgba(0, 100, 255, 0.2));
449
+ box-shadow: 0 0 20px rgba(0, 212, 255, 0.2);
450
+ transform: scale(1.05);
451
+ }
452
+
453
+ .send-btn:disabled {
454
+ opacity: 0.3;
455
+ cursor: not-allowed;
456
+ }
457
+
458
+ .send-btn svg { width: 20px; height: 20px; fill: currentColor; }
459
+
460
+ /* Voice toggle */
461
+ .controls {
462
+ display: flex;
463
+ justify-content: center;
464
+ gap: 15px;
465
+ margin-top: 10px;
466
+ }
467
+
468
+ .toggle-btn {
469
+ display: flex;
470
+ align-items: center;
471
+ gap: 6px;
472
+ padding: 6px 16px;
473
+ border-radius: 20px;
474
+ border: 1px solid rgba(0, 212, 255, 0.2);
475
+ background: rgba(0, 212, 255, 0.05);
476
+ color: rgba(0, 212, 255, 0.5);
477
+ cursor: pointer;
478
+ font-family: 'Rajdhani', sans-serif;
479
+ font-size: 0.8em;
480
+ letter-spacing: 2px;
481
+ transition: all 0.3s;
482
+ }
483
+
484
+ .toggle-btn.active {
485
+ border-color: rgba(0, 255, 136, 0.4);
486
+ background: rgba(0, 255, 136, 0.1);
487
+ color: #00ff88;
488
+ }
489
+
490
+ .toggle-btn svg { width: 14px; height: 14px; fill: currentColor; }
491
+
492
+ /* Waveform visualization */
493
+ .waveform {
494
+ display: flex;
495
+ align-items: center;
496
+ justify-content: center;
497
+ gap: 3px;
498
+ height: 30px;
499
+ margin: 5px 0;
500
+ opacity: 0;
501
+ transition: opacity 0.3s;
502
+ }
503
+
504
+ .waveform.active { opacity: 1; }
505
+
506
+ .waveform-bar {
507
+ width: 3px;
508
+ background: linear-gradient(to top, #00d4ff, #00ff88);
509
+ border-radius: 2px;
510
+ animation: wave 0.8s ease-in-out infinite;
511
+ }
512
+
513
+ @keyframes wave {
514
+ 0%, 100% { height: 5px; }
515
+ 50% { height: 25px; }
516
+ }
517
+
518
+ /* Mobile */
519
+ @media (max-width: 600px) {
520
+ .header h1 { font-size: 1.8em; letter-spacing: 8px; }
521
+ .message { margin-left: 10px !important; margin-right: 10px !important; }
522
+ .status-bar { gap: 15px; font-size: 0.65em; }
523
+ }
524
+ </style>
525
+ </head>
526
+ <body>
527
+
528
+ <div class="container">
529
+ <div class="header">
530
+ <h1>J.A.R.V.I.S.</h1>
531
+ <div class="subtitle">JUST A RATHER VERY INTELLIGENT SYSTEM</div>
532
+ <div class="status-bar">
533
+ <div class="status-item">
534
+ <div class="status-dot" id="statusDot"></div>
535
+ <span id="statusText">ONLINE</span>
536
+ </div>
537
+ <div class="status-item">
538
+ <span>NEURAL CORE v3.5</span>
539
+ </div>
540
+ <div class="status-item">
541
+ <span>VOICE SYNTHESIS ACTIVE</span>
542
+ </div>
543
+ </div>
544
+ </div>
545
+
546
+ <div class="reactor-container">
547
+ <div class="reactor" id="reactor">
548
+ <div class="reactor-ring"></div>
549
+ <div class="reactor-ring"></div>
550
+ <div class="reactor-ring"></div>
551
+ <div class="reactor-core"></div>
552
+ </div>
553
+ </div>
554
+
555
+ <div class="waveform" id="waveform">
556
+ <div class="waveform-bar" style="animation-delay: 0s"></div>
557
+ <div class="waveform-bar" style="animation-delay: 0.1s"></div>
558
+ <div class="waveform-bar" style="animation-delay: 0.2s"></div>
559
+ <div class="waveform-bar" style="animation-delay: 0.3s"></div>
560
+ <div class="waveform-bar" style="animation-delay: 0.4s"></div>
561
+ <div class="waveform-bar" style="animation-delay: 0.3s"></div>
562
+ <div class="waveform-bar" style="animation-delay: 0.2s"></div>
563
+ <div class="waveform-bar" style="animation-delay: 0.1s"></div>
564
+ <div class="waveform-bar" style="animation-delay: 0s"></div>
565
+ </div>
566
+
567
+ <div class="chat-area" id="chatArea">
568
+ <div class="message system">— SYSTEM INITIALIZED — AWAITING INPUT —</div>
569
+ </div>
570
+
571
+ <div class="input-area">
572
+ <div class="input-wrapper">
573
+ <textarea id="userInput" rows="1" placeholder="Speak to J.A.R.V.I.S. ..."
574
+ onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();sendMessage();}"></textarea>
575
+ <button class="send-btn" id="sendBtn" onclick="sendMessage()">
576
+ <svg viewBox="0 0 24 24"><path d="M2 21l21-9L2 3v7l15 2-15 2v7z"/></svg>
577
+ </button>
578
+ </div>
579
+ <div class="controls">
580
+ <button class="toggle-btn active" id="voiceToggle" onclick="toggleVoice()">
581
+ <svg viewBox="0 0 24 24"><path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02zM14 3.23v2.06c2.89.86 5 3.54 5 6.71s-2.11 5.85-5 6.71v2.06c4.01-.91 7-4.49 7-8.77s-2.99-7.86-7-8.77z"/></svg>
582
+ <span>VOICE</span>
583
+ </button>
584
+ <button class="toggle-btn active" id="autoplayToggle" onclick="toggleAutoplay()">
585
+ <svg viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>
586
+ <span>AUTOPLAY</span>
587
+ </button>
588
+ </div>
589
+ </div>
590
+ </div>
591
+
592
+ <script>
593
+ let voiceEnabled = true;
594
+ let autoplayEnabled = true;
595
+ let isProcessing = false;
596
+ let currentAudio = null;
597
+ let conversationHistory = [];
598
+
599
+ const SYSTEM_PROMPT = `You are J.A.R.V.I.S. (Just A Rather Very Intelligent System), Tony Stark's AI assistant. You speak with a refined, witty, slightly British-accented personality. You are helpful, intelligent, and occasionally sardonic. Keep responses concise but informative. You address the user as "Sir" or "Ma'am" occasionally. You have a dry sense of humor. Never break character. Do not use emojis. Respond naturally and conversationally. Keep answers under 150 words unless asked for detail.`;
600
+
601
// Flip voice output on/off and reflect the state on the toolbar button.
function toggleVoice() {
    voiceEnabled = !voiceEnabled;
    const voiceBtn = document.getElementById('voiceToggle');
    voiceBtn.classList.toggle('active', voiceEnabled);
}
605
 
606
// Flip autoplay on/off and reflect the state on the toolbar button.
function toggleAutoplay() {
    autoplayEnabled = !autoplayEnabled;
    const autoBtn = document.getElementById('autoplayToggle');
    autoBtn.classList.toggle('active', autoplayEnabled);
}
610
 
611
// Append a chat bubble to the transcript. J.A.R.V.I.S. messages get a
// "PLAY VOICE" button when voice is enabled, and start speaking after a
// short delay when autoplay is on.
function addMessage(text, type) {
    const chatArea = document.getElementById('chatArea');
    const bubble = document.createElement('div');
    bubble.className = 'message ' + type;
    bubble.textContent = text;

    const wantsVoiceButton = (type === 'jarvis') && voiceEnabled;
    if (wantsVoiceButton) {
        const playBtn = document.createElement('div');
        playBtn.className = 'audio-btn';
        playBtn.innerHTML = '<svg viewBox="0 0 24 24"><path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02z"/></svg> PLAY VOICE';
        playBtn.onclick = () => playVoice(text, playBtn);
        bubble.appendChild(playBtn);

        if (autoplayEnabled) {
            // Small delay so the bubble renders before audio kicks in.
            setTimeout(() => playVoice(text, playBtn), 300);
        }
    }

    chatArea.appendChild(bubble);
    chatArea.scrollTop = chatArea.scrollHeight;
}
632
+
633
// Show the animated "Processing ..." indicator at the bottom of the chat.
function showThinking() {
    const indicator = document.createElement('div');
    indicator.className = 'thinking';
    indicator.id = 'thinkingIndicator';
    indicator.innerHTML = 'Processing <div class="thinking-dots"><span></span><span></span><span></span></div>';

    const chatArea = document.getElementById('chatArea');
    chatArea.appendChild(indicator);
    chatArea.scrollTop = chatArea.scrollHeight;
}
642
+
643
// Remove the "Processing" indicator if it is currently shown.
function removeThinking() {
    document.getElementById('thinkingIndicator')?.remove();
}
647
+
648
// Fetch synthesized speech for `text` from /tts and play it, animating the
// waveform/reactor while audio is active. Fixes vs. previous version:
// - checks resp.ok so an error JSON body is not handed to the audio decoder;
// - revokes the blob object URL on completion so repeated playback does not
//   leak memory.
async function playVoice(text, btn) {
    // Icon markup shared by the idle ("PLAY VOICE") and error ("RETRY") states.
    const SPEAKER_ICON = '<svg viewBox="0 0 24 24"><path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02z"/></svg>';

    // Stop anything already playing and reset its button label.
    if (currentAudio) {
        currentAudio.pause();
        document.querySelectorAll('.audio-btn.playing').forEach(b => {
            b.classList.remove('playing');
            b.innerHTML = SPEAKER_ICON + ' PLAY VOICE';
        });
    }

    btn.classList.add('playing');
    btn.innerHTML = '<svg viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg> SPEAKING...';
    document.getElementById('waveform').classList.add('active');
    document.getElementById('reactor').classList.add('active');

    try {
        const resp = await fetch('/tts', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({text: text})
        });
        // Server-side TTS failures return JSON error bodies — surface them
        // via the catch path instead of trying to play them.
        if (!resp.ok) throw new Error('TTS request failed: ' + resp.status);

        const blob = await resp.blob();
        const url = URL.createObjectURL(blob);
        currentAudio = new Audio(url);

        currentAudio.playbackRate = 1.1;

        currentAudio.onended = () => {
            btn.classList.remove('playing');
            btn.innerHTML = SPEAKER_ICON + ' PLAY VOICE';
            document.getElementById('waveform').classList.remove('active');
            document.getElementById('reactor').classList.remove('active');
            // Release the blob URL — otherwise every playback leaks it.
            URL.revokeObjectURL(url);
            currentAudio = null;
        };

        await currentAudio.play();
    } catch(e) {
        console.error('TTS error:', e);
        btn.classList.remove('playing');
        btn.innerHTML = SPEAKER_ICON + ' RETRY';
        document.getElementById('waveform').classList.remove('active');
        document.getElementById('reactor').classList.remove('active');
    }
}
691
+
692
// Send the user's input to /chat and render the reply. Fix vs. previous
// version: checks resp.ok before parsing, so an HTTP error from the proxy
// falls through to the in-character failure message rather than producing
// a confusing partial state.
async function sendMessage() {
    if (isProcessing) return;

    const input = document.getElementById('userInput');
    const text = input.value.trim();
    if (!text) return;

    // Lock the UI while a request is in flight.
    input.value = '';
    input.style.height = 'auto';
    isProcessing = true;
    document.getElementById('sendBtn').disabled = true;
    document.getElementById('statusDot').classList.add('processing');
    document.getElementById('statusText').textContent = 'PROCESSING';

    addMessage(text, 'user');
    conversationHistory.push({role: 'user', content: text});
    showThinking();

    try {
        // Keep only the last 10 turns so the prompt stays small for the
        // CPU-bound LLM backend.
        const messages = [
            {role: 'system', content: SYSTEM_PROMPT},
            ...conversationHistory.slice(-10)
        ];

        const resp = await fetch('/chat', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({messages: messages})
        });
        if (!resp.ok) throw new Error('chat request failed: ' + resp.status);

        const data = await resp.json();
        removeThinking();

        let reply = data.reply || 'I apologize, Sir. My neural pathways seem to be experiencing a momentary disruption.';

        // Strip <think>...</think> reasoning blocks some models emit.
        reply = reply.replace(/<think>[\s\S]*?<\/think>/g, '').trim();

        conversationHistory.push({role: 'assistant', content: reply});
        addMessage(reply, 'jarvis');

    } catch(e) {
        removeThinking();
        addMessage('Systems experiencing interference. Please try again, Sir.', 'jarvis');
    }

    // Unlock the UI regardless of success or failure.
    isProcessing = false;
    document.getElementById('sendBtn').disabled = false;
    document.getElementById('statusDot').classList.remove('processing');
    document.getElementById('statusText').textContent = 'ONLINE';
}
745
+
746
// Grow the input box with its content, capped at 120px tall.
const userInputEl = document.getElementById('userInput');
userInputEl.addEventListener('input', () => {
    userInputEl.style.height = 'auto';
    userInputEl.style.height = Math.min(userInputEl.scrollHeight, 120) + 'px';
});

// Greet the user once the UI has had a moment to settle.
setTimeout(() => {
    addMessage('Good day. J.A.R.V.I.S. at your service. All systems are nominal and ready to assist. What can I do for you?', 'jarvis');
}, 1000);
756
+ </script>
757
+ </body>
758
+ </html>
759
+ """
760
+
761
@app.route('/')
def index():
    """Serve the single-page J.A.R.V.I.S. UI (the HTML_PAGE template above)."""
    return render_template_string(HTML_PAGE)
764
+
765
@app.route('/chat', methods=['POST'])
def chat():
    """Proxy a chat request to the local llama.cpp server.

    Expects JSON ``{"messages": [...]}`` in OpenAI chat-completions format
    and returns JSON ``{"reply": "<assistant text>"}``. Any backend failure
    (connection error, non-2xx status, malformed body) is logged and mapped
    to an in-character fallback line rather than a 5xx, so the UI always
    gets something to display.
    """
    # request.json is None when the body is missing/not JSON; avoid an
    # AttributeError on .get in that case.
    data = request.json or {}
    messages = data.get('messages', [])

    try:
        resp = requests.post(
            f"{LLAMA_URL}/v1/chat/completions",
            json={
                "model": "jarvis",
                "messages": messages,
                "max_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "stream": False
            },
            # Generous timeout: the model runs on a small CPU.
            timeout=120
        )
        # Fail fast on HTTP error statuses instead of KeyError-ing while
        # digging through an error body below.
        resp.raise_for_status()
        result = resp.json()
        reply = result['choices'][0]['message']['content']
    except Exception as e:
        print(f"LLM Error: {e}")
        reply = "I'm experiencing a temporary system fluctuation, Sir. Please try again."

    return jsonify({"reply": reply})
790
 
791
@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech for the posted text with the Piper CLI.

    Expects JSON ``{"text": "..."}`` and returns a WAV response.
    Fixes vs. previous version:
    - each request gets its own temp file via ``tempfile.mkstemp`` (the old
      fixed ``/tmp/tts_output.wav`` path raced between concurrent requests);
    - the temp file is always removed, with the audio read into memory
      before the response is built.
    """
    # Function-scope imports: stdlib-only, keeps the top of the generated
    # jarvis.py unchanged.
    import io
    import os
    import tempfile

    data = request.json or {}
    text = data.get('text', '')

    if not text:
        return jsonify({"error": "No text"}), 400

    # Strip markup characters Piper would otherwise read out loud.
    text = text.replace('"', '').replace('*', '').replace('#', '')
    # Limit length for CPU speed
    if len(text) > 1000:
        text = text[:1000] + '.'

    # Unique per-request output path; mkstemp creates the file securely.
    fd, wav_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)

    try:
        # Use piper CLI for TTS - it's fast on CPU
        proc = subprocess.run(
            [
                'piper',
                '--model', VOICE_MODEL,
                '--config', VOICE_CONFIG,
                '--output_file', wav_path,
                '--length-scale', '0.85',
                '--sentence-silence', '0.15'
            ],
            input=text.encode('utf-8'),
            capture_output=True,
            timeout=60
        )

        if proc.returncode != 0:
            print(f"Piper error: {proc.stderr.decode()}")
            return jsonify({"error": "TTS failed"}), 500

        # Read the audio into memory so the temp file can be deleted in the
        # finally block before the response is streamed to the client.
        with open(wav_path, 'rb') as wav_file:
            wav_bytes = wav_file.read()

        return send_file(
            io.BytesIO(wav_bytes),
            mimetype='audio/wav',
            as_attachment=False,
            download_name='jarvis_voice.wav'
        )

    except subprocess.TimeoutExpired:
        return jsonify({"error": "TTS timeout"}), 500
    except Exception as e:
        print(f"TTS Error: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup on every path (success, piper failure, timeout).
        try:
            os.remove(wav_path)
        except OSError:
            pass
 
 
837
 
838
@app.route('/health')
def health():
    """Liveness probe: reports OK whenever the Flask process is up."""
    payload = {"status": "ok"}
    return jsonify(payload)
841
+
842
# NOTE: a later `RUN sed` step in the Dockerfile rewrites `port=7861` to
# `port=7860` (the port HF Spaces expects), so this literal must stay as-is.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7861)
844
+ PYTHON_APP
845
+
846
# Create startup script
# NOTE(review): here-document syntax inside RUN presumably requires BuildKit —
# confirm the builder has it enabled.
RUN cat << 'STARTUP' > /app/start.sh
#!/bin/bash
set -e

echo "========================================="
echo " J.A.R.V.I.S. SYSTEM INITIALIZATION"
echo "========================================="

# Start llama.cpp server in background
# Quantized KV cache (q8_0 keys / q4_0 values) and a 4k context keep memory
# low; -t 2 limits threads for the small CPU tier.
echo "[BOOT] Starting Neural Core (LLM)..."
/app/llama-server \
-m /app/Qwen3.5-0.8B-UD-Q5_K_XL.gguf \
--host 0.0.0.0 \
--port 8080 \
-t 2 \
--cache-type-k q8_0 \
--cache-type-v q4_0 \
-c 4096 \
-n 2048 \
--no-mmap &

LLAMA_PID=$!

# Wait for llama.cpp to be ready
# Polls /health for up to ~120s (60 tries x 2s); boot proceeds either way —
# this wait is best-effort only.
echo "[BOOT] Waiting for Neural Core..."
for i in $(seq 1 60); do
if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
echo "[BOOT] Neural Core ONLINE"
break
fi
sleep 2
done

# Start Flask app (the main UI on port 7860)
echo "[BOOT] Starting Voice Interface..."
cd /app
source /opt/venv/bin/activate

# Use a simple redirect: Flask on 7861, and we reverse proxy
# Actually, let's just run Flask on 7860 directly
# (the Dockerfile's sed step patches jarvis.py to port 7860 at build time)
python3 jarvis.py &
FLASK_PID=$!

echo "========================================="
echo " J.A.R.V.I.S. SYSTEMS ONLINE"
echo " UI: http://0.0.0.0:7860"
echo " LLM: http://0.0.0.0:8080"
echo "========================================="

# Wait for either process
# `wait -n` returns when the FIRST of the two exits, so the container stops
# if either server dies.
wait -n $LLAMA_PID $FLASK_PID
STARTUP
899
+
900
# Make the startup script executable inside the image.
RUN chmod +x /app/start.sh

# Fix the Flask port to 7860 (HF Spaces expects this)
# jarvis.py hard-codes port=7861, so patch it at build time.
RUN sed -i "s/port=7861/port=7860/" /app/jarvis.py

# HF Spaces routes external traffic to port 7860.
EXPOSE 7860

CMD ["/bin/bash", "/app/start.sh"]