OrbitMC commited on
Commit
4db9045
·
verified ·
1 Parent(s): db1556d

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +872 -339
Dockerfile CHANGED
@@ -1,375 +1,908 @@
1
- # ============================================================
2
- # Dockerfile — Fast Anime-English TTS Server (Piper-based)
3
- # ============================================================
4
- # Build: docker build -t anime-tts .
5
- # Run: docker run -p 5000:5000 anime-tts
6
- # Usage: curl -X POST http://localhost:5000/tts \
7
- # -H "Content-Type: application/json" \
8
- # -d '{"text":"Hello senpai! Welcome to the anime world!"}' \
9
- # --output speech.wav
10
- # ============================================================
11
 
12
- FROM python:3.11-slim
13
 
14
- # Install system deps
15
- RUN apt-get update && apt-get install -y --no-install-recommends \
16
- wget \
17
- curl \
18
- libsndfile1 \
19
  ffmpeg \
 
 
 
20
  && rm -rf /var/lib/apt/lists/*
21
 
22
- WORKDIR /app
 
 
23
 
24
- # Install Python dependencies
25
  RUN pip install --no-cache-dir \
 
26
  flask \
 
27
  piper-tts \
28
  numpy \
29
  scipy
30
 
31
- # --------------------------------------------------------------------------
32
- # Download a fast, high-quality anime-style English voice
33
- # We use "lessac" (medium quality, very expressive/bright) as the base
34
- # and also download an anime-adjacent voice.
35
- #
36
- # Available voices: https://huggingface.co/rhasspy/piper-voices/tree/main
37
- #
38
- # Voice options (pick ONE pair — model + config):
39
- # 1) en_US-lessac-medium — bright, expressive female (anime-adjacent)
40
- # 2) en_US-libritts_r-medium multiple speakers, some sound anime-like
41
- # 3) en_GB-jenny_dioco-medium — young British female
42
- #
43
- # We'll download TWO voices so users can pick via the API.
44
- # --------------------------------------------------------------------------
45
-
46
- RUN mkdir -p /app/voices
47
-
48
- # Voice 1: Lessac (bright, expressive, anime-adjacent female)
49
- RUN wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx" \
50
- -O /app/voices/lessac.onnx && \
51
- wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json" \
52
- -O /app/voices/lessac.onnx.json
53
-
54
- # Voice 2: Jenny Dioco (young, bright British female anime dub style)
55
- RUN wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_GB/jenny_dioco/medium/en_GB-jenny_dioco-medium.onnx" \
56
- -O /app/voices/jenny.onnx && \
57
- wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_GB/jenny_dioco/medium/en_GB-jenny_dioco-medium.onnx.json" \
58
- -O /app/voices/jenny.onnx.json
59
-
60
- # Voice 3: Amy (medium, clear North-American — works well sped up)
61
- RUN wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx" \
62
- -O /app/voices/amy.onnx && \
63
- wget -q "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx.json" \
64
- -O /app/voices/amy.onnx.json
65
-
66
- # --------------------------------------------------------------------------
67
- # Create the TTS API server
68
- # --------------------------------------------------------------------------
69
- RUN cat > /app/server.py << 'PYTHON_SERVER'
70
- #!/usr/bin/env python3
71
- """
72
- Fast Anime-Voice TTS Server using Piper.
73
-
74
- Endpoints:
75
- POST /tts — Generate speech, return WAV
76
- POST /tts/stream — Generate speech, return streaming WAV
77
- GET /voices — List available voices
78
- GET /health — Health check
79
-
80
- JSON body for /tts:
81
- {
82
- "text": "Hello world!",
83
- "voice": "lessac", // optional: lessac, jenny, amy (default: lessac)
84
- "speed": 1.0, // optional: 0.5-2.0 (default: 1.0)
85
- "pitch_shift": 0, // optional: semitones to shift pitch (for anime effect, try 2-4)
86
- "output_format": "wav" // optional: wav, mp3 (default: wav)
87
- }
88
- """
89
-
90
- import io
91
- import os
92
  import time
 
 
 
93
  import wave
94
  import struct
95
- import subprocess
96
- import tempfile
97
- import logging
98
- from pathlib import Path
99
- from typing import Optional
100
 
101
- import numpy as np
102
- from flask import Flask, request, jsonify, send_file, Response
103
- from piper import PiperVoice
104
 
105
- logging.basicConfig(level=logging.INFO)
106
- logger = logging.getLogger("anime-tts")
 
107
 
108
- app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- # ---- Voice Registry ----
111
- VOICES_DIR = Path("/app/voices")
112
- VOICE_MAP = {
113
- "lessac": VOICES_DIR / "lessac.onnx",
114
- "jenny": VOICES_DIR / "jenny.onnx",
115
- "amy": VOICES_DIR / "amy.onnx",
116
  }
117
 
118
- # Cache loaded voices for speed
119
- _voice_cache: dict[str, PiperVoice] = {}
120
-
121
-
122
- def get_voice(name: str) -> PiperVoice:
123
- """Load and cache a Piper voice."""
124
- if name not in _voice_cache:
125
- model_path = VOICE_MAP.get(name)
126
- if not model_path or not model_path.exists():
127
- raise ValueError(f"Voice '{name}' not found. Available: {list(VOICE_MAP.keys())}")
128
- logger.info(f"Loading voice: {name} from {model_path}")
129
- _voice_cache[name] = PiperVoice.load(str(model_path))
130
- logger.info(f"Voice '{name}' loaded successfully")
131
- return _voice_cache[name]
132
-
133
-
134
- def synthesize_speech(
135
- text: str,
136
- voice_name: str = "lessac",
137
- speed: float = 1.0,
138
- pitch_shift: int = 0,
139
- output_format: str = "wav",
140
- ) -> io.BytesIO:
141
- """Synthesize text to speech and return audio bytes."""
142
-
143
- voice = get_voice(voice_name)
144
-
145
- # Synthesize to WAV in memory
146
- wav_buffer = io.BytesIO()
147
-
148
- # Piper uses length_scale for speed (inverse: lower = faster)
149
- length_scale = 1.0 / max(0.25, min(speed, 4.0))
150
-
151
- with wave.open(wav_buffer, "wb") as wav_file:
152
- voice.synthesize(
153
- text,
154
- wav_file,
155
- length_scale=length_scale,
156
- sentence_silence=0.15,
157
- )
158
-
159
- wav_buffer.seek(0)
160
-
161
- # Apply pitch shift if requested (for anime effect)
162
- if pitch_shift != 0 or output_format == "mp3":
163
- wav_buffer = post_process_audio(wav_buffer, pitch_shift, output_format)
164
-
165
- return wav_buffer
166
-
167
-
168
- def post_process_audio(
169
- wav_buffer: io.BytesIO,
170
- pitch_shift: int = 0,
171
- output_format: str = "wav",
172
- ) -> io.BytesIO:
173
- """Apply pitch shifting and format conversion using ffmpeg."""
174
-
175
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_in:
176
- tmp_in.write(wav_buffer.read())
177
- tmp_in_path = tmp_in.name
178
-
179
- suffix = f".{output_format}"
180
- with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_out:
181
- tmp_out_path = tmp_out.name
182
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  try:
184
- # Build ffmpeg command
185
- cmd = ["ffmpeg", "-y", "-i", tmp_in_path]
186
-
187
- filters = []
188
-
189
- # Pitch shift using asetrate + aresample (fast method)
190
- if pitch_shift != 0:
191
- # Calculate rate multiplier from semitones
192
- rate_mult = 2 ** (pitch_shift / 12.0)
193
- # Read original sample rate
194
- with wave.open(tmp_in_path, "rb") as wf:
195
- orig_sr = wf.getframerate()
196
- new_sr = int(orig_sr * rate_mult)
197
- filters.append(f"asetrate={new_sr}")
198
- filters.append(f"aresample={orig_sr}")
199
- # Compensate tempo change from pitch shift
200
- tempo = 1.0 / rate_mult
201
- if 0.5 <= tempo <= 2.0:
202
- filters.append(f"atempo={tempo}")
203
- elif tempo < 0.5:
204
- # Chain atempo filters for extreme values
205
- filters.append(f"atempo=0.5,atempo={tempo/0.5}")
206
-
207
- if filters:
208
- cmd.extend(["-af", ",".join(filters)])
209
-
210
- if output_format == "mp3":
211
- cmd.extend(["-codec:a", "libmp3lame", "-q:a", "2"])
212
-
213
- cmd.append(tmp_out_path)
214
-
215
- result = subprocess.run(
216
- cmd, capture_output=True, timeout=30
217
  )
218
-
219
- if result.returncode != 0:
220
- logger.error(f"ffmpeg error: {result.stderr.decode()}")
221
- # Fall back to original
222
- wav_buffer.seek(0)
223
- return wav_buffer
224
-
225
- output_buffer = io.BytesIO()
226
- with open(tmp_out_path, "rb") as f:
227
- output_buffer.write(f.read())
228
- output_buffer.seek(0)
229
- return output_buffer
230
-
231
- finally:
232
- os.unlink(tmp_in_path)
233
- if os.path.exists(tmp_out_path):
234
- os.unlink(tmp_out_path)
235
-
236
-
237
- # ---- Pre-warm default voice on startup ----
238
- @app.before_request
239
- def _warmup():
240
- """Lazy warmup — load default voice on first request."""
241
- app.before_request_funcs[None].remove(_warmup)
242
- try:
243
- get_voice("lessac")
244
  except Exception as e:
245
- logger.warning(f"Warmup failed: {e}")
 
246
 
 
247
 
248
- # ---- API Routes ----
249
-
250
- @app.route("/health", methods=["GET"])
251
- def health():
252
- return jsonify({"status": "ok", "engine": "piper-tts", "cached_voices": list(_voice_cache.keys())})
253
-
254
-
255
- @app.route("/voices", methods=["GET"])
256
- def list_voices():
257
- voices = []
258
- for name, path in VOICE_MAP.items():
259
- voices.append({
260
- "name": name,
261
- "available": path.exists(),
262
- "description": {
263
- "lessac": "Bright expressive US female — anime-adjacent, great default",
264
- "jenny": "Young bright British female — anime dub style",
265
- "amy": "Clear US female — works well with pitch shift for anime effect",
266
- }.get(name, ""),
267
- "tip": "Try pitch_shift=2 or pitch_shift=3 for more anime-like sound",
268
- })
269
- return jsonify({"voices": voices})
270
-
271
-
272
- @app.route("/tts", methods=["POST"])
273
  def tts():
274
- """Main TTS endpoint."""
275
- start = time.time()
276
-
277
- data = request.get_json(force=True, silent=True) or {}
278
-
279
- text = data.get("text", "").strip()
280
  if not text:
281
- return jsonify({"error": "No text provided"}), 400
282
-
283
- if len(text) > 10000:
284
- return jsonify({"error": "Text too long (max 10000 chars)"}), 400
285
-
286
- voice_name = data.get("voice", "lessac")
287
- speed = float(data.get("speed", 1.0))
288
- pitch_shift = int(data.get("pitch_shift", 0))
289
- output_format = data.get("output_format", "wav").lower()
290
-
291
- if output_format not in ("wav", "mp3"):
292
- return jsonify({"error": "output_format must be 'wav' or 'mp3'"}), 400
293
-
294
- if voice_name not in VOICE_MAP:
295
- return jsonify({
296
- "error": f"Unknown voice '{voice_name}'",
297
- "available": list(VOICE_MAP.keys())
298
- }), 400
299
-
300
- try:
301
- audio_buffer = synthesize_speech(
302
- text=text,
303
- voice_name=voice_name,
304
- speed=speed,
305
- pitch_shift=pitch_shift,
306
- output_format=output_format,
307
- )
308
- except Exception as e:
309
- logger.exception("Synthesis failed")
310
- return jsonify({"error": str(e)}), 500
311
-
312
- elapsed = time.time() - start
313
- logger.info(f"TTS: {len(text)} chars, voice={voice_name}, speed={speed}, "
314
- f"pitch={pitch_shift}, format={output_format}, time={elapsed:.3f}s")
315
-
316
- mimetype = "audio/wav" if output_format == "wav" else "audio/mpeg"
317
-
318
- return send_file(
319
- audio_buffer,
320
- mimetype=mimetype,
321
- as_attachment=True,
322
- download_name=f"speech.{output_format}",
323
- )
324
-
325
-
326
- @app.route("/tts/batch", methods=["POST"])
327
- def tts_batch():
328
- """Batch TTS — synthesize multiple texts."""
329
- data = request.get_json(force=True, silent=True) or {}
330
- texts = data.get("texts", [])
331
-
332
- if not texts or not isinstance(texts, list):
333
- return jsonify({"error": "Provide 'texts' as a list of strings"}), 400
334
-
335
- voice_name = data.get("voice", "lessac")
336
- speed = float(data.get("speed", 1.0))
337
- pitch_shift = int(data.get("pitch_shift", 0))
338
-
339
- # Concatenate all texts with pauses
340
- combined = ". ".join(texts)
341
-
342
  try:
343
- audio_buffer = synthesize_speech(
344
- text=combined,
345
- voice_name=voice_name,
346
- speed=speed,
347
- pitch_shift=pitch_shift,
 
 
 
 
 
 
 
 
348
  )
349
- except Exception as e:
350
- return jsonify({"error": str(e)}), 500
351
-
352
- return send_file(audio_buffer, mimetype="audio/wav", as_attachment=True, download_name="batch.wav")
353
 
 
 
 
354
 
355
- if __name__ == "__main__":
356
- # Pre-load default voice
357
- logger.info("Pre-loading default voice...")
358
- try:
359
- get_voice("lessac")
360
- logger.info("Default voice ready!")
 
 
 
361
  except Exception as e:
362
- logger.error(f"Failed to pre-load voice: {e}")
363
-
364
- app.run(host="0.0.0.0", port=7860, threaded=True)
365
- PYTHON_SERVER
366
 
367
- # Expose port
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  EXPOSE 7860
369
 
370
- # Health check
371
- HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
372
- CMD curl -f http://localhost:5000/health || exit 1
373
-
374
- # Run the server
375
- CMD ["python", "/app/server.py"]
 
1
+ FROM ghcr.io/ggml-org/llama.cpp:full
 
 
 
 
 
 
 
 
 
2
 
3
+ WORKDIR /app
4
 
5
+ # Install all dependencies
6
+ RUN apt update && apt install -y \
7
+ python3-pip \
8
+ python3-venv \
 
9
  ffmpeg \
10
+ libsndfile1 \
11
+ espeak-ng \
12
+ curl \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
+ # Create venv to avoid pip externally-managed error
16
+ RUN python3 -m venv /opt/venv
17
+ ENV PATH="/opt/venv/bin:$PATH"
18
 
19
+ # Install Python packages - using piper-tts (tiny, fast, high quality, CPU-friendly)
20
  RUN pip install --no-cache-dir \
21
+ huggingface_hub \
22
  flask \
23
+ requests \
24
  piper-tts \
25
  numpy \
26
  scipy
27
 
28
+ # Download LLM model
29
+ RUN python3 -c 'from huggingface_hub import hf_hub_download; \
30
+ repo="unsloth/Qwen3.5-0.8B-GGUF"; \
31
+ hf_hub_download(repo_id=repo, filename="Qwen3.5-0.8B-UD-Q5_K_XL.gguf", local_dir="/app")'
32
+
33
+ # Download Piper voice model - using "lessac" high quality (fast + good quality anime-ish tone)
34
+ # Using medium quality for speed on CPU - sounds clean and crisp
35
+ RUN mkdir -p /app/voices && \
36
+ python3 -c ' \
37
+ from huggingface_hub import hf_hub_download; \
38
+ import os; \
39
+ # Using a high-quality fast English voice \
40
+ hf_hub_download( \
41
+ repo_id="rhasspy/piper-voices", \
42
+ filename="en/en_US/lessac/high/en_US-lessac-high.onnx", \
43
+ local_dir="/app/voices" \
44
+ ); \
45
+ hf_hub_download( \
46
+ repo_id="rhasspy/piper-voices", \
47
+ filename="en/en_US/lessac/high/en_US-lessac-high.onnx.json", \
48
+ local_dir="/app/voices" \
49
+ )'
50
+
51
+ # Create the entire Jarvis app as a single Python file embedded in Dockerfile
52
+ RUN cat << 'PYTHON_APP' > /app/jarvis.py
53
+ import subprocess
54
+ import threading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  import time
56
+ import os
57
+ import io
58
+ import json
59
  import wave
60
  import struct
61
+ import requests
62
+ from flask import Flask, request, jsonify, send_file, Response, render_template_string
 
 
 
63
 
64
+ app = Flask(__name__)
 
 
65
 
66
+ LLAMA_URL = "http://127.0.0.1:8080"
67
+ VOICE_MODEL = "/app/voices/en/en_US/lessac/high/en_US-lessac-high.onnx"
68
+ VOICE_CONFIG = VOICE_MODEL + ".json"
69
 
70
+ # ============================================================
71
+ # HTML/CSS/JS - Full Jarvis UI embedded
72
+ # ============================================================
73
+ HTML_PAGE = """
74
+ <!DOCTYPE html>
75
+ <html lang="en">
76
+ <head>
77
+ <meta charset="UTF-8">
78
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
79
+ <title>J.A.R.V.I.S. - AI Assistant</title>
80
+ <style>
81
+ @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Rajdhani:wght@300;400;500;600;700&display=swap');
82
+
83
+ * { margin: 0; padding: 0; box-sizing: border-box; }
84
+
85
+ body {
86
+ background: #0a0a0f;
87
+ color: #00d4ff;
88
+ font-family: 'Rajdhani', sans-serif;
89
+ min-height: 100vh;
90
+ overflow-x: hidden;
91
+ }
92
+
93
+ /* Animated background */
94
+ body::before {
95
+ content: '';
96
+ position: fixed;
97
+ top: 0; left: 0; right: 0; bottom: 0;
98
+ background:
99
+ radial-gradient(ellipse at 20% 50%, rgba(0, 212, 255, 0.03) 0%, transparent 50%),
100
+ radial-gradient(ellipse at 80% 50%, rgba(0, 100, 255, 0.03) 0%, transparent 50%),
101
+ radial-gradient(ellipse at 50% 0%, rgba(0, 212, 255, 0.05) 0%, transparent 40%);
102
+ z-index: -1;
103
+ animation: bgPulse 8s ease-in-out infinite;
104
+ }
105
+
106
+ @keyframes bgPulse {
107
+ 0%, 100% { opacity: 0.5; }
108
+ 50% { opacity: 1; }
109
+ }
110
+
111
+ /* Grid lines background */
112
+ body::after {
113
+ content: '';
114
+ position: fixed;
115
+ top: 0; left: 0; right: 0; bottom: 0;
116
+ background-image:
117
+ linear-gradient(rgba(0, 212, 255, 0.03) 1px, transparent 1px),
118
+ linear-gradient(90deg, rgba(0, 212, 255, 0.03) 1px, transparent 1px);
119
+ background-size: 50px 50px;
120
+ z-index: -1;
121
+ }
122
+
123
+ .container {
124
+ max-width: 900px;
125
+ margin: 0 auto;
126
+ padding: 20px;
127
+ min-height: 100vh;
128
+ display: flex;
129
+ flex-direction: column;
130
+ }
131
+
132
+ /* Header */
133
+ .header {
134
+ text-align: center;
135
+ padding: 30px 0 20px;
136
+ position: relative;
137
+ }
138
+
139
+ .header h1 {
140
+ font-family: 'Orbitron', sans-serif;
141
+ font-size: 2.5em;
142
+ font-weight: 900;
143
+ letter-spacing: 15px;
144
+ background: linear-gradient(135deg, #00d4ff, #0088ff, #00d4ff);
145
+ background-size: 200% 200%;
146
+ -webkit-background-clip: text;
147
+ -webkit-text-fill-color: transparent;
148
+ animation: gradientShift 3s ease-in-out infinite;
149
+ text-shadow: 0 0 30px rgba(0, 212, 255, 0.3);
150
+ }
151
+
152
+ @keyframes gradientShift {
153
+ 0%, 100% { background-position: 0% 50%; }
154
+ 50% { background-position: 100% 50%; }
155
+ }
156
+
157
+ .header .subtitle {
158
+ font-family: 'Rajdhani', sans-serif;
159
+ font-size: 0.85em;
160
+ color: rgba(0, 212, 255, 0.4);
161
+ letter-spacing: 8px;
162
+ margin-top: 5px;
163
+ font-weight: 300;
164
+ }
165
+
166
+ .status-bar {
167
+ display: flex;
168
+ justify-content: center;
169
+ gap: 30px;
170
+ margin-top: 15px;
171
+ font-size: 0.75em;
172
+ letter-spacing: 2px;
173
+ color: rgba(0, 212, 255, 0.3);
174
+ }
175
+
176
+ .status-item {
177
+ display: flex;
178
+ align-items: center;
179
+ gap: 6px;
180
+ }
181
+
182
+ .status-dot {
183
+ width: 6px;
184
+ height: 6px;
185
+ border-radius: 50%;
186
+ background: #00ff88;
187
+ box-shadow: 0 0 10px #00ff88;
188
+ animation: dotPulse 2s ease-in-out infinite;
189
+ }
190
+
191
+ .status-dot.processing {
192
+ background: #ffaa00;
193
+ box-shadow: 0 0 10px #ffaa00;
194
+ animation: dotPulse 0.5s ease-in-out infinite;
195
+ }
196
+
197
+ @keyframes dotPulse {
198
+ 0%, 100% { opacity: 1; }
199
+ 50% { opacity: 0.3; }
200
+ }
201
+
202
+ /* Arc Reactor Animation */
203
+ .reactor-container {
204
+ display: flex;
205
+ justify-content: center;
206
+ margin: 10px 0;
207
+ }
208
+
209
+ .reactor {
210
+ width: 80px;
211
+ height: 80px;
212
+ position: relative;
213
+ }
214
+
215
+ .reactor-ring {
216
+ position: absolute;
217
+ border: 2px solid rgba(0, 212, 255, 0.3);
218
+ border-radius: 50%;
219
+ border-top-color: #00d4ff;
220
+ }
221
+
222
+ .reactor-ring:nth-child(1) {
223
+ width: 80px; height: 80px; top: 0; left: 0;
224
+ animation: spin 3s linear infinite;
225
+ }
226
+ .reactor-ring:nth-child(2) {
227
+ width: 60px; height: 60px; top: 10px; left: 10px;
228
+ animation: spin 2s linear infinite reverse;
229
+ }
230
+ .reactor-ring:nth-child(3) {
231
+ width: 40px; height: 40px; top: 20px; left: 20px;
232
+ animation: spin 1.5s linear infinite;
233
+ }
234
+ .reactor-core {
235
+ position: absolute;
236
+ width: 20px; height: 20px;
237
+ top: 30px; left: 30px;
238
+ background: radial-gradient(circle, #00d4ff, #0066ff);
239
+ border-radius: 50%;
240
+ box-shadow: 0 0 20px #00d4ff, 0 0 40px rgba(0, 212, 255, 0.3);
241
+ animation: corePulse 2s ease-in-out infinite;
242
+ }
243
+
244
+ .reactor.active .reactor-ring { border-top-color: #00ff88; }
245
+ .reactor.active .reactor-core {
246
+ background: radial-gradient(circle, #00ff88, #00d4ff);
247
+ box-shadow: 0 0 30px #00ff88, 0 0 60px rgba(0, 255, 136, 0.3);
248
+ }
249
+
250
+ @keyframes spin { to { transform: rotate(360deg); } }
251
+ @keyframes corePulse {
252
+ 0%, 100% { transform: scale(1); opacity: 1; }
253
+ 50% { transform: scale(1.2); opacity: 0.8; }
254
+ }
255
+
256
+ /* Chat area */
257
+ .chat-area {
258
+ flex: 1;
259
+ overflow-y: auto;
260
+ padding: 20px 0;
261
+ margin: 10px 0;
262
+ max-height: 55vh;
263
+ scrollbar-width: thin;
264
+ scrollbar-color: rgba(0, 212, 255, 0.3) transparent;
265
+ }
266
+
267
+ .chat-area::-webkit-scrollbar { width: 4px; }
268
+ .chat-area::-webkit-scrollbar-track { background: transparent; }
269
+ .chat-area::-webkit-scrollbar-thumb {
270
+ background: rgba(0, 212, 255, 0.3);
271
+ border-radius: 2px;
272
+ }
273
+
274
+ .message {
275
+ margin: 12px 0;
276
+ padding: 15px 20px;
277
+ border-radius: 12px;
278
+ animation: messageIn 0.4s cubic-bezier(0.22, 1, 0.36, 1);
279
+ position: relative;
280
+ line-height: 1.6;
281
+ font-size: 1.05em;
282
+ }
283
+
284
+ @keyframes messageIn {
285
+ from { opacity: 0; transform: translateY(15px); }
286
+ to { opacity: 1; transform: translateY(0); }
287
+ }
288
+
289
+ .message.user {
290
+ background: linear-gradient(135deg, rgba(0, 212, 255, 0.1), rgba(0, 100, 255, 0.05));
291
+ border: 1px solid rgba(0, 212, 255, 0.15);
292
+ margin-left: 60px;
293
+ color: #e0f0ff;
294
+ }
295
+
296
+ .message.user::before {
297
+ content: 'YOU';
298
+ position: absolute;
299
+ top: -8px;
300
+ right: 15px;
301
+ font-family: 'Orbitron', sans-serif;
302
+ font-size: 0.6em;
303
+ color: rgba(0, 212, 255, 0.5);
304
+ letter-spacing: 3px;
305
+ }
306
+
307
+ .message.jarvis {
308
+ background: linear-gradient(135deg, rgba(0, 255, 136, 0.05), rgba(0, 212, 255, 0.03));
309
+ border: 1px solid rgba(0, 255, 136, 0.1);
310
+ margin-right: 60px;
311
+ color: #d0ffe0;
312
+ }
313
+
314
+ .message.jarvis::before {
315
+ content: 'JARVIS';
316
+ position: absolute;
317
+ top: -8px;
318
+ left: 15px;
319
+ font-family: 'Orbitron', sans-serif;
320
+ font-size: 0.6em;
321
+ color: rgba(0, 255, 136, 0.5);
322
+ letter-spacing: 3px;
323
+ }
324
+
325
+ .message.system {
326
+ text-align: center;
327
+ color: rgba(0, 212, 255, 0.3);
328
+ font-size: 0.8em;
329
+ border: none;
330
+ background: none;
331
+ letter-spacing: 2px;
332
+ }
333
+
334
+ /* Audio player in message */
335
+ .audio-btn {
336
+ display: inline-flex;
337
+ align-items: center;
338
+ gap: 8px;
339
+ margin-top: 10px;
340
+ padding: 6px 14px;
341
+ background: rgba(0, 212, 255, 0.1);
342
+ border: 1px solid rgba(0, 212, 255, 0.3);
343
+ border-radius: 20px;
344
+ color: #00d4ff;
345
+ cursor: pointer;
346
+ font-family: 'Rajdhani', sans-serif;
347
+ font-size: 0.85em;
348
+ letter-spacing: 1px;
349
+ transition: all 0.3s;
350
+ }
351
+
352
+ .audio-btn:hover {
353
+ background: rgba(0, 212, 255, 0.2);
354
+ box-shadow: 0 0 15px rgba(0, 212, 255, 0.2);
355
+ }
356
+
357
+ .audio-btn.playing {
358
+ background: rgba(0, 255, 136, 0.15);
359
+ border-color: rgba(0, 255, 136, 0.4);
360
+ color: #00ff88;
361
+ }
362
+
363
+ .audio-btn svg { width: 14px; height: 14px; fill: currentColor; }
364
+
365
+ /* Thinking indicator */
366
+ .thinking {
367
+ display: flex;
368
+ align-items: center;
369
+ gap: 8px;
370
+ padding: 15px 20px;
371
+ color: rgba(0, 212, 255, 0.5);
372
+ font-size: 0.9em;
373
+ }
374
+
375
+ .thinking-dots span {
376
+ display: inline-block;
377
+ width: 4px; height: 4px;
378
+ background: #00d4ff;
379
+ border-radius: 50%;
380
+ animation: thinkBounce 1.4s ease-in-out infinite;
381
+ margin: 0 2px;
382
+ }
383
+ .thinking-dots span:nth-child(2) { animation-delay: 0.2s; }
384
+ .thinking-dots span:nth-child(3) { animation-delay: 0.4s; }
385
+
386
+ @keyframes thinkBounce {
387
+ 0%, 80%, 100% { transform: scale(0.6); opacity: 0.3; }
388
+ 40% { transform: scale(1); opacity: 1; }
389
+ }
390
+
391
+ /* Input area */
392
+ .input-area {
393
+ padding: 15px 0;
394
+ position: relative;
395
+ }
396
+
397
+ .input-wrapper {
398
+ display: flex;
399
+ gap: 10px;
400
+ align-items: flex-end;
401
+ background: rgba(0, 212, 255, 0.03);
402
+ border: 1px solid rgba(0, 212, 255, 0.15);
403
+ border-radius: 16px;
404
+ padding: 8px;
405
+ transition: all 0.3s;
406
+ }
407
+
408
+ .input-wrapper:focus-within {
409
+ border-color: rgba(0, 212, 255, 0.4);
410
+ box-shadow: 0 0 20px rgba(0, 212, 255, 0.1);
411
+ }
412
+
413
+ #userInput {
414
+ flex: 1;
415
+ background: transparent;
416
+ border: none;
417
+ color: #e0f0ff;
418
+ font-family: 'Rajdhani', sans-serif;
419
+ font-size: 1.1em;
420
+ padding: 10px 15px;
421
+ outline: none;
422
+ resize: none;
423
+ max-height: 120px;
424
+ line-height: 1.5;
425
+ }
426
+
427
+ #userInput::placeholder {
428
+ color: rgba(0, 212, 255, 0.25);
429
+ letter-spacing: 1px;
430
+ }
431
+
432
+ .send-btn {
433
+ width: 48px;
434
+ height: 48px;
435
+ border-radius: 12px;
436
+ border: 1px solid rgba(0, 212, 255, 0.3);
437
+ background: linear-gradient(135deg, rgba(0, 212, 255, 0.15), rgba(0, 100, 255, 0.1));
438
+ color: #00d4ff;
439
+ cursor: pointer;
440
+ display: flex;
441
+ align-items: center;
442
+ justify-content: center;
443
+ transition: all 0.3s;
444
+ flex-shrink: 0;
445
+ }
446
+
447
+ .send-btn:hover:not(:disabled) {
448
+ background: linear-gradient(135deg, rgba(0, 212, 255, 0.3), rgba(0, 100, 255, 0.2));
449
+ box-shadow: 0 0 20px rgba(0, 212, 255, 0.2);
450
+ transform: scale(1.05);
451
+ }
452
+
453
+ .send-btn:disabled {
454
+ opacity: 0.3;
455
+ cursor: not-allowed;
456
+ }
457
+
458
+ .send-btn svg { width: 20px; height: 20px; fill: currentColor; }
459
+
460
+ /* Voice toggle */
461
+ .controls {
462
+ display: flex;
463
+ justify-content: center;
464
+ gap: 15px;
465
+ margin-top: 10px;
466
+ }
467
+
468
+ .toggle-btn {
469
+ display: flex;
470
+ align-items: center;
471
+ gap: 6px;
472
+ padding: 6px 16px;
473
+ border-radius: 20px;
474
+ border: 1px solid rgba(0, 212, 255, 0.2);
475
+ background: rgba(0, 212, 255, 0.05);
476
+ color: rgba(0, 212, 255, 0.5);
477
+ cursor: pointer;
478
+ font-family: 'Rajdhani', sans-serif;
479
+ font-size: 0.8em;
480
+ letter-spacing: 2px;
481
+ transition: all 0.3s;
482
+ }
483
+
484
+ .toggle-btn.active {
485
+ border-color: rgba(0, 255, 136, 0.4);
486
+ background: rgba(0, 255, 136, 0.1);
487
+ color: #00ff88;
488
+ }
489
+
490
+ .toggle-btn svg { width: 14px; height: 14px; fill: currentColor; }
491
+
492
+ /* Waveform visualization */
493
+ .waveform {
494
+ display: flex;
495
+ align-items: center;
496
+ justify-content: center;
497
+ gap: 3px;
498
+ height: 30px;
499
+ margin: 5px 0;
500
+ opacity: 0;
501
+ transition: opacity 0.3s;
502
+ }
503
+
504
+ .waveform.active { opacity: 1; }
505
+
506
+ .waveform-bar {
507
+ width: 3px;
508
+ background: linear-gradient(to top, #00d4ff, #00ff88);
509
+ border-radius: 2px;
510
+ animation: wave 0.8s ease-in-out infinite;
511
+ }
512
+
513
+ @keyframes wave {
514
+ 0%, 100% { height: 5px; }
515
+ 50% { height: 25px; }
516
+ }
517
+
518
+ /* Mobile */
519
+ @media (max-width: 600px) {
520
+ .header h1 { font-size: 1.8em; letter-spacing: 8px; }
521
+ .message { margin-left: 10px !important; margin-right: 10px !important; }
522
+ .status-bar { gap: 15px; font-size: 0.65em; }
523
+ }
524
+ </style>
525
+ </head>
526
+ <body>
527
+
528
+ <div class="container">
529
+ <div class="header">
530
+ <h1>J.A.R.V.I.S.</h1>
531
+ <div class="subtitle">JUST A RATHER VERY INTELLIGENT SYSTEM</div>
532
+ <div class="status-bar">
533
+ <div class="status-item">
534
+ <div class="status-dot" id="statusDot"></div>
535
+ <span id="statusText">ONLINE</span>
536
+ </div>
537
+ <div class="status-item">
538
+ <span>NEURAL CORE v3.5</span>
539
+ </div>
540
+ <div class="status-item">
541
+ <span>VOICE SYNTHESIS ACTIVE</span>
542
+ </div>
543
+ </div>
544
+ </div>
545
+
546
+ <div class="reactor-container">
547
+ <div class="reactor" id="reactor">
548
+ <div class="reactor-ring"></div>
549
+ <div class="reactor-ring"></div>
550
+ <div class="reactor-ring"></div>
551
+ <div class="reactor-core"></div>
552
+ </div>
553
+ </div>
554
+
555
+ <div class="waveform" id="waveform">
556
+ <div class="waveform-bar" style="animation-delay: 0s"></div>
557
+ <div class="waveform-bar" style="animation-delay: 0.1s"></div>
558
+ <div class="waveform-bar" style="animation-delay: 0.2s"></div>
559
+ <div class="waveform-bar" style="animation-delay: 0.3s"></div>
560
+ <div class="waveform-bar" style="animation-delay: 0.4s"></div>
561
+ <div class="waveform-bar" style="animation-delay: 0.3s"></div>
562
+ <div class="waveform-bar" style="animation-delay: 0.2s"></div>
563
+ <div class="waveform-bar" style="animation-delay: 0.1s"></div>
564
+ <div class="waveform-bar" style="animation-delay: 0s"></div>
565
+ </div>
566
+
567
+ <div class="chat-area" id="chatArea">
568
+ <div class="message system">— SYSTEM INITIALIZED — AWAITING INPUT —</div>
569
+ </div>
570
+
571
+ <div class="input-area">
572
+ <div class="input-wrapper">
573
+ <textarea id="userInput" rows="1" placeholder="Speak to J.A.R.V.I.S. ..."
574
+ onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();sendMessage();}"></textarea>
575
+ <button class="send-btn" id="sendBtn" onclick="sendMessage()">
576
+ <svg viewBox="0 0 24 24"><path d="M2 21l21-9L2 3v7l15 2-15 2v7z"/></svg>
577
+ </button>
578
+ </div>
579
+ <div class="controls">
580
+ <button class="toggle-btn active" id="voiceToggle" onclick="toggleVoice()">
581
+ <svg viewBox="0 0 24 24"><path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02zM14 3.23v2.06c2.89.86 5 3.54 5 6.71s-2.11 5.85-5 6.71v2.06c4.01-.91 7-4.49 7-8.77s-2.99-7.86-7-8.77z"/></svg>
582
+ <span>VOICE</span>
583
+ </button>
584
+ <button class="toggle-btn active" id="autoplayToggle" onclick="toggleAutoplay()">
585
+ <svg viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>
586
+ <span>AUTOPLAY</span>
587
+ </button>
588
+ </div>
589
+ </div>
590
+ </div>
591
+
592
+ <script>
593
+ let voiceEnabled = true;
594
+ let autoplayEnabled = true;
595
+ let isProcessing = false;
596
+ let currentAudio = null;
597
+ let conversationHistory = [];
598
+
599
+ const SYSTEM_PROMPT = `You are J.A.R.V.I.S. (Just A Rather Very Intelligent System), Tony Stark's AI assistant. You speak with a refined, witty, slightly British-accented personality. You are helpful, intelligent, and occasionally sardonic. Keep responses concise but informative. You address the user as "Sir" or "Ma'am" occasionally. You have a dry sense of humor. Never break character. Do not use emojis. Respond naturally and conversationally. Keep answers under 150 words unless asked for detail.`;
600
+
601
// Flip voice output on/off and reflect the state on the toolbar button.
function toggleVoice() {
    voiceEnabled = !voiceEnabled;
    const voiceBtn = document.getElementById('voiceToggle');
    voiceBtn.classList.toggle('active', voiceEnabled);
}
605
 
606
// Flip autoplay on/off and reflect the state on the toolbar button.
function toggleAutoplay() {
    autoplayEnabled = !autoplayEnabled;
    const autoBtn = document.getElementById('autoplayToggle');
    autoBtn.classList.toggle('active', autoplayEnabled);
}
610
 
611
// Append a chat bubble to the transcript. J.A.R.V.I.S. messages get a
// "PLAY VOICE" button when voice is enabled, and start speaking after a
// short delay when autoplay is on.
function addMessage(text, type) {
    const chatArea = document.getElementById('chatArea');
    const bubble = document.createElement('div');
    bubble.className = 'message ' + type;
    bubble.textContent = text;

    const wantsVoiceButton = (type === 'jarvis') && voiceEnabled;
    if (wantsVoiceButton) {
        const playBtn = document.createElement('div');
        playBtn.className = 'audio-btn';
        playBtn.innerHTML = '<svg viewBox="0 0 24 24"><path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02z"/></svg> PLAY VOICE';
        playBtn.onclick = () => playVoice(text, playBtn);
        bubble.appendChild(playBtn);

        if (autoplayEnabled) {
            // Small delay so the bubble renders before audio kicks in.
            setTimeout(() => playVoice(text, playBtn), 300);
        }
    }

    chatArea.appendChild(bubble);
    chatArea.scrollTop = chatArea.scrollHeight;
}
632
+
633
// Show the animated "Processing ..." indicator at the bottom of the chat.
function showThinking() {
    const indicator = document.createElement('div');
    indicator.className = 'thinking';
    indicator.id = 'thinkingIndicator';
    indicator.innerHTML = 'Processing <div class="thinking-dots"><span></span><span></span><span></span></div>';

    const chatArea = document.getElementById('chatArea');
    chatArea.appendChild(indicator);
    chatArea.scrollTop = chatArea.scrollHeight;
}
642
+
643
// Remove the "Processing" indicator if it is currently shown.
function removeThinking() {
    document.getElementById('thinkingIndicator')?.remove();
}
647
+
648
// Fetch synthesized speech for `text` from /tts and play it, animating the
// waveform/reactor while audio is active. Fixes vs. previous version:
// - checks resp.ok so an error JSON body is not handed to the audio decoder;
// - revokes the blob object URL on completion so repeated playback does not
//   leak memory.
async function playVoice(text, btn) {
    // Icon markup shared by the idle ("PLAY VOICE") and error ("RETRY") states.
    const SPEAKER_ICON = '<svg viewBox="0 0 24 24"><path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02z"/></svg>';

    // Stop anything already playing and reset its button label.
    if (currentAudio) {
        currentAudio.pause();
        document.querySelectorAll('.audio-btn.playing').forEach(b => {
            b.classList.remove('playing');
            b.innerHTML = SPEAKER_ICON + ' PLAY VOICE';
        });
    }

    btn.classList.add('playing');
    btn.innerHTML = '<svg viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg> SPEAKING...';
    document.getElementById('waveform').classList.add('active');
    document.getElementById('reactor').classList.add('active');

    try {
        const resp = await fetch('/tts', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({text: text})
        });
        // Server-side TTS failures return JSON error bodies — surface them
        // via the catch path instead of trying to play them.
        if (!resp.ok) throw new Error('TTS request failed: ' + resp.status);

        const blob = await resp.blob();
        const url = URL.createObjectURL(blob);
        currentAudio = new Audio(url);

        currentAudio.playbackRate = 1.1;

        currentAudio.onended = () => {
            btn.classList.remove('playing');
            btn.innerHTML = SPEAKER_ICON + ' PLAY VOICE';
            document.getElementById('waveform').classList.remove('active');
            document.getElementById('reactor').classList.remove('active');
            // Release the blob URL — otherwise every playback leaks it.
            URL.revokeObjectURL(url);
            currentAudio = null;
        };

        await currentAudio.play();
    } catch(e) {
        console.error('TTS error:', e);
        btn.classList.remove('playing');
        btn.innerHTML = SPEAKER_ICON + ' RETRY';
        document.getElementById('waveform').classList.remove('active');
        document.getElementById('reactor').classList.remove('active');
    }
}
691
+
692
// Send the user's input to /chat and render the reply. Fix vs. previous
// version: checks resp.ok before parsing, so an HTTP error from the proxy
// falls through to the in-character failure message rather than producing
// a confusing partial state.
async function sendMessage() {
    if (isProcessing) return;

    const input = document.getElementById('userInput');
    const text = input.value.trim();
    if (!text) return;

    // Lock the UI while a request is in flight.
    input.value = '';
    input.style.height = 'auto';
    isProcessing = true;
    document.getElementById('sendBtn').disabled = true;
    document.getElementById('statusDot').classList.add('processing');
    document.getElementById('statusText').textContent = 'PROCESSING';

    addMessage(text, 'user');
    conversationHistory.push({role: 'user', content: text});
    showThinking();

    try {
        // Keep only the last 10 turns so the prompt stays small for the
        // CPU-bound LLM backend.
        const messages = [
            {role: 'system', content: SYSTEM_PROMPT},
            ...conversationHistory.slice(-10)
        ];

        const resp = await fetch('/chat', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({messages: messages})
        });
        if (!resp.ok) throw new Error('chat request failed: ' + resp.status);

        const data = await resp.json();
        removeThinking();

        let reply = data.reply || 'I apologize, Sir. My neural pathways seem to be experiencing a momentary disruption.';

        // Strip <think>...</think> reasoning blocks some models emit.
        reply = reply.replace(/<think>[\s\S]*?<\/think>/g, '').trim();

        conversationHistory.push({role: 'assistant', content: reply});
        addMessage(reply, 'jarvis');

    } catch(e) {
        removeThinking();
        addMessage('Systems experiencing interference. Please try again, Sir.', 'jarvis');
    }

    // Unlock the UI regardless of success or failure.
    isProcessing = false;
    document.getElementById('sendBtn').disabled = false;
    document.getElementById('statusDot').classList.remove('processing');
    document.getElementById('statusText').textContent = 'ONLINE';
}
745
+
746
// Grow the input box with its content, capped at 120px tall.
const userInputEl = document.getElementById('userInput');
userInputEl.addEventListener('input', () => {
    userInputEl.style.height = 'auto';
    userInputEl.style.height = Math.min(userInputEl.scrollHeight, 120) + 'px';
});

// Greet the user once the UI has had a moment to settle.
setTimeout(() => {
    addMessage('Good day. J.A.R.V.I.S. at your service. All systems are nominal and ready to assist. What can I do for you?', 'jarvis');
}, 1000);
756
+ </script>
757
+ </body>
758
+ </html>
759
+ """
760
+
761
@app.route('/')
def index():
    """Serve the single-page J.A.R.V.I.S. UI (the HTML_PAGE template above)."""
    return render_template_string(HTML_PAGE)
764
+
765
@app.route('/chat', methods=['POST'])
def chat():
    """Proxy a chat request to the local llama.cpp server.

    Expects JSON ``{"messages": [...]}`` in OpenAI chat-completions format
    and returns JSON ``{"reply": "<assistant text>"}``. Any backend failure
    (connection error, non-2xx status, malformed body) is logged and mapped
    to an in-character fallback line rather than a 5xx, so the UI always
    gets something to display.
    """
    # request.json is None when the body is missing/not JSON; avoid an
    # AttributeError on .get in that case.
    data = request.json or {}
    messages = data.get('messages', [])

    try:
        resp = requests.post(
            f"{LLAMA_URL}/v1/chat/completions",
            json={
                "model": "jarvis",
                "messages": messages,
                "max_tokens": 512,
                "temperature": 0.7,
                "top_p": 0.9,
                "stream": False
            },
            # Generous timeout: the model runs on a small CPU.
            timeout=120
        )
        # Fail fast on HTTP error statuses instead of KeyError-ing while
        # digging through an error body below.
        resp.raise_for_status()
        result = resp.json()
        reply = result['choices'][0]['message']['content']
    except Exception as e:
        print(f"LLM Error: {e}")
        reply = "I'm experiencing a temporary system fluctuation, Sir. Please try again."

    return jsonify({"reply": reply})
790
 
791
@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech for the posted text with the Piper CLI.

    Expects JSON ``{"text": "..."}`` and returns a WAV response.
    Fixes vs. previous version:
    - each request gets its own temp file via ``tempfile.mkstemp`` (the old
      fixed ``/tmp/tts_output.wav`` path raced between concurrent requests);
    - the temp file is always removed, with the audio read into memory
      before the response is built.
    """
    # Function-scope imports: stdlib-only, keeps the top of the generated
    # jarvis.py unchanged.
    import io
    import os
    import tempfile

    data = request.json or {}
    text = data.get('text', '')

    if not text:
        return jsonify({"error": "No text"}), 400

    # Strip markup characters Piper would otherwise read out loud.
    text = text.replace('"', '').replace('*', '').replace('#', '')
    # Limit length for CPU speed
    if len(text) > 1000:
        text = text[:1000] + '.'

    # Unique per-request output path; mkstemp creates the file securely.
    fd, wav_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)

    try:
        # Use piper CLI for TTS - it's fast on CPU
        proc = subprocess.run(
            [
                'piper',
                '--model', VOICE_MODEL,
                '--config', VOICE_CONFIG,
                '--output_file', wav_path,
                '--length-scale', '0.85',
                '--sentence-silence', '0.15'
            ],
            input=text.encode('utf-8'),
            capture_output=True,
            timeout=60
        )

        if proc.returncode != 0:
            print(f"Piper error: {proc.stderr.decode()}")
            return jsonify({"error": "TTS failed"}), 500

        # Read the audio into memory so the temp file can be deleted in the
        # finally block before the response is streamed to the client.
        with open(wav_path, 'rb') as wav_file:
            wav_bytes = wav_file.read()

        return send_file(
            io.BytesIO(wav_bytes),
            mimetype='audio/wav',
            as_attachment=False,
            download_name='jarvis_voice.wav'
        )

    except subprocess.TimeoutExpired:
        return jsonify({"error": "TTS timeout"}), 500
    except Exception as e:
        print(f"TTS Error: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup on every path (success, piper failure, timeout).
        try:
            os.remove(wav_path)
        except OSError:
            pass
 
 
837
 
838
@app.route('/health')
def health():
    """Liveness probe: reports OK whenever the Flask process is up."""
    payload = {"status": "ok"}
    return jsonify(payload)
841
+
842
# NOTE: a later `RUN sed` step in the Dockerfile rewrites `port=7861` to
# `port=7860` (the port HF Spaces expects), so this literal must stay as-is.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7861)
844
+ PYTHON_APP
845
+
846
# Create startup script
# NOTE(review): here-document syntax inside RUN presumably requires BuildKit —
# confirm the builder has it enabled.
RUN cat << 'STARTUP' > /app/start.sh
#!/bin/bash
set -e

echo "========================================="
echo " J.A.R.V.I.S. SYSTEM INITIALIZATION"
echo "========================================="

# Start llama.cpp server in background
# Quantized KV cache (q8_0 keys / q4_0 values) and a 4k context keep memory
# low; -t 2 limits threads for the small CPU tier.
echo "[BOOT] Starting Neural Core (LLM)..."
/app/llama-server \
-m /app/Qwen3.5-0.8B-UD-Q5_K_XL.gguf \
--host 0.0.0.0 \
--port 8080 \
-t 2 \
--cache-type-k q8_0 \
--cache-type-v q4_0 \
-c 4096 \
-n 2048 \
--no-mmap &

LLAMA_PID=$!

# Wait for llama.cpp to be ready
# Polls /health for up to ~120s (60 tries x 2s); boot proceeds either way —
# this wait is best-effort only.
echo "[BOOT] Waiting for Neural Core..."
for i in $(seq 1 60); do
if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
echo "[BOOT] Neural Core ONLINE"
break
fi
sleep 2
done

# Start Flask app (the main UI on port 7860)
echo "[BOOT] Starting Voice Interface..."
cd /app
source /opt/venv/bin/activate

# Use a simple redirect: Flask on 7861, and we reverse proxy
# Actually, let's just run Flask on 7860 directly
# (the Dockerfile's sed step patches jarvis.py to port 7860 at build time)
python3 jarvis.py &
FLASK_PID=$!

echo "========================================="
echo " J.A.R.V.I.S. SYSTEMS ONLINE"
echo " UI: http://0.0.0.0:7860"
echo " LLM: http://0.0.0.0:8080"
echo "========================================="

# Wait for either process
# `wait -n` returns when the FIRST of the two exits, so the container stops
# if either server dies.
wait -n $LLAMA_PID $FLASK_PID
STARTUP
899
+
900
# Make the startup script executable inside the image.
RUN chmod +x /app/start.sh

# Fix the Flask port to 7860 (HF Spaces expects this)
# jarvis.py hard-codes port=7861, so patch it at build time.
RUN sed -i "s/port=7861/port=7860/" /app/jarvis.py

# HF Spaces routes external traffic to port 7860.
EXPOSE 7860

CMD ["/bin/bash", "/app/start.sh"]