vox-beta

Running

App Files Community

Joe6636564 committed on Dec 18, 2025

Commit

a582f4a

verified ·

1 Parent(s): 86693c5

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -271

app.py CHANGED Viewed

@@ -1,14 +1,13 @@
 from flask import Flask, request, jsonify, render_template
-from datetime import datetime
 from flask_cors import CORS
 from TTS.api import TTS
-from TTS.utils.manage import ModelManager
 import os
 import base64
-import shutil
 import wave
 import logging
 import threading
 from helper import (
     save_audio,
@@ -19,327 +18,199 @@ from helper import (
     ensure_wav_format,
 )
-# ---------- Basic config ----------
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("app")
 app = Flask(__name__)
 CORS(app)
-os.environ["COQUI_TOS_AGREED"] = "1"
 device = "cpu"
-# ============================================================
-# MODEL STORAGE PATHS & NAMES
-# ============================================================
-DATASET_MODEL_DIR = "/datasets/EllenBeta/Xtts_2/model"  # dataset mount (destination)
-LOCAL_CACHE_DIR = os.path.expanduser("~/.local/share/tts/xtts_v2_cache")  # local cache
-MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"  # coqui model id
-# Maximum audio (MB)
 MAX_AUDIO_SIZE_MB = 15
-# ============================================================
-# Utilities for resolving model download path (defensive)
-# ============================================================
-def resolve_model_path(raw):
-    """
-    Given the return value from ModelManager.download_model(...) try to
-    return a filesystem path (string) pointing at the downloaded model folder.
-    Handles strings, tuples/lists, or dict-like returns.
-    """
-    # If already a path string
-    if isinstance(raw, str):
-        return raw
-    # If a list/tuple, try first string-like element
-    if isinstance(raw, (list, tuple)):
-        for element in raw:
-            if isinstance(element, str) and os.path.exists(element):
-                return element
-        # fallback: try to join tuple items into a path if meaningful
-        try:
-            cand = os.path.join(*[str(x) for x in raw])
-            if os.path.exists(cand):
-                return cand
-        except Exception:
-            pass
-    # If dict-like, try common keys
-    if isinstance(raw, dict):
-        for key in ("model_path", "path", "directory"):
-            val = raw.get(key)
-            if isinstance(val, str) and os.path.exists(val):
-                return val
-    # final fallback: try to find the typical download directory
-    fallback = os.path.expanduser("~/.local/share/tts")
-    if os.path.exists(fallback):
-        # find matching folder
-        for root, dirs, files in os.walk(fallback):
-            if MODEL_NAME.split("/")[-1] in root:
-                return root
-    # Nothing found
-    return None
-# ============================================================
-# Ensure model is present (download once and copy into dataset)
-# ============================================================
 tts = None
-try:
-    if os.path.exists(DATASET_MODEL_DIR) and os.listdir(DATASET_MODEL_DIR):
-        log.info("✅ Loading XTTS model directly from dataset mount: %s", DATASET_MODEL_DIR)
-        tts = TTS(model_path=DATASET_MODEL_DIR).to(device)
-    else:
-        log.info("⬇️ Dataset model not found — downloading XTTS model (first run)...")
-        manager = ModelManager()
-        raw_path = manager.download_model(MODEL_NAME)
-        model_path = resolve_model_path(raw_path)
-        if not model_path or not os.path.exists(model_path):
-            # As a robust fallback, call TTS() with model id then try to locate typical folder
-            log.warning("Could not resolve model path from ModelManager result; falling back to direct TTS init.")
-            tts_tmp = TTS(MODEL_NAME).to(device)
-            # try to locate in default coqui location
-            candidate = os.path.expanduser("~/.local/share/tts")
-            model_path = None
-            if os.path.exists(candidate):
-                # pick the directory that contains the xtts_v2 name
-                for root, dirs, files in os.walk(candidate):
-                    if "xtts_v2" in root or "xtts" in root:
-                        model_path = root
-                        break
-            # if still None, set model_path to candidate root
-            if not model_path:
-                model_path = candidate
-            # assign tts from tts_tmp
-            tts = tts_tmp
-        # Ensure model_path now points to a directory
-        if model_path and os.path.exists(model_path):
-            # create local cache dir and copy files (ensure string)
-            os.makedirs(LOCAL_CACHE_DIR, exist_ok=True)
-            try:
-                shutil.copytree(model_path, LOCAL_CACHE_DIR, dirs_exist_ok=True)
-            except Exception as e:
-                # if copytree fails (we still continue)
-                log.warning("Copy to LOCAL_CACHE_DIR failed: %s", e)
-            # Copy into dataset mount for persistence (if writable)
-            try:
-                os.makedirs(DATASET_MODEL_DIR, exist_ok=True)
-                for item in os.listdir(model_path):
-                    s = os.path.join(model_path, item)
-                    d = os.path.join(DATASET_MODEL_DIR, item)
-                    if os.path.isdir(s):
-                        shutil.copytree(s, d, dirs_exist_ok=True)
-                    else:
-                        shutil.copy2(s, d)
-                log.info("📦 Model copied into dataset mount: %s", DATASET_MODEL_DIR)
-            except Exception as e:
-                log.warning("Could not copy model into dataset mount (may be read-only or missing perms): %s", e)
-            # If tts not already set (from fallback), initialize from model_path or dataset mount
-            if tts is None:
-                # prefer dataset dir if copy succeeded, otherwise local cache
-                init_path = DATASET_MODEL_DIR if os.path.exists(DATASET_MODEL_DIR) and os.listdir(DATASET_MODEL_DIR) else LOCAL_CACHE_DIR
-                tts = TTS(model_path=init_path).to(device)
-        else:
-            # final fallback: initialize directly from model name (internet)
-            log.warning("Could not find downloaded model folder; initializing TTS from model id directly.")
-            tts = TTS(MODEL_NAME).to(device)
-    log.info("✅ TTS ready.")
-except Exception as exc:
-    log.exception("Failed to prepare TTS model: %s", exc)
-    # Try a minimal fallback to avoid crash - attempt to init directly.
-    try:
-        tts = TTS(MODEL_NAME).to(device)
-    except Exception as exc2:
-        log.exception("Fatal: TTS could not be initialized: %s", exc2)
-        # re-raise so app startup fails loudly (preferred)
-        raise
-# ============================================================
-# Application logic (routes & helpers)
-# ============================================================
-active_tasks = {}
 @app.route("/")
 def greet_html():
     return render_template("home.html")
 @app.route("/sign-in")
 def sign_in():
     return render_template("sign_in.html")
 @app.route("/user_dash")
 def user_dash():
     user_id = request.args.get("user_id")
-    if user_id:
-        return render_template("u_dash.html", user_id=user_id)
-    return jsonify({"error": "Missing user_id"}), 400
 @app.route("/generate_voice", methods=["POST"])
 def generate_voice():
-    try:
-        data = request.get_json()
-        if not data:
-            return jsonify({"error": "No JSON body"}), 400
-        video = data.get("video")
-        text = data.get("text")
-        audio_base64 = data.get("audio")
-        task_id = data.get("task_id")
-        user_id = data.get("user_id")
-        if not user_id:
-            return jsonify({"error": "You must sign in before using this AI"}), 401
-        if not text:
-            return jsonify({"error": "Please input a prompt"}), 400
-        if not task_id:
-            return jsonify({"error": "task_id is required"}), 400
         if task_id in active_tasks:
-            return jsonify({"error": f"There is already an active task for {task_id}"}), 409
         active_tasks[task_id] = {
-            "user_id": user_id,
-            "status": "Processing",
             "created_at": datetime.now(),
         }
-        # Run processing (synchronous here - see note below about background processing)
-        process_vox(user_id, text, video, audio_base64, task_id)
-        return jsonify({"message": "Processing started", "task_id": task_id}), 202
-    except Exception as e:
-        log.exception("generate_voice error: %s", e)
-        return jsonify({"error": str(e)}), 500
 def process_vox(user_id, text, video, audio_base64, task_id):
-    temp_audio_path = None
     try:
-        # 1) Prepare input audio
         if audio_base64:
-            if audio_base64.startswith("data:audio/"):
                 audio_base64 = audio_base64.split(",", 1)[1]
-            temp_audio_path = f"/tmp/temp_ref_{task_id}.wav"
-            with open(temp_audio_path, "wb") as f:
                 f.write(base64.b64decode(audio_base64))
         elif video:
-            temp_audio_path = video_to_audio(video, output_path=None)
-        # 2) Ensure WAV and validate
-        temp_audio_path = ensure_wav_format(temp_audio_path)
-        valid, msg = validate_audio_file(temp_audio_path, MAX_AUDIO_SIZE_MB)
         if not valid:
-            raise Exception(f"Invalid audio file: {msg}")
-        # 3) Generate TTS (clone)
-        result_file = clone(text, temp_audio_path)
-        # 4) Save output to user_audios
-        out_dir = "user_audios"
-        os.makedirs(out_dir, exist_ok=True)
-        file_name = generate_random_filename("mp3")
-        file_path = os.path.join(out_dir, file_name)
-        with open(result_file, "rb") as src, open(file_path, "wb") as dst:
-            dst.write(src.read())
-        # 5) Gather metadata
-        with wave.open(file_path, "rb") as wf:
-            dura = wf.getnframes() / float(wf.getframerate())
-            duration = f"{dura:.2f}"
-            title = text[:20]
-        # 6) Upload and save
-        audio_url = save_to_dataset_repo(file_path, f"user/data/audios/{file_name}", file_name)
-        active_tasks[task_id].update(
-            {
                 "status": "completed",
                 "audio_url": audio_url,
-                "completion_time": datetime.now(),
-            }
-        )
-        save_audio(user_id, audio_url, title or "Audio", text, duration)
     except Exception as e:
-        log.exception("process_vox failed: %s", e)
-        active_tasks[task_id] = {
-            "status": "failed",
-            "error": str(e),
-            "completion_time": datetime.now(),
-        }
     finally:
-        # cleanup
-        try:
-            if temp_audio_path and os.path.exists(temp_audio_path):
-                os.remove(temp_audio_path)
-            task = active_tasks.get(task_id)
-            if task:
-                if task["status"]== "completed":
-                    remove_task_after_delay(task_id, delay_seconds=300)
-                elif task["status"] == "failed":
-                    del active_tasks[task_id]
-        except Exception:
-            # ignore cleanup issues
-            pass
-def clone(text, audio):
-    """
-    Use the TTS instance to produce an output file. Returns the path to the output file.
-    """
-    out_path = "./output.wav"
-    # use tts to write audio; let TTS manage model specifics
-    tts.tts_to_file(text=text, speaker_wav=audio, language="en", file_path=out_path)
-    return out_path
 @app.route("/task_status")
 def task_status():
     task_id = request.args.get("task_id")
     if not task_id:
-        return jsonify({"error": "task_id parameter is required"}), 400
-    if task_id not in active_tasks:
-        return jsonify({"status": "not found"}), 404
-    task = active_tasks[task_id]
-    response_data = {
-        "status": task["status"],
-        "start_time": task.get("created_at").isoformat() if task.get("created_at") else None,
-    }
-    if task["status"] == "completed":
-        response_data["audio_url"] = task.get("audio_url")
-        response_data["completion_time"] = (
-            task.get("completion_time").isoformat() if task.get("completion_time") else None
-        )
-    elif task["status"] == "failed":
-        response_data["error"] = task.get("error")
-        response_data["completion_time"] = (
-            task.get("completion_time").isoformat() if task.get("completion_time") else None
-        )
-    return jsonify(response_data)
-def remove_task_after_delay(task_id, delay_seconds=300):
-    def remove_task():
-        if task_id in active_tasks:
-            del active_tasks[task_id]
-            log.info(f"Task {task_id} auto-deleted after {delay_seconds} seconds.")
-    timer = threading.Timer(delay_seconds, remove_task)
-    timer.start()
-# Run only when invoked directly (Gunicorn will ignore this block)

 from flask import Flask, request, jsonify, render_template
 from flask_cors import CORS
+from datetime import datetime
 from TTS.api import TTS
 import os
 import base64
 import wave
 import logging
 import threading
+from uuid import uuid4
 from helper import (
     save_audio,
     ensure_wav_format,
 )
+# --------------------------------------------------
+# Basic config
+# --------------------------------------------------
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("app")
 app = Flask(__name__)
 CORS(app)
+os.environ["COQUI_TOS_AGREED"] = "1"
 device = "cpu"
+MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
 MAX_AUDIO_SIZE_MB = 15
+# --------------------------------------------------
+# Global state (thread-safe)
+# --------------------------------------------------
 tts = None
+tts_lock = threading.Lock()
+active_tasks = {}
+tasks_lock = threading.Lock()
+# --------------------------------------------------
+# Lazy-load XTTS (ONLY ONCE)
+# --------------------------------------------------
+def get_tts():
+    global tts
+    with tts_lock:
+        if tts is None:
+            log.info("🔊 Loading XTTS model (this takes time)...")
+            tts = TTS(model_name=MODEL_NAME).to(device)
+            log.info("✅ XTTS loaded")
+    return tts
+# --------------------------------------------------
+# Routes
+# --------------------------------------------------
 @app.route("/")
 def greet_html():
     return render_template("home.html")
 @app.route("/sign-in")
 def sign_in():
     return render_template("sign_in.html")
 @app.route("/user_dash")
 def user_dash():
     user_id = request.args.get("user_id")
+    if not user_id:
+        return jsonify({"error": "Missing user_id"}), 400
+    return render_template("u_dash.html", user_id=user_id)
+# --------------------------------------------------
+# Generate Voice (NON-BLOCKING)
+# --------------------------------------------------
 @app.route("/generate_voice", methods=["POST"])
 def generate_voice():
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No JSON body"}), 400
+    user_id = data.get("user_id")
+    text = data.get("text")
+    audio_base64 = data.get("audio")
+    video = data.get("video")
+    task_id = data.get("task_id")
+    if not user_id:
+        return jsonify({"error": "You must sign in"}), 401
+    if not text:
+        return jsonify({"error": "Text is required"}), 400
+    if not task_id:
+        return jsonify({"error": "task_id required"}), 400
+    with tasks_lock:
         if task_id in active_tasks:
+            return jsonify({"error": "Task already running"}), 409
         active_tasks[task_id] = {
+            "status": "processing",
             "created_at": datetime.now(),
         }
+    threading.Thread(
+        target=process_vox,
+        args=(user_id, text, video, audio_base64, task_id),
+        daemon=True
+    ).start()
+    return jsonify({"message": "Processing started", "task_id": task_id}), 202
+# --------------------------------------------------
+# Background Processor
+# --------------------------------------------------
 def process_vox(user_id, text, video, audio_base64, task_id):
+    ref_audio = None
+    out_file = None
     try:
+        # 1️⃣ Prepare reference audio
         if audio_base64:
+            if audio_base64.startswith("data:audio"):
                 audio_base64 = audio_base64.split(",", 1)[1]
+            ref_audio = f"/tmp/ref_{uuid4().hex}.wav"
+            with open(ref_audio, "wb") as f:
                 f.write(base64.b64decode(audio_base64))
         elif video:
+            ref_audio = video_to_audio(video)
+        ref_audio = ensure_wav_format(ref_audio)
+        valid, msg = validate_audio_file(ref_audio, MAX_AUDIO_SIZE_MB)
         if not valid:
+            raise Exception(msg)
+        # 2️⃣ Generate TTS
+        out_file = f"/tmp/tts_{uuid4().hex}.wav"
+        tts = get_tts()
+        tts.tts_to_file(
+            text=text,
+            speaker_wav=ref_audio,
+            language="en",
+            file_path=out_file
+        )
+        # 3️⃣ Duration
+        with wave.open(out_file, "rb") as wf:
+            duration = wf.getnframes() / wf.getframerate()
+        # 4️⃣ Save & upload
+        os.makedirs("user_audios", exist_ok=True)
+        file_name = generate_random_filename("wav")
+        final_path = os.path.join("user_audios", file_name)
+        os.rename(out_file, final_path)
+        audio_url = save_to_dataset_repo(
+            final_path,
+            f"user/data/audios/{file_name}",
+            file_name
+        )
+        save_audio(
+            user_id,
+            audio_url,
+            text[:20],
+            text,
+            f"{duration:.2f}"
+        )
+        with tasks_lock:
+            active_tasks[task_id].update({
                 "status": "completed",
                 "audio_url": audio_url,
+                "completed_at": datetime.now()
+            })
+        remove_task_after_delay(task_id)
     except Exception as e:
+        log.exception("TTS failed")
+        with tasks_lock:
+            active_tasks[task_id] = {
+                "status": "failed",
+                "error": str(e)
+            }
     finally:
+        for f in (ref_audio, out_file):
+            if f and os.path.exists(f):
+                os.remove(f)
+# --------------------------------------------------
+# Task Status
+# --------------------------------------------------
 @app.route("/task_status")
 def task_status():
     task_id = request.args.get("task_id")
     if not task_id:
+        return jsonify({"error": "task_id required"}), 400
+    with tasks_lock:
+        task = active_tasks.get(task_id)
+    if not task:
+        return jsonify({"status": "not found"}), 404
+    return jsonify(task)
+# --------------------------------------------------
+# Auto-clean tasks
+# --------------------------------------------------
+def remove_task_after_delay(task_id, delay=300):
+    def cleanup():
+        with tasks_lock:
+            active_tasks.pop(task_id, None)
+    threading.Timer(delay, cleanup).start()