Spaces:

multija
/

locutor

Build error

App Files Community

multija committed on Aug 28, 2025

Commit

1f065b7

verified ·

1 Parent(s): c02f75f

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -168

app.py CHANGED Viewed

@@ -1,187 +1,78 @@
 import os
-import sys
-import time
-import base64
 import logging
 import traceback
-from inspect import signature
-import requests
-import soundfile as sf
-import numpy as np
-from fastapi import FastAPI, Body, HTTPException
-from fastapi.responses import FileResponse
-# Se a instalação via pip não expuser f5_tts.inference, forçar src path (já clonamos no Dockerfile)
-# sys.path.append("/F5-TTS/src")  # se precisar forçar, descomente
-# Tenta importar inference de forma robusta
-try:
-    # preferência: f5_tts.inference (de repo)
-    from f5_tts.inference import inference
-except Exception:
-    # fallback: tentar importar o pacote instalado
-    try:
-        from f5_tts.api import inference  # caso esteja exposto assim
-    except Exception:
-        inference = None
-# --- Config ---
 logging.basicConfig(level=logging.INFO)
-log = logging.getLogger("f5tts-api")
-MODEL_URL  = "https://huggingface.co/firstpixel/F5-TTS-pt-br/resolve/main/pt-br/model_last.safetensors"
-CONFIG_URL = "https://raw.githubusercontent.com/SWivid/F5-TTS/refs/heads/main/src/f5_tts/configs/F5TTS_Base.yaml"
-VOCAB_URL  = "https://huggingface.co/SWivid/F5-TTS/raw/main/F5TTS_Base/vocab.txt"
-MODEL_FILE  = "model/model_last.safetensors"
-CONFIG_FILE = "config/config.yaml"
-VOCAB_FILE  = "vocab/vocab.txt"
-OUTPUT_FILE = "output/output.wav"
-os.makedirs("model", exist_ok=True)
-os.makedirs("config", exist_ok=True)
-os.makedirs("vocab", exist_ok=True)
-os.makedirs("output", exist_ok=True)
-def download_file(url, dest_path, retry=2):
-    if os.path.exists(dest_path):
-        log.info(f"Arquivo já existe: {dest_path}")
-        return dest_path
-    for attempt in range(1, retry+1):
         try:
-            log.info(f"Baixando {url} -> {dest_path} (tentativa {attempt})")
-            r = requests.get(url, stream=True, timeout=120)
-            r.raise_for_status()
-            with open(dest_path, "wb") as f:
-                for chunk in r.iter_content(chunk_size=8192):
-                    if chunk:
-                        f.write(chunk)
-            log.info(f"Download concluído: {dest_path}")
-            return dest_path
-        except Exception as e:
-            log.warning(f"Falha download ({attempt}/{retry}): {e}")
-            if attempt == retry:
                 raise
-            time.sleep(3)
-    return dest_path
-# baixar se necessário
-try:
-    download_file(MODEL_URL, MODEL_FILE)
-    download_file(CONFIG_URL, CONFIG_FILE)
-    download_file(VOCAB_URL, VOCAB_FILE)
-except Exception as e:
-    log.error("Erro ao baixar arquivos iniciais: %s", e)
-    log.debug(traceback.format_exc())
-    # não aborta a inicialização - se o modelo for provido manualmente, ainda pode funcionar
-# Certifica que temos a função inference
-if inference is None:
-    # tentativa de importar novamente via sys.path ajustado (se o repo estiver copiado)
-    try:
-        sys.path.append("/F5-TTS/src")
-        from f5_tts.inference import inference
-        log.info("Import f5_tts.inference via /F5-TTS/src OK")
-    except Exception:
-        log.error("Não foi possível importar f5_tts.inference. Verifique instalação do F5-TTS.")
-        log.debug(traceback.format_exc())
-        raise RuntimeError("inference function not available (f5_tts)")
-# Detecta dispositivo disponível (por segurança usa CPU se não tiver CUDA)
-try:
-    import torch
-    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-except Exception:
-    DEVICE = "cpu"
-log.info(f"Device selecionado: {DEVICE}")
-# Função genérica que adapta kwargs à assinatura da função inference
-def call_inference_dynamic(text, model_path, config_path, vocab_path, device):
-    sig = signature(inference)
-    params = sig.parameters.keys()
-    # montar mapping de possíveis nomes usados no repo
-    candidate_kwargs = {
-        "text": text,
-        "model_path": model_path,
-        "config_path": config_path,
-        "vocab_path": vocab_path,
-        "vocab_file": vocab_path,
-        "vocab": vocab_path,
-        "model_file": model_path,
-        "config_file": config_path,
-        "device": device,
-        "use_cuda": True if device == "cuda" else False,
-    }
-    # escolher apenas keys que a função aceita
-    kwargs = {k: v for k, v in candidate_kwargs.items() if k in params}
-    log.debug(f"Chamando inference com args: {list(kwargs.keys())}")
-    result = inference(**kwargs)
-    return result
-# helper para normalizar retorno
-def normalize_inference_output(result):
-    """
-    Pode retornar:
-    - numpy array (waveform)  -> assume sr 24000
-    - tuple (waveform, sr)
-    - list-like
-    """
-    if result is None:
-        raise RuntimeError("Inference retornou None")
-    # se for tuple (audio, sr)
-    if isinstance(result, tuple) or isinstance(result, list):
-        if len(result) >= 2 and isinstance(result[0], (list, tuple, np.ndarray)):
-            audio = np.asarray(result[0], dtype=np.float32)
-            sr = int(result[1])
-            return audio, sr
-    # se for numpy array ou list
-    if isinstance(result, np.ndarray):
-        return result, 24000
-    if isinstance(result, (list, tuple)):
-        arr = np.asarray(result, dtype=np.float32)
-        return arr, 24000
-    # caso diferente
-    raise RuntimeError("Formato de retorno inesperado da inference()")
-# inicializar FastAPI
-app = FastAPI(title="F5-TTS Minimal API")
 @app.get("/health")
 def health():
-    return {"status": "ok", "device": DEVICE}
 @app.post("/tts")
-def tts_endpoint(text: str = Body(..., embed=True), return_base64: bool = Body(False, embed=True)):
-    """
-    Recebe JSON { "text": "...", "return_base64": false }.
-    Gera output/output.wav e retorna FileResponse (ou base64 se pedir).
-    """
-    if not text or not text.strip():
-        raise HTTPException(status_code=400, detail="Campo 'text' vazio.")
-    try:
-        log.info("Gerando áudio para texto (len=%d)", len(text))
-        result = call_inference_dynamic(
-            text=text,
-            model_path=MODEL_FILE,
-            config_path=CONFIG_FILE,
-            vocab_path=VOCAB_FILE,
-            device=DEVICE
-        )
-        audio, sr = normalize_inference_output(result)
-        # salvar wav
-        out_path = OUTPUT_FILE
-        sf.write(out_path, audio, sr)
-        log.info("Arquivo salvo: %s (sr=%d, samples=%d)", out_path, sr, audio.shape[0])
-        if return_base64:
-            with open(out_path, "rb") as f:
-                b = base64.b64encode(f.read()).decode("ascii")
-            return {"filename": os.path.basename(out_path), "audio_base64": b}
-        # retorna arquivo
-        return FileResponse(out_path, media_type="audio/wav", filename=os.path.basename(out_path))
-    except Exception as e:
-        log.error("Erro na geração TTS: %s", e)
-        log.debug(traceback.format_exc())
-        raise HTTPException(status_code=500, detail=str(e))

 import os
 import logging
 import traceback
 logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("app")
+# caminhos preferidos
+PREFERRED_DIRS = ["model", "config", "vocab", "output"]
+def ensure_dirs(base_dirs=None):
+    base_dirs = base_dirs or PREFERRED_DIRS
+    created = {}
+    for d in base_dirs:
         try:
+            os.makedirs(d, exist_ok=True)
+            # testar escrita
+            testfile = os.path.join(d, ".perm_test")
+            with open(testfile, "w") as f:
+                f.write("ok")
+            os.remove(testfile)
+            created[d] = d
+            log.info("Diretório pronto: %s", d)
+        except PermissionError:
+            log.warning("Sem permissão para criar %s", d)
+            created[d] = None
+        except Exception:
+            log.error("Erro criando %s: %s", d, traceback.format_exc())
+            created[d] = None
+    # se algum não foi criado, usar /tmp/<nome>
+    for k, v in list(created.items()):
+        if v is None:
+            alt = os.path.join("/tmp", k)
+            try:
+                os.makedirs(alt, exist_ok=True)
+                created[k] = alt
+                log.info("Usando fallback %s para %s", alt, k)
+            except Exception:
+                log.error("Não foi possível criar fallback %s", alt)
                 raise
+    return created
+# chama no início
+DIRS = ensure_dirs()
+MODEL_DIR = DIRS["model"]
+CONFIG_DIR = DIRS["config"]
+VOCAB_DIR = DIRS["vocab"]
+OUTPUT_DIR = DIRS["output"]
+MODEL_FILE = os.path.join(MODEL_DIR, "model_last.safetensors")
+CONFIG_FILE = os.path.join(CONFIG_DIR, "config.yaml")
+VOCAB_FILE = os.path.join(VOCAB_DIR, "vocab.txt")
+OUTPUT_FILE = os.path.join(OUTPUT_DIR, "output.wav")
+# segue resto do seu app (import fastapi etc.)
+from fastapi import FastAPI, Body, HTTPException
+from fastapi.responses import FileResponse
+app = FastAPI()
+# ... (coloque aqui o seu código que faz download do modelo / inicializa TTS etc.)
+# Exemplo simples de endpoint:
 @app.get("/health")
 def health():
+    return {"status": "ok", "paths": {"model": MODEL_FILE, "output": OUTPUT_FILE}}
+# Endpoint /tts de exemplo (adapte para sua inference)
 @app.post("/tts")
+def tts(text: str = Body(..., embed=True)):
+    if not text.strip():
+        raise HTTPException(status_code=400, detail="text vazio")
+    # aqui você chama sua função de inferência que escreve OUTPUT_FILE
+    # por exemplo: tts_model.tts_to_file(text=text, file_path=OUTPUT_FILE)
+    # para demo, apenas cria um arquivo vazio (substitua isso)
+    with open(OUTPUT_FILE, "wb") as f:
+        f.write(b"")  # <-- substitua pelo áudio real
+    return FileResponse(OUTPUT_FILE, media_type="audio/wav", filename=os.path.basename(OUTPUT_FILE))