# Spaces: Wanderhalleylee / transcrevi-api (Running) - File size: 10,057 Bytes

import os
import time
import tempfile
import subprocess
import json
import re
from flask import Flask, request, jsonify
from flask_cors import CORS

# =====================================================
#  CONFIGURATION
# =====================================================
# Shared secret read from the environment. When it is empty, check_api_key()
# accepts every request (the API is unprotected).
API_KEY = os.environ.get("API_KEY", "")
# Upload size limit in megabytes (5120 MB = 5 GiB), enforced after the upload
# has been written to disk.
MAX_FILE_SIZE_MB = 5120
# Media duration limit in seconds (8000 s is roughly 2 h 13 min).
MAX_DURATION_SEC = 8000
# Accepted file extensions (lowercase, without the dot): video and audio.
VALID_EXTENSIONS = {
    "mp4", "mkv", "avi", "mov", "wmv", "flv",
    "webm", "m4v", "mp3", "wav", "ogg", "m4a",
    "aac", "wma", "flac"
}
# Model names a client may request; anything else is rejected with HTTP 400.
VALID_MODELS = ["tiny", "base", "small", "medium"]
# Model used when the request does not name one; also pre-loaded at startup.
DEFAULT_MODEL = "base"

# Common PT-BR corrections: regex pattern -> replacement text.
# The patterns are applied case-insensitively by apply_ptbr_corrections().
# NOTE(review): the 'tô' and 'né' entries map a word to itself, so their only
# visible effect is on letter case - confirm whether that is intended.
PTBR_CORRECTIONS = {
    r'\bpijão\b': 'pijamão',
    r'\bpijao\b': 'pijamão',
    r'\bta\b': 'tá',
    r'\bvc\b': 'você',
    r'\btô\b': 'tô',
    r'\bné\b': 'né',
    r'\bdto\b': 'direito',
    r'\besqdo\b': 'esquerdo',
}

# =====================================================
#  FLASK APP
# =====================================================
app = Flask(__name__)
# Enable CORS on every route ("/*") for requests from any origin ("*").
CORS(app, resources={r"/*": {"origins": "*"}})

# =====================================================
#  MODEL CACHE
# =====================================================
# Maps a model name to its loaded model object; filled lazily by get_model()
# so each model is loaded at most once per process.
_models = {}


def get_model(name="base"):
    """Return the Whisper model called *name*, loading it on first use.

    Loaded models are kept in the module-level ``_models`` dict, so later
    calls with the same name return the cached object. The model is placed
    on CUDA when ``torch.cuda.is_available()`` is true, otherwise on the CPU.
    """
    # Heavy dependencies are imported lazily, at call time.
    import whisper
    import torch

    # Fast path: this model has already been loaded.
    if name in _models:
        return _models[name]

    print(f"[INFO] Carregando modelo '{name}'...")
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"
    loaded = whisper.load_model(name, device=target_device)
    _models[name] = loaded
    print(f"[INFO] Modelo '{name}' carregado no dispositivo: {target_device}")
    return _models[name]


# =====================================================
#  FUNÇÕES AUXILIARES
# =====================================================
def format_timestamp(seconds):
    """Format a time offset as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset in seconds (int or float).

    Returns:
        The zero-padded timestamp string. Negative offsets are clamped to
        ``00:00:00,000`` because an SRT timestamp cannot be negative.
    """
    # Round once to whole milliseconds and do all further arithmetic on
    # integers. Extracting the fraction with float math and truncating it
    # loses a millisecond for values such as 1.001 (stored as 1.000999...).
    total_ms = max(0, int(round(seconds * 1000)))
    hrs, remainder = divmod(total_ms, 3_600_000)
    mins, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hrs:02d}:{mins:02d}:{secs:02d},{millis:03d}"


def generate_srt(segments):
    """Render transcription segments as SubRip (SRT) text.

    Each segment is a mapping with ``"start"`` and ``"end"`` (seconds) and
    ``"text"``. Cues are numbered from 1; each cue is the index line, the
    ``start --> end`` line and the stripped text, and cues are separated by
    a blank line. An empty *segments* sequence yields an empty string.
    """
    cues = [
        f"{number}\n{format_timestamp(segment['start'])} --> "
        f"{format_timestamp(segment['end'])}\n{segment['text'].strip()}\n"
        for number, segment in enumerate(segments, start=1)
    ]
    return "\n".join(cues)


def apply_ptbr_corrections(text, corrections=None):
    """Apply regex-based Brazilian-Portuguese spelling corrections to *text*.

    Args:
        text: The text to correct.
        corrections: Optional mapping of regex pattern -> replacement
            template. Defaults to the module-level ``PTBR_CORRECTIONS``.

    Returns:
        The corrected text. Patterns are matched case-insensitively; when
        the matched word starts with an uppercase letter, the replacement is
        capitalised too, so a sentence-initial "Vc" becomes "Você" rather
        than "você".
    """
    if corrections is None:
        corrections = PTBR_CORRECTIONS

    def _case_preserving(replacement):
        # Build a re.sub callback that keeps the leading capital of the match.
        def _substitute(match):
            # expand() keeps normal template semantics (e.g. backreferences).
            expanded = match.expand(replacement)
            if match.group(0)[:1].isupper():
                return expanded[:1].upper() + expanded[1:]
            return expanded
        return _substitute

    corrected = text
    for pattern, replacement in corrections.items():
        corrected = re.sub(
            pattern, _case_preserving(replacement), corrected, flags=re.IGNORECASE
        )
    return corrected


def validate_file(file):
    """Check that an uploaded file exists and has a supported extension.

    Returns a ``(ok, message)`` tuple: ``(True, "")`` when the file is
    acceptable, otherwise ``(False, <error message>)``.
    """
    # Reject a missing upload or one with an empty filename.
    nothing_uploaded = (not file) or file.filename == ""
    if nothing_uploaded:
        return False, "Nenhum arquivo enviado"

    # The extension is whatever follows the last dot; "" when there is no dot.
    _, dot, tail = file.filename.rpartition(".")
    ext = tail.lower() if dot else ""

    if ext in VALID_EXTENSIONS:
        return True, ""

    supported = ", ".join(sorted(VALID_EXTENSIONS))
    return False, f"Formato '.{ext}' não suportado. Use: {supported}"


def check_api_key():
    """Return True when the current request may use the API.

    When ``API_KEY`` is empty the API is unprotected and every request is
    accepted. Otherwise the ``X-API-Key`` request header must equal
    ``API_KEY``.
    """
    import hmac

    if not API_KEY:
        return True
    supplied = request.headers.get("X-API-Key", "")
    # Compare in constant time so response timing does not reveal how much of
    # the key was guessed correctly. Both sides are encoded to bytes because
    # compare_digest() rejects non-ASCII str arguments.
    return hmac.compare_digest(supplied.encode("utf-8"), API_KEY.encode("utf-8"))


def get_duration(filepath):
    """Return the media duration of *filepath* in seconds, via ``ffprobe``.

    The ``format.duration`` field of ffprobe's JSON output is returned as a
    float. Any failure (ffprobe missing, timeout after 30 s, unparsable
    output, ...) is swallowed and reported as ``0``.
    """
    probe_cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_format", filepath,
    ]
    try:
        probe = subprocess.run(
            probe_cmd, capture_output=True, text=True, timeout=30
        )
        metadata = json.loads(probe.stdout)
        container = metadata.get("format", {})
        return float(container.get("duration", 0))
    except Exception:
        # Best effort: callers treat 0 as "duration unknown".
        return 0


# =====================================================
#  ROTAS
# =====================================================
@app.route("/", methods=["GET"])
def index():
    """Return a JSON description of the service: version, limits and models."""
    import torch

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    info = {
        "app": "TranscreVid API",
        "status": "online",
        "version": "2.1",
        "device": device,
        "models_available": VALID_MODELS,
        # Names of the models already loaded into the cache.
        "models_loaded": list(_models.keys()),
        # True when an API key is configured.
        "protected": bool(API_KEY),
        "max_file_mb": MAX_FILE_SIZE_MB,
        "max_duration_sec": MAX_DURATION_SEC,
        "features": ["txt", "srt", "pt-br corrections"],
    }
    return jsonify(info)


@app.route("/health", methods=["GET"])
def health():
    """Health probe: report status ``ok`` and the compute device in use."""
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    body = {"status": "ok", "device": device}
    return jsonify(body)


@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio/video file and return the result as JSON.

    Request (multipart form):
        video:    the media file (required).
        format:   "txt" (default) or "srt".
        model:    one of VALID_MODELS (default DEFAULT_MODEL).
        language: optional language code, matched case-insensitively.
                  "pt-br" and "pt-pt" both select Portuguese and add a
                  regional initial prompt; "pt-br" also applies
                  apply_ptbr_corrections(). Empty means no language is
                  passed to the model.

    Responses:
        200 with the transcription and metadata;
        401 when the API key is rejected;
        400 for a missing/unsupported file, invalid format or model, or a
            file exceeding the size or duration limit;
        504 when a subprocess raises TimeoutExpired (audio extraction has
            a 300 s timeout);
        500 when audio extraction fails or any other exception occurs.

    Temporary files are always removed before returning.
    """
    import torch

    if not check_api_key():
        return jsonify({"error": "API Key inválida"}), 401

    if "video" not in request.files:
        return jsonify({"error": "Envie um arquivo no campo 'video'"}), 400

    file = request.files["video"]
    valid, msg = validate_file(file)
    if not valid:
        return jsonify({"error": msg}), 400

    output_format = request.form.get("format", "txt").lower()
    model_name = request.form.get("model", DEFAULT_MODEL).lower()
    # Lowercase the language so "pt-BR" / "PT-BR" hit the regional branches
    # below instead of falling through as a raw language code.
    language_input = request.form.get("language", "").strip().lower()

    is_ptbr = False
    is_ptpt = False
    if language_input == "pt-br":
        language = "pt"
        is_ptbr = True
    elif language_input == "pt-pt":
        language = "pt"
        is_ptpt = True
    elif language_input:
        language = language_input
    else:
        # No language given: none is passed to the model.
        language = None

    if output_format not in ("txt", "srt"):
        return jsonify({"error": "Formato deve ser 'txt' ou 'srt'"}), 400

    if model_name not in VALID_MODELS:
        return jsonify({"error": f"Modelo '{model_name}' inválido. Use: {', '.join(VALID_MODELS)}"}), 400

    # Initialised before the try so the finally block can always inspect them.
    tmp_video = None
    tmp_audio = None

    try:
        start_time = time.time()

        # Persist the upload to disk; the extension was already validated
        # against VALID_EXTENSIONS by validate_file().
        ext = file.filename.rsplit(".", 1)[-1].lower()
        tmp_video = tempfile.NamedTemporaryFile(
            delete=False, suffix=f".{ext}", dir="/tmp"
        )
        file.save(tmp_video.name)
        tmp_video.close()

        file_size_mb = os.path.getsize(tmp_video.name) / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            return jsonify({"error": f"Arquivo muito grande ({file_size_mb:.0f} MB). Máximo: {MAX_FILE_SIZE_MB} MB"}), 400

        # get_duration() returns 0 when probing fails, in which case this
        # limit is not enforced.
        duration = get_duration(tmp_video.name)
        if duration > MAX_DURATION_SEC:
            return jsonify({"error": f"Vídeo muito longo ({duration:.0f}s). Máximo: {MAX_DURATION_SEC}s"}), 400

        # Reserve a path for the extracted audio; ffmpeg overwrites it (-y).
        tmp_audio = tempfile.NamedTemporaryFile(
            delete=False, suffix=".wav", dir="/tmp"
        )
        tmp_audio.close()

        # Drop the video stream and convert the audio to 16 kHz mono
        # 16-bit PCM WAV.
        ffmpeg_cmd = [
            "ffmpeg", "-y",
            "-i", tmp_video.name,
            "-vn",
            "-acodec", "pcm_s16le",
            "-ar", "16000",
            "-ac", "1",
            tmp_audio.name
        ]

        result = subprocess.run(
            ffmpeg_cmd, capture_output=True, text=True, timeout=300
        )

        if result.returncode != 0:
            return jsonify({"error": f"Erro ao extrair áudio: {result.stderr[:500]}"}), 500

        model = get_model(model_name)

        transcribe_opts = {"fp16": False}
        if language:
            transcribe_opts["language"] = language

        # Regional prompts passed to the model as "initial_prompt".
        if is_ptbr:
            transcribe_opts["initial_prompt"] = (
                "Esta é uma transcrição em português brasileiro. "
                "Use vocabulário e expressões do Brasil. "
                "Exemplos: pijamão, camisetão, tá, né, você, pra, legal, beleza, "
                "carrinho, TikTok, confortável."
            )
        elif is_ptpt:
            transcribe_opts["initial_prompt"] = (
                "Esta é uma transcrição em português europeu. "
                "Use vocabulário e expressões de Portugal. "
                "Exemplos: fixe, giro, autocarro, telemóvel, pequeno-almoço, "
                "fantástico, espetacular."
            )

        # NOTE: from here on `result` is the transcription result, no longer
        # the ffmpeg CompletedProcess.
        result = model.transcribe(tmp_audio.name, **transcribe_opts)

        # Covers upload handling, audio extraction, model loading and
        # transcription.
        processing_time = time.time() - start_time

        segments = result.get("segments", [])

        if is_ptbr:
            # Correct both the per-segment text (used for SRT) and the full text.
            for seg in segments:
                seg["text"] = apply_ptbr_corrections(seg["text"])
            full_text = apply_ptbr_corrections(result.get("text", "").strip())
        else:
            full_text = result.get("text", "").strip()

        if output_format == "srt":
            transcription = generate_srt(segments)
        else:
            transcription = full_text

        # For SRT, count words in the segment texts only, so cue numbers and
        # timestamps are not counted.
        word_count = len(transcription.split()) if output_format == "txt" else sum(
            len(s.get("text", "").split()) for s in segments
        )

        detected_lang = result.get("language", "desconhecido")

        # Report the regional variant the client asked for.
        if is_ptbr:
            detected_lang = "pt-br"
        elif is_ptpt:
            detected_lang = "pt-pt"

        return jsonify({
            "transcription": transcription,
            "format": output_format,
            "language_detected": detected_lang,
            "duration_seconds": round(duration, 2),
            "processing_seconds": round(processing_time, 2),
            "word_count": word_count,
            "segments_count": len(segments),
            # Media seconds processed per wall-clock second.
            "speed": f"{duration / processing_time:.1f}x" if processing_time > 0 else "N/A",
            "model_used": model_name,
            "device": "cuda" if torch.cuda.is_available() else "cpu"
        })

    except subprocess.TimeoutExpired:
        return jsonify({"error": "Processamento demorou demais. Tente um vídeo menor."}), 504
    except Exception as e:
        # Top-level boundary: report any other failure as a 500 response.
        return jsonify({"error": f"Erro interno: {str(e)}"}), 500
    finally:
        # Best-effort removal of the temporary files on every exit path.
        for f in [tmp_video, tmp_audio]:
            if f and os.path.exists(f.name):
                try:
                    os.unlink(f.name)
                except OSError:
                    pass


# =====================================================
#  INICIAR SERVIDOR
# =====================================================
if __name__ == "__main__":
    banner = "=" * 50
    print(banner)
    print("  TranscreVid API v2.1")
    print(banner)

    # Pre-load the default model; a failure here is only reported as a
    # warning and the server still starts.
    try:
        print(f"[INIT] Carregando modelo '{DEFAULT_MODEL}'...")
        get_model(DEFAULT_MODEL)
    except Exception as e:
        print(f"[WARN] Erro ao pre-carregar modelo: {e}")
    else:
        print("[INIT] Modelo carregado com sucesso!")

    print("[INIT] Servidor iniciando na porta 7860...")
    # Listen on all interfaces, port 7860, with the Flask debugger disabled.
    app.run(host="0.0.0.0", port=7860, debug=False)