"""TranscreVid API: a small Flask service that transcribes audio/video with Whisper.

Endpoints:
    GET  /            service metadata
    GET  /health      liveness probe
    POST /transcribe  upload a media file (form field ``video``) and receive
                      a plain-text or SRT transcription
"""

import hmac
import json
import os
import re
import subprocess
import tempfile
import time

from flask import Flask, request, jsonify
from flask_cors import CORS

# =====================================================
# CONFIGURATION
# =====================================================

# When empty, the API is unprotected (see check_api_key).
API_KEY = os.environ.get("API_KEY", "")
MAX_FILE_SIZE_MB = 5120
MAX_DURATION_SEC = 8000

VALID_EXTENSIONS = {
    "mp4", "mkv", "avi", "mov", "wmv", "flv", "webm", "m4v",
    "mp3", "wav", "ogg", "m4a", "aac", "wma", "flac",
}

VALID_MODELS = ["tiny", "base", "small", "medium"]
DEFAULT_MODEL = "base"

# Common PT-BR corrections (regex pattern -> replacement).
# Patterns are applied case-insensitively, so entries whose replacement equals
# the pattern text (e.g. "tô", "né") only normalise letter case.
PTBR_CORRECTIONS = {
    r'\bpijão\b': 'pijamão',
    r'\bpijao\b': 'pijamão',
    r'\bta\b': 'tá',
    r'\bvc\b': 'você',
    r'\btô\b': 'tô',
    r'\bné\b': 'né',
    r'\bdto\b': 'direito',
    r'\besqdo\b': 'esquerdo',
}

# Compiled once at import time so requests do not pay for regex lookup/compile.
_PTBR_COMPILED = [
    (re.compile(pattern, flags=re.IGNORECASE), replacement)
    for pattern, replacement in PTBR_CORRECTIONS.items()
]

# Initial prompts passed to Whisper to bias vocabulary toward each variant.
_PTBR_PROMPT = (
    "Esta é uma transcrição em português brasileiro. "
    "Use vocabulário e expressões do Brasil. "
    "Exemplos: pijamão, camisetão, tá, né, você, pra, legal, beleza, "
    "carrinho, TikTok, confortável."
)
_PTPT_PROMPT = (
    "Esta é uma transcrição em português europeu. "
    "Use vocabulário e expressões de Portugal. "
    "Exemplos: fixe, giro, autocarro, telemóvel, pequeno-almoço, "
    "fantástico, espetacular."
)

# =====================================================
# FLASK APP
# =====================================================

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})

# =====================================================
# MODEL CACHE
# =====================================================

# Maps model name -> loaded Whisper model, filled lazily by get_model().
_models = {}


def _current_device():
    """Return ``"cuda"`` when torch reports a CUDA device, else ``"cpu"``."""
    # Imported lazily so the module can be imported without loading torch.
    import torch

    return "cuda" if torch.cuda.is_available() else "cpu"


def get_model(name=DEFAULT_MODEL):
    """Return the Whisper model called *name*, loading and caching it on first use.

    Args:
        name: One of the Whisper model names (callers validate against
            ``VALID_MODELS``).

    Returns:
        The loaded model object stored in the module-level cache.
    """
    if name not in _models:
        import whisper

        print(f"[INFO] Carregando modelo '{name}'...")
        device = _current_device()
        _models[name] = whisper.load_model(name, device=device)
        print(f"[INFO] Modelo '{name}' carregado no dispositivo: {device}")
    return _models[name]


# =====================================================
# HELPER FUNCTIONS
# =====================================================

def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is converted to whole milliseconds once (with rounding) and then
    split with integer arithmetic. Deriving the millisecond field from the
    float fractional part instead truncates representation error, e.g. 2.3 s
    would render as ``,299``. Negative inputs are clamped to zero.
    """
    total_ms = max(0, round(float(seconds) * 1000))
    hrs, remainder = divmod(total_ms, 3_600_000)
    mins, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hrs:02d}:{mins:02d}:{secs:02d},{millis:03d}"


def generate_srt(segments):
    """Build SRT subtitle text from an iterable of segment dicts.

    Each segment must provide ``start``, ``end`` (seconds) and ``text``.
    Entries are numbered from 1 and separated by a blank line.
    """
    entries = [
        f"{index}\n"
        f"{format_timestamp(seg['start'])} --> {format_timestamp(seg['end'])}\n"
        f"{seg['text'].strip()}\n"
        for index, seg in enumerate(segments, 1)
    ]
    return "\n".join(entries)


def apply_ptbr_corrections(text):
    """Apply every ``PTBR_CORRECTIONS`` substitution to *text*, in dict order."""
    corrected = text
    for pattern, replacement in _PTBR_COMPILED:
        corrected = pattern.sub(replacement, corrected)
    return corrected


def validate_file(file):
    """Check that an uploaded file exists and has a supported extension.

    Returns:
        ``(True, "")`` when acceptable, otherwise ``(False, message)`` with a
        user-facing error message.
    """
    # ``filename`` may be None as well as "", so test truthiness rather than
    # equality with the empty string.
    if not file or not file.filename:
        return False, "Nenhum arquivo enviado"

    filename = file.filename
    ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    if ext not in VALID_EXTENSIONS:
        return False, f"Formato '.{ext}' não suportado. Use: {', '.join(sorted(VALID_EXTENSIONS))}"
    return True, ""


def check_api_key():
    """Return True when the request is authorised.

    If ``API_KEY`` is empty the API is open and every request is accepted.
    Otherwise the ``X-API-Key`` header must match ``API_KEY``.
    """
    if not API_KEY:
        return True
    supplied = request.headers.get("X-API-Key", "")
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    return hmac.compare_digest(supplied.encode("utf-8"), API_KEY.encode("utf-8"))


def get_duration(filepath):
    """Return the media duration in seconds as reported by ffprobe.

    This is best-effort: if ffprobe is missing, times out, or produces output
    that cannot be parsed, 0 is returned (which lets the caller's duration
    limit pass) and a warning is printed.
    """
    try:
        probe = subprocess.run(
            ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", filepath],
            capture_output=True, text=True, timeout=30
        )
        info = json.loads(probe.stdout)
        return float(info.get("format", {}).get("duration", 0))
    except (subprocess.SubprocessError, OSError, ValueError, TypeError, AttributeError) as exc:
        # json.JSONDecodeError is a ValueError; TimeoutExpired is a SubprocessError.
        print(f"[WARN] ffprobe falhou: {exc}")
        return 0


def _resolve_language(language_input):
    """Map the ``language`` form field to Whisper options.

    Returns:
        ``(language, is_ptbr, is_ptpt)`` where *language* is the code passed to
        Whisper, or None to let Whisper auto-detect. Matching is
        case-insensitive so ``pt-BR`` and ``pt-br`` behave the same.
    """
    normalized = (language_input or "").strip().lower()
    if normalized == "pt-br":
        return "pt", True, False
    if normalized == "pt-pt":
        return "pt", False, True
    return (normalized or None), False, False


def _extract_audio(video_path, audio_path):
    """Run ffmpeg to write 16 kHz mono 16-bit PCM WAV from *video_path*.

    Returns the ``CompletedProcess``; the caller inspects ``returncode``.
    Raises ``subprocess.TimeoutExpired`` after 300 seconds.
    """
    ffmpeg_cmd = [
        "ffmpeg", "-y", "-i", video_path,
        "-vn", "-acodec", "pcm_s16le",
        "-ar", "16000", "-ac", "1",
        audio_path
    ]
    return subprocess.run(ffmpeg_cmd, capture_output=True, text=True, timeout=300)


def _remove_quietly(path):
    """Delete *path* if it is set and exists, ignoring OS errors."""
    if path and os.path.exists(path):
        try:
            os.unlink(path)
        except OSError:
            pass


# =====================================================
# ROUTES
# =====================================================

@app.route("/", methods=["GET"])
def index():
    """Return service metadata: version, device, models, and limits."""
    return jsonify({
        "app": "TranscreVid API",
        "status": "online",
        "version": "2.1",
        "device": _current_device(),
        "models_available": VALID_MODELS,
        "models_loaded": list(_models.keys()),
        "protected": bool(API_KEY),
        "max_file_mb": MAX_FILE_SIZE_MB,
        "max_duration_sec": MAX_DURATION_SEC,
        "features": ["txt", "srt", "pt-br corrections"]
    })


@app.route("/health", methods=["GET"])
def health():
    """Liveness probe reporting the compute device."""
    return jsonify({
        "status": "ok",
        "device": _current_device()
    })


@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded media file.

    Form fields:
        video:    the media file (required).
        format:   ``txt`` (default) or ``srt``.
        model:    one of ``VALID_MODELS`` (default ``DEFAULT_MODEL``).
        language: Whisper language code, ``pt-br``, ``pt-pt``, or empty for
                  auto-detection.

    Responses:
        200 with the transcription and statistics; 400 for invalid input or
        exceeded limits; 401 for a bad API key; 500 for ffmpeg/internal
        errors; 504 when a subprocess times out.
    """
    if not check_api_key():
        return jsonify({"error": "API Key inválida"}), 401

    if "video" not in request.files:
        return jsonify({"error": "Envie um arquivo no campo 'video'"}), 400

    file = request.files["video"]
    valid, msg = validate_file(file)
    if not valid:
        return jsonify({"error": msg}), 400

    output_format = request.form.get("format", "txt").lower()
    model_name = request.form.get("model", DEFAULT_MODEL).lower()
    language, is_ptbr, is_ptpt = _resolve_language(request.form.get("language", ""))

    if output_format not in ("txt", "srt"):
        return jsonify({"error": "Formato deve ser 'txt' ou 'srt'"}), 400

    if model_name not in VALID_MODELS:
        return jsonify({"error": f"Modelo '{model_name}' inválido. Use: {', '.join(VALID_MODELS)}"}), 400

    video_path = None
    audio_path = None

    try:
        start_time = time.time()

        # The extension was already validated against VALID_EXTENSIONS, so it
        # is safe to use as the temp-file suffix.
        ext = file.filename.rsplit(".", 1)[-1].lower()
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}", dir="/tmp") as handle:
            video_path = handle.name
        file.save(video_path)

        file_size_mb = os.path.getsize(video_path) / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            return jsonify({"error": f"Arquivo muito grande ({file_size_mb:.0f} MB). Máximo: {MAX_FILE_SIZE_MB} MB"}), 400

        duration = get_duration(video_path)
        if duration > MAX_DURATION_SEC:
            return jsonify({"error": f"Vídeo muito longo ({duration:.0f}s). Máximo: {MAX_DURATION_SEC}s"}), 400

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir="/tmp") as handle:
            audio_path = handle.name

        ffmpeg_result = _extract_audio(video_path, audio_path)
        if ffmpeg_result.returncode != 0:
            return jsonify({"error": f"Erro ao extrair áudio: {ffmpeg_result.stderr[:500]}"}), 500

        model = get_model(model_name)

        transcribe_opts = {"fp16": False}
        if language:
            transcribe_opts["language"] = language
        if is_ptbr:
            transcribe_opts["initial_prompt"] = _PTBR_PROMPT
        elif is_ptpt:
            transcribe_opts["initial_prompt"] = _PTPT_PROMPT

        result = model.transcribe(audio_path, **transcribe_opts)
        processing_time = time.time() - start_time

        segments = result.get("segments", [])
        full_text = result.get("text", "").strip()

        if is_ptbr:
            for seg in segments:
                seg["text"] = apply_ptbr_corrections(seg["text"])
            full_text = apply_ptbr_corrections(full_text)

        if output_format == "srt":
            transcription = generate_srt(segments)
            # Count words from segment text so SRT indices/timestamps are excluded.
            word_count = sum(len(s.get("text", "").split()) for s in segments)
        else:
            transcription = full_text
            word_count = len(transcription.split())

        if is_ptbr:
            detected_lang = "pt-br"
        elif is_ptpt:
            detected_lang = "pt-pt"
        else:
            detected_lang = result.get("language", "desconhecido")

        return jsonify({
            "transcription": transcription,
            "format": output_format,
            "language_detected": detected_lang,
            "duration_seconds": round(duration, 2),
            "processing_seconds": round(processing_time, 2),
            "word_count": word_count,
            "segments_count": len(segments),
            "speed": f"{duration / processing_time:.1f}x" if processing_time > 0 else "N/A",
            "model_used": model_name,
            "device": _current_device()
        })

    except subprocess.TimeoutExpired:
        return jsonify({"error": "Processamento demorou demais. Tente um vídeo menor."}), 504
    except Exception as e:
        # Top-level request boundary: report the failure to the client
        # instead of letting Flask return a bare 500 page.
        return jsonify({"error": f"Erro interno: {str(e)}"}), 500
    finally:
        # Temp files are created with delete=False, so always clean them up.
        for path in (video_path, audio_path):
            _remove_quietly(path)


# =====================================================
# START SERVER
# =====================================================

if __name__ == "__main__":
    print("=" * 50)
    print("  TranscreVid API v2.1")
    print("=" * 50)

    # Pre-load the default model so the first request does not pay the load
    # cost; a failure here is non-fatal because get_model() retries lazily.
    try:
        print(f"[INIT] Carregando modelo '{DEFAULT_MODEL}'...")
        get_model(DEFAULT_MODEL)
        print("[INIT] Modelo carregado com sucesso!")
    except Exception as e:
        print(f"[WARN] Erro ao pre-carregar modelo: {e}")

    print("[INIT] Servidor iniciando na porta 7860...")
    app.run(host="0.0.0.0", port=7860, debug=False)