"""Audio pipeline: base64 media -> Whisper transcription -> translation -> emotion.

The public entry point is :func:`process_audio`. The Whisper model is loaded
once, at import time, and shared by every call.
"""

import base64
import tempfile
import os

from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH

# Deliberately placed before the Whisper import/model load below: points the
# XDG cache directory at the configured model path so the model files are
# looked up there instead of the user's home cache. Do not move this line
# below `whisper.load_model`.
os.environ["XDG_CACHE_HOME"] = str(WHISPER_MODEL_PATH)  # "./models"

from app.logger import log
from app.sentiment_model import analyze_sentiment_portgues
from app.translator import translate_long_text
from app.emotion_model import analyze_emotion

import whisper
from moviepy.editor import VideoFileClip

# Loaded once per process; every call to process_audio reuses this instance.
whisper_model = whisper.load_model(WHISPER_MODEL)


# -------------------------
# UTIL: temporary-file cleanup
# -------------------------
def _remove_temp_file(path, logger=None):
    """Delete *path* on a best-effort basis.

    A missing file is ignored. Any other OS-level failure is logged rather
    than raised, so a cleanup problem never hides the result (or the original
    exception) of the operation that created the file.
    """
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as err:
        log(f"Falha ao remover arquivo temporário {path}: {err}", logger=logger)


# -------------------------
# UTIL: save base64 payload
# -------------------------
def save_base64_to_file(base64_data, suffix: str) -> str:
    """Decode *base64_data* and write it to a new temporary file.

    Args:
        base64_data: Base64-encoded payload, as accepted by
            ``base64.b64decode``.
        suffix: File-name suffix for the temporary file (e.g. ``".wav"``).

    Returns:
        Path of the created file. The file is NOT deleted automatically;
        the caller is responsible for removing it.

    Raises:
        ValueError: If the payload cannot be decoded as base64.
    """
    try:
        file_bytes = base64.b64decode(base64_data)
    except (ValueError, TypeError) as err:
        # binascii.Error is a ValueError subclass, so malformed payloads and
        # wrong input types are both covered here.
        raise ValueError("Base64 inválido") from err

    # Write through the handle NamedTemporaryFile already gives us and let the
    # context manager close it; re-opening by name while the first handle is
    # still open leaks a descriptor and fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(file_bytes)
        return temp_file.name


# -------------------------
# UTIL: extract audio track
# -------------------------
def extract_audio_from_video(video_path: str) -> str:
    """Write the audio track of the video at *video_path* to a temporary WAV.

    Args:
        video_path: Path of a video file readable by MoviePy.

    Returns:
        Path of the created ``.wav`` file. The caller must delete it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Only a unique path is needed here; close the handle right away so
    # MoviePy can open the file itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("O vídeo não possui faixa de áudio")
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the clip's reader resources even when extraction fails.
            video.close()
    except Exception:
        # Do not leave an empty or partial WAV behind on failure.
        _remove_temp_file(audio_path)
        raise

    return audio_path


# -------------------------
# MAIN PROCESSING
# -------------------------
def process_audio(video_base64=None, audio_base64=None, logger=None) -> dict:
    """Transcribe base64 media, translate the text and analyze its emotion.

    Exactly one source is used: ``audio_base64`` takes precedence when both
    arguments are given.

    Args:
        video_base64: Base64-encoded video; its audio track is extracted.
        audio_base64: Base64-encoded audio.
        logger: Forwarded unchanged to ``log``.

    Returns:
        A dict with keys ``"transcription"``, ``"translation"`` and
        ``"analysis"``; the latter holds ``"emotion"``, ``"confidence"``,
        ``"emotion_raw"`` and ``"all_emotions"`` taken from
        ``analyze_emotion``.

    Raises:
        ValueError: If neither input is provided, the base64 is invalid, or
            the video has no audio track.
    """
    video_path = None
    audio_path = None

    try:
        # -------------------------
        # 1. AUDIO SOURCE
        # -------------------------
        if audio_base64:
            log("Recebido áudio base64", logger=logger)
            audio_path = save_base64_to_file(audio_base64, ".wav")
        elif video_base64:
            log("Recebido vídeo base64", logger=logger)
            video_path = save_base64_to_file(video_base64, ".mp4")

            log("Extraindo áudio do vídeo...", logger=logger)
            audio_path = extract_audio_from_video(video_path)
        else:
            raise ValueError("Nenhum áudio ou vídeo fornecido")

        # -------------------------
        # 2. WHISPER
        # -------------------------
        log("Transcrevendo com Whisper...", logger=logger)

        result = whisper_model.transcribe(
            audio_path,
            language="pt",
            verbose=False,
            fp16=False,  # important when running on CPU
        )

        # Strip each segment before joining so the transcription has single
        # spaces between segments and no leading/trailing whitespace.
        pieces = (seg["text"].strip() for seg in result["segments"])
        text = " ".join(piece for piece in pieces if piece)

        # NOTE: the direct Portuguese sentiment step (analyze_sentiment_portgues)
        # is currently disabled; emotion is computed on the English translation.

        # -------------------------
        # 3. TRANSLATION + EMOTION
        # -------------------------
        log("Traduzindo texto para inglês...", logger=logger)
        text_en = translate_long_text(text)

        log("Analisando emoção...", logger=logger)
        emotion_result = analyze_emotion(text_en)

        return {
            "transcription": text,
            "translation": text_en,
            "analysis": {
                "emotion": emotion_result["emotion"],
                "confidence": emotion_result["confidence"],
                "emotion_raw": emotion_result["emotion_raw"],
                "all_emotions": emotion_result["all_emotions"],
            },
        }

    finally:
        # -------------------------
        # CLEANUP (very important)
        # -------------------------
        # Both files are always attempted; one failed removal must not skip
        # the other or replace the exception/result of the main path.
        _remove_temp_file(video_path, logger=logger)
        _remove_temp_file(audio_path, logger=logger)