import base64
import tempfile
import os
from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH
# Point XDG_CACHE_HOME at the configured model directory before the imports
# below run (presumably so Whisper caches its model files there — confirm).
os.environ["XDG_CACHE_HOME"] = str(WHISPER_MODEL_PATH) # "./models"
from app.logger import log
from app.sentiment_model import analyze_sentiment_portgues
from app.translator import translate_long_text
from app.emotion_model import analyze_emotion
import whisper
from moviepy.editor import VideoFileClip
# Load the Whisper model once at import time; process_audio reuses this instance.
whisper_model = whisper.load_model(WHISPER_MODEL)
# -------------------------
# 🔥 UTIL: salvar base64
# -------------------------
def save_base64_to_file(base64_data, suffix):
    """Decode a base64 payload and write it to a new temporary file.

    The file is created with ``delete=False``, so the caller is responsible
    for removing it once it is no longer needed.

    Args:
        base64_data: Base64-encoded content (ASCII ``str`` or bytes-like).
        suffix: File name suffix for the temporary file, e.g. ``".wav"``.

    Returns:
        The path of the temporary file containing the decoded bytes.

    Raises:
        ValueError: If ``base64_data`` cannot be decoded as base64.
    """
    try:
        file_bytes = base64.b64decode(base64_data)
    except (ValueError, TypeError) as err:
        # binascii.Error (bad padding/alphabet) is a ValueError subclass;
        # TypeError covers non-string, non-bytes input such as None.
        raise ValueError("Base64 inválido") from err

    # Write through the handle returned by NamedTemporaryFile and close it
    # via the context manager, instead of leaking it and re-opening the
    # same path a second time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(file_bytes)
        return temp_file.name
# -------------------------
# 🔥 UTIL: extrair áudio
# -------------------------
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary ``.wav`` file.

    The output file is created with ``delete=False``; on success the caller
    is responsible for removing it. If extraction fails, the temporary file
    is removed here before the error propagates.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The path of the temporary ``.wav`` file holding the extracted audio.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Only the reserved path is needed; close the handle immediately so it
    # is not leaked and the audio writer can open the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("O vídeo não possui faixa de áudio")
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the clip's resources even when writing fails.
            video.close()
    except Exception:
        # The caller never receives the path on failure, so clean up here.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise

    return audio_path
# -------------------------
# 🔥 PROCESSAMENTO PRINCIPAL
# -------------------------
def _remove_temp_file(path, logger=None):
    """Delete a temporary file, logging instead of raising if removal fails."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone; nothing to clean up.
        pass
    except OSError as err:
        # This runs from a ``finally`` block: raising here would mask the
        # real result or the original exception.
        log(f"Falha ao remover arquivo temporário {path}: {err}", logger=logger)


def process_audio(video_base64=None, audio_base64=None, logger=None):
    """Transcribe base64 audio/video, translate it, and analyze its emotion.

    The audio is taken from ``audio_base64`` when given; otherwise it is
    extracted from ``video_base64``. The audio is transcribed with Whisper
    (language fixed to ``"pt"``), the transcription is passed to
    ``translate_long_text``, and the translation to ``analyze_emotion``.
    Temporary files created along the way are removed before returning.

    Args:
        video_base64: Base64-encoded video; used only if ``audio_base64``
            is empty.
        audio_base64: Base64-encoded audio; takes precedence over the video.
        logger: Optional logger forwarded to ``log``.

    Returns:
        A dict with ``"transcription"``, ``"translation"`` and an
        ``"analysis"`` dict holding ``"emotion"``, ``"confidence"``,
        ``"emotion_raw"`` and ``"all_emotions"``.

    Raises:
        ValueError: If neither input is provided (or a payload is not valid
            base64, as reported by ``save_base64_to_file``).
    """
    video_path = None
    audio_path = None
    try:
        # 1. Audio source
        if audio_base64:
            log("Recebido áudio base64", logger=logger)
            audio_path = save_base64_to_file(audio_base64, ".wav")
        elif video_base64:
            log("Recebido vídeo base64", logger=logger)
            video_path = save_base64_to_file(video_base64, ".mp4")
            log("Extraindo áudio do vídeo...", logger=logger)
            audio_path = extract_audio_from_video(video_path)
        else:
            raise ValueError("Nenhum áudio ou vídeo fornecido")

        # 2. Whisper transcription
        log("Transcrevendo com Whisper...", logger=logger)
        result = whisper_model.transcribe(
            audio_path,
            language="pt",
            verbose=False,
            fp16=False,  # important when running on CPU
        )
        # Strip each segment before joining so that segments carrying their
        # own leading/trailing whitespace do not produce doubled spaces.
        pieces = (seg["text"].strip() for seg in result["segments"])
        text = " ".join(piece for piece in pieces if piece)

        # NOTE: the RoBERTa sentiment path (analyze_sentiment_portgues) is
        # disabled; translation + emotion analysis is used instead.

        # 3. Translation + emotion
        log("Traduzindo texto para inglês...", logger=logger)
        text_en = translate_long_text(text)
        log("Analisando emoção...", logger=logger)
        emotion_result = analyze_emotion(text_en)

        return {
            "transcription": text,
            "translation": text_en,
            "analysis": {
                "emotion": emotion_result["emotion"],
                "confidence": emotion_result["confidence"],
                "emotion_raw": emotion_result["emotion_raw"],
                "all_emotions": emotion_result["all_emotions"],
            },
        }
    finally:
        # Cleanup: always remove the temporary files created above.
        _remove_temp_file(video_path, logger)
        _remove_temp_file(audio_path, logger)