| import base64 |
| import tempfile |
| import os |
| from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH |
| os.environ["XDG_CACHE_HOME"] = str(WHISPER_MODEL_PATH) |
| from app.logger import log |
| from app.sentiment_model import analyze_sentiment_portgues |
| from app.translator import translate_long_text |
| from app.emotion_model import analyze_emotion |
| import whisper |
| from moviepy.editor import VideoFileClip |
| whisper_model = whisper.load_model(WHISPER_MODEL) |
|
|
|
|
| |
| |
| |
def save_base64_to_file(base64_data, suffix):
    """Decode a base64 payload and write it to a new temporary file.

    The file is created with ``delete=False``, so it outlives this call;
    the caller owns it and must remove it when it is no longer needed.

    Args:
        base64_data: Base64-encoded content (``str`` or bytes-like).
        suffix: Suffix for the temporary file name, e.g. ``".wav"``.

    Returns:
        Path of the temporary file that holds the decoded bytes.

    Raises:
        ValueError: If ``base64_data`` cannot be decoded as base64.
    """
    try:
        file_bytes = base64.b64decode(base64_data)
    except (ValueError, TypeError) as exc:
        # binascii.Error (bad padding/alphabet) is a ValueError subclass;
        # TypeError covers non-string inputs such as None.
        raise ValueError("Base64 inválido") from exc

    # Write through the handle NamedTemporaryFile already opened and let the
    # ``with`` block close it. Reopening the path while the first handle is
    # still open leaks a descriptor and is not portable across platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(file_bytes)

    return temp_file.name
|
|
| |
| |
| |
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary ``.wav`` file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the temporary ``.wav`` file. The caller owns the file and
        must delete it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Only the reserved path is needed here: close our own handle right away
    # so the audio writer can open the file itself and no descriptor leaks.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("Vídeo não contém faixa de áudio")
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the clip's reader resources even if the export fails.
            video.close()
    except Exception:
        # On failure the caller never receives the path, so the temporary
        # file has to be removed here or it would be orphaned.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise

    return audio_path
|
|
|
|
| |
| |
| |
def process_audio(video_base64=None, audio_base64=None, logger=None):
    """Transcribe base64 audio or video, translate the text and detect emotion.

    Steps: decode the payload to a temporary file (extracting the audio
    track first when a video is given), transcribe it with the module-level
    Whisper model using ``language="pt"``, pass the transcription to
    ``translate_long_text`` and run ``analyze_emotion`` on the result.
    Temporary files are removed before returning, on success and on failure.

    Args:
        video_base64: Base64-encoded video. Only used when ``audio_base64``
            is empty or ``None``.
        audio_base64: Base64-encoded audio. Takes precedence over
            ``video_base64`` when both are supplied.
        logger: Forwarded to ``log`` for every progress message.

    Returns:
        A dict with the keys ``"transcription"``, ``"translation"`` and
        ``"analysis"``; the latter holds ``"emotion"``, ``"confidence"``,
        ``"emotion_raw"`` and ``"all_emotions"`` taken from the result of
        ``analyze_emotion``.

    Raises:
        ValueError: If neither audio nor video is provided (or, from
            ``save_base64_to_file``, if the payload is not valid base64).
    """
    video_path = None
    audio_path = None

    try:
        if audio_base64:
            log("Recebido áudio base64", logger=logger)
            audio_path = save_base64_to_file(audio_base64, ".wav")
        elif video_base64:
            log("Recebido vídeo base64", logger=logger)
            video_path = save_base64_to_file(video_base64, ".mp4")

            log("Extraindo áudio do vídeo...", logger=logger)
            audio_path = extract_audio_from_video(video_path)
        else:
            raise ValueError("Nenhum áudio ou vídeo fornecido")

        log("Transcrevendo com Whisper...", logger=logger)
        result = whisper_model.transcribe(
            audio_path,
            language="pt",
            verbose=False,
            fp16=False,
        )

        # NOTE(review): Whisper segment texts normally carry their own
        # leading space, so joining them untrimmed yields doubled/leading
        # spaces. Trim each piece and skip empty ones before joining.
        pieces = (seg["text"].strip() for seg in result["segments"])
        text = " ".join(piece for piece in pieces if piece)

        # Use the injected logger like every other step instead of print().
        log("Traduzindo texto para inglês...", logger=logger)
        text_en = translate_long_text(text)

        log("Analisando emoção...", logger=logger)
        emotion_result = analyze_emotion(text_en)

        return {
            "transcription": text,
            "translation": text_en,
            "analysis": {
                "emotion": emotion_result["emotion"],
                "confidence": emotion_result["confidence"],
                "emotion_raw": emotion_result["emotion_raw"],
                "all_emotions": emotion_result["all_emotions"],
            },
        }
    finally:
        # Best-effort cleanup: a failure to delete one temporary file must
        # neither hide the real error nor prevent removal of the other file.
        for temp_path in (video_path, audio_path):
            if not temp_path:
                continue
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as exc:
                log(
                    f"Falha ao remover arquivo temporário {temp_path}: {exc}",
                    logger=logger,
                )