Spaces:

CLMARRARA
/

tech_challenge_fase_4_audio_backend

Sleeping

File size: 3,824 Bytes

4d96ab6
 
 
47e437f
f4c6a80
53e656d
eb54254
 
 
f7aabd4
4d96ab6
58153e3
4d96ab6
 
 
 
 
 
e9990b1
 
 
 
 
4d96ab6
e9990b1
4d96ab6
 
 
 
 
 
 
 
 
 
 
 
 
e9990b1
4d96ab6
 
 
 
 
 
 
53e656d
4d96ab6
 
 
 
 
 
 
 
 
 
53e656d
4d96ab6
 
 
53e656d
4d96ab6
 
53e656d
4d96ab6
 
 
 
 
 
 
 
 
53e656d
4d96ab6
e9990b1
4d96ab6
 
e9990b1
 
4d96ab6
 
e9990b1
4d96ab6
 
5440559
4d96ab6
eb54254
 
 
 
 
 
 
4d96ab6
eb54254
 
 
4d96ab6
eb54254
 
 
 
 
4d96ab6
 
 
eb54254
 
 
 
 
 
 
 
4d96ab6
eb54254
4d96ab6

import base64
import tempfile
import os
from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH
# Point the XDG cache directory at the configured model path *before* the
# imports below run, which is why this assignment sits between imports.
# NOTE(review): presumably this makes Whisper (and possibly the other model
# libraries) store downloaded weights under that directory -- confirm.
os.environ["XDG_CACHE_HOME"] = str(WHISPER_MODEL_PATH) # "./models"
from app.logger import log
from app.sentiment_model import analyze_sentiment_portgues
from app.translator import translate_long_text
from app.emotion_model import analyze_emotion
import whisper
from moviepy.editor import VideoFileClip
# Load the Whisper model once, at import time; process_audio() uses this
# module-level instance for every transcription.
whisper_model = whisper.load_model(WHISPER_MODEL)


# -------------------------
# 🔥 UTIL: salvar base64
# -------------------------
def save_base64_to_file(base64_data, suffix):
    """Decode a base64 payload and store it in a new temporary file.

    The file is created with ``delete=False``, so it outlives this call and
    the caller is responsible for removing it.

    Args:
        base64_data: Base64-encoded content (ASCII ``str`` or bytes-like).
        suffix: File-name suffix for the temporary file, e.g. ``".wav"``.

    Returns:
        Path of the temporary file containing the decoded bytes.

    Raises:
        ValueError: If ``base64_data`` cannot be decoded as base64.
    """
    try:
        file_bytes = base64.b64decode(base64_data)
    except (ValueError, TypeError) as exc:
        # binascii.Error is a ValueError subclass; TypeError covers inputs
        # that are neither str nor bytes-like (e.g. None).
        raise ValueError("Base64 inválido") from exc

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        # Write through the handle we already own and close it on exit,
        # instead of leaking it and reopening the file by name.
        with temp_file:
            temp_file.write(file_bytes)
    except OSError:
        # Do not leave a partial file behind when the write fails.
        try:
            os.remove(temp_file.name)
        except OSError:
            pass
        raise

    return temp_file.name

# -------------------------
# 🔥 UTIL: extrair áudio
# -------------------------
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary ``.wav`` file.

    The output file is created with ``delete=False``; on success the caller
    owns it and must remove it. On failure the file is removed here.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the temporary WAV file containing the extracted audio.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Only the name is needed: close the handle immediately so it is not
    # leaked and the audio writer can open the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("O vídeo não possui faixa de áudio")
            video.audio.write_audiofile(audio_path)
        finally:
            # Always release the clip's readers, even when extraction fails.
            video.close()
    except Exception:
        # Nothing will be returned, so do not leave the orphaned file behind.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise

    return audio_path


# -------------------------
# 🔥 PROCESSAMENTO PRINCIPAL
# -------------------------
def _remove_temp_file(path, logger=None):
    """Delete a temporary file on a best-effort basis, logging any failure."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        # Cleanup must never mask the pipeline's own result or exception.
        log(f"Falha ao remover arquivo temporário {path}: {exc}", logger=logger)


def process_audio(video_base64=None, audio_base64=None, logger=None):
    """Transcribe speech, translate it to English and classify its emotion.

    The audio comes from ``audio_base64`` when given; otherwise it is
    extracted from ``video_base64``. Temporary files created along the way
    are removed before returning, whether the pipeline succeeds or fails.

    Args:
        video_base64: Base64-encoded video; used only if no audio is given.
        audio_base64: Base64-encoded audio; takes precedence over the video.
        logger: Optional logger forwarded to ``log``.

    Returns:
        A dict with ``"transcription"`` (Portuguese text), ``"translation"``
        (English text) and ``"analysis"`` (``emotion``, ``confidence``,
        ``emotion_raw`` and ``all_emotions`` taken from ``analyze_emotion``).

    Raises:
        ValueError: If neither input is provided, or the base64 is invalid.
    """
    video_path = None
    audio_path = None

    try:
        # 1. Resolve the audio source.
        if audio_base64:
            log("Recebido áudio base64", logger=logger)
            audio_path = save_base64_to_file(audio_base64, ".wav")
        elif video_base64:
            log("Recebido vídeo base64", logger=logger)
            video_path = save_base64_to_file(video_base64, ".mp4")

            log("Extraindo áudio do vídeo...", logger=logger)
            audio_path = extract_audio_from_video(video_path)
        else:
            raise ValueError("Nenhum áudio ou vídeo fornecido")

        # 2. Transcribe with Whisper.
        log("Transcrevendo com Whisper...", logger=logger)
        result = whisper_model.transcribe(
            audio_path,
            language="pt",
            verbose=False,
            fp16=False,  # required when running on CPU
        )

        # Strip each segment before joining so the transcription has no
        # leading or doubled spaces, and skip segments that are empty.
        pieces = (seg["text"].strip() for seg in result["segments"])
        text = " ".join(piece for piece in pieces if piece)

        # NOTE: the sentiment-only path (analyze_sentiment_portgues) is
        # disabled; emotion analysis on the English translation is used.

        # 3. Translate, then analyse emotion on the English text.
        log("Traduzindo texto para inglês...", logger=logger)
        text_en = translate_long_text(text)

        log("Analisando emoção...", logger=logger)
        emotion_result = analyze_emotion(text_en)

        return {
            "transcription": text,
            "translation": text_en,
            "analysis": {
                "emotion": emotion_result["emotion"],
                "confidence": emotion_result["confidence"],
                "emotion_raw": emotion_result["emotion_raw"],
                "all_emotions": emotion_result["all_emotions"],
            },
        }

    finally:
        # Remove both temporary files; one failing must not skip the other.
        _remove_temp_file(video_path, logger=logger)
        _remove_temp_file(audio_path, logger=logger)