File size: 3,824 Bytes
4d96ab6
 
 
47e437f
f4c6a80
53e656d
eb54254
 
 
f7aabd4
4d96ab6
58153e3
4d96ab6
 
 
 
 
 
e9990b1
 
 
 
 
4d96ab6
e9990b1
4d96ab6
 
 
 
 
 
 
 
 
 
 
 
 
e9990b1
4d96ab6
 
 
 
 
 
 
53e656d
4d96ab6
 
 
 
 
 
 
 
 
 
53e656d
4d96ab6
 
 
53e656d
4d96ab6
 
53e656d
4d96ab6
 
 
 
 
 
 
 
 
53e656d
4d96ab6
e9990b1
4d96ab6
 
e9990b1
 
4d96ab6
 
e9990b1
4d96ab6
 
5440559
4d96ab6
eb54254
 
 
 
 
 
 
4d96ab6
eb54254
 
 
4d96ab6
eb54254
 
 
 
 
4d96ab6
 
 
eb54254
 
 
 
 
 
 
 
4d96ab6
eb54254
4d96ab6
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import base64
import tempfile
import os
from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH
# Point the XDG cache directory at WHISPER_MODEL_PATH (e.g. "./models").
# This is deliberately done before `whisper` is imported and the model is
# loaded further down, so the statement order here must be preserved.
# NOTE(review): presumably this is what makes Whisper download/store its
# weights under that directory -- confirm against whisper.load_model.
os.environ["XDG_CACHE_HOME"] = str(WHISPER_MODEL_PATH)
from app.logger import log
from app.sentiment_model import analyze_sentiment_portgues
from app.translator import translate_long_text
from app.emotion_model import analyze_emotion
import whisper
from moviepy.editor import VideoFileClip
# Load the Whisper model once, at import time; process_audio() below reuses
# this module-level instance for every transcription.
whisper_model = whisper.load_model(WHISPER_MODEL)


# -------------------------
# 🔥 UTIL: salvar base64
# -------------------------
def save_base64_to_file(base64_data, suffix):
    """Decode a base64 payload and store it in a new temporary file.

    Args:
        base64_data: Base64-encoded content (ASCII ``str`` or bytes-like).
        suffix: File name suffix for the temporary file, e.g. ``".wav"``.

    Returns:
        Path of the temporary file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If ``base64_data`` cannot be decoded as base64. The
            underlying decode error is attached as ``__cause__``.
    """
    try:
        file_bytes = base64.b64decode(base64_data)
    except (ValueError, TypeError) as err:
        # binascii.Error is a ValueError subclass; TypeError covers inputs
        # that are neither str nor bytes-like (e.g. None).
        raise ValueError("Base64 inválido") from err

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        # Write through the handle NamedTemporaryFile already opened instead
        # of opening the path a second time; the `with` closes the handle
        # deterministically rather than leaving it to garbage collection.
        with temp_file:
            temp_file.write(file_bytes)
    except Exception:
        # The caller never receives the path on failure, so remove the
        # partially written file here to avoid leaving an orphan behind.
        try:
            os.remove(temp_file.name)
        except OSError:
            pass
        raise

    return temp_file.name

# -------------------------
# 🔥 UTIL: extrair áudio
# -------------------------
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary ``.wav`` file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the temporary ``.wav`` file holding the extracted audio.
        The caller is responsible for removing it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Only a unique path is needed here: close the handle right away so the
    # audio writer can open the file itself and no descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        audio_path = temp_audio.name

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                # Without this guard the failure would be an opaque
                # AttributeError on None.
                raise ValueError("O vídeo não possui faixa de áudio")
            video.audio.write_audiofile(audio_path)
        finally:
            # Always release the clip's readers, even if writing failed.
            video.close()
    except Exception:
        # The caller never receives the path on failure, so remove the
        # (possibly empty or partial) temp file before propagating.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise

    return audio_path


# -------------------------
# 🔥 PROCESSAMENTO PRINCIPAL
# -------------------------
def process_audio(video_base64=None, audio_base64=None, logger=None):
    """Transcribe base64-encoded audio or video and analyze its emotion.

    The audio is taken from ``audio_base64`` when given; otherwise it is
    extracted from ``video_base64``. It is transcribed with the module-level
    Whisper model (language fixed to Portuguese), the transcription is
    translated with ``translate_long_text`` and the translation is passed to
    ``analyze_emotion``.

    Args:
        video_base64: Base64-encoded video; used only when ``audio_base64``
            is not provided.
        audio_base64: Base64-encoded audio; takes precedence over the video.
        logger: Optional logger forwarded to ``log``.

    Returns:
        A dict with ``"transcription"``, ``"translation"`` and an
        ``"analysis"`` dict containing ``"emotion"``, ``"confidence"``,
        ``"emotion_raw"`` and ``"all_emotions"``.

    Raises:
        ValueError: If neither input is provided, or the base64 is invalid.
    """
    video_path = None
    audio_path = None

    try:
        # 1. Audio source: direct audio wins over audio extracted from video.
        if audio_base64:
            log("Recebido áudio base64", logger=logger)
            audio_path = save_base64_to_file(audio_base64, ".wav")
        elif video_base64:
            log("Recebido vídeo base64", logger=logger)
            video_path = save_base64_to_file(video_base64, ".mp4")

            log("Extraindo áudio do vídeo...", logger=logger)
            audio_path = extract_audio_from_video(video_path)
        else:
            raise ValueError("Nenhum áudio ou vídeo fornecido")

        # 2. Whisper transcription.
        log("Transcrevendo com Whisper...", logger=logger)

        result = whisper_model.transcribe(
            audio_path,
            language="pt",
            verbose=False,
            fp16=False,  # important when running on CPU
        )

        # Strip each segment before joining: segment texts can carry their
        # own leading whitespace, which would otherwise produce a leading
        # space and doubled spaces in the joined transcription.
        stripped_segments = (seg["text"].strip() for seg in result["segments"])
        text = " ".join(part for part in stripped_segments if part)

        # 3. Translation + emotion analysis.
        # (The sentiment step based on analyze_sentiment_portgues is
        # currently disabled; only translation + emotion analysis runs.)
        # Progress goes through log() like the other steps, not print().
        log("Traduzindo texto para inglês...", logger=logger)
        text_en = translate_long_text(text)

        log("Analisando emoção...", logger=logger)
        emotion_result = analyze_emotion(text_en)

        return {
            "transcription": text,
            "translation": text_en,
            "analysis": {
                "emotion": emotion_result["emotion"],
                "confidence": emotion_result["confidence"],
                "emotion_raw": emotion_result["emotion_raw"],
                "all_emotions": emotion_result["all_emotions"],
            },
        }

    finally:
        # Cleanup of temporary files. This is best-effort: a failure to
        # delete must not mask the real exception or discard the result.
        for temp_path in (video_path, audio_path):
            if not temp_path:
                continue
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as err:
                log(
                    f"Falha ao remover arquivo temporário {temp_path}: {err}",
                    logger=logger,
                )