Spaces:

CLMARRARA
/

tech_challenge_fase_4_backend

Sleeping

File size: 1,552 Bytes

1a64f28
 
95ba2d8
 
 
1a64f28
 
 
32033ee
699a6f5
 
1a64f28
 
 
4ff5df1
0f34a7d
4ff5df1
699a6f5
4ff5df1
0f34a7d
32033ee
699a6f5
 
 
 
 
 
 
 
 
1a64f28
 
0f34a7d
1a64f28
 
 
 
 
 
 
 
 
 
0f34a7d
4ff5df1
0f34a7d
1a64f28

from faster_whisper import WhisperModel
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH
from app.logger import log
from app.video_utils import extract_audio

# Shared faster-whisper model. It is constructed once, when this module is
# imported, and reused by every transcription call in this module.
whisper_model = WhisperModel(
    WHISPER_MODEL,

    # Passed through from app.config; presumably the directory where the model
    # files are downloaded/cached -- confirm against the faster-whisper docs.
    download_root=WHISPER_MODEL_PATH,
    compute_type="int8", # Use "int8" for a lighter model, or "float16" for better accuracy (requires a compatible GPU).
)
# Shared VADER analyzer used to score the transcribed text.
analyzer = SentimentIntensityAnalyzer()

def extract_audio_sentiment_from_video(video_path, logger=None):
    """Transcribe a video's audio track and classify the transcript's sentiment.

    The audio is extracted with ``extract_audio``, transcribed with the
    module-level ``whisper_model`` (forced to Portuguese), and the resulting
    text is scored with the module-level VADER ``analyzer``. The VADER
    ``compound`` score is mapped to a label: ``<= -0.3`` is ``"negative"``,
    ``>= 0.3`` is ``"positive"``, anything in between is ``"neutral"``.

    Args:
        video_path: Path of the video file, forwarded to ``extract_audio``.
        logger: Unused. Kept so existing callers that pass it keep working;
            all messages go through the module-level ``log`` helper.

    Returns:
        A ``(text, sentiment)`` tuple: the whitespace-normalised transcript
        and one of ``"negative"``, ``"neutral"`` or ``"positive"``.
    """
    log("Iniciando extração de áudio a partir do vídeo com Whisper e MoviePy.")

    # NOTE(review): the output path is fixed, so concurrent calls would
    # presumably overwrite each other's audio file -- confirm with callers.
    audio_path = extract_audio(video_path, output_audio="./data/output/audio.wav")

    log("Áudio extraído com sucesso! Iniciando transcrição e análise de sentimento com Whisper e VADER.")

    # The second return value (transcription info) is not needed here.
    segments, _ = whisper_model.transcribe(
        audio_path,
        language="pt",
        task="transcribe",
        beam_size=5,
        best_of=5,
        temperature=0,
        initial_prompt="Este áudio está em português do Brasil e contém frases informais."
    )

    # Whisper segment texts usually start with a space; strip each piece and
    # skip empty ones so the joined transcript has no leading or doubled spaces.
    pieces = (seg.text.strip() for seg in segments)
    text = " ".join(piece for piece in pieces if piece)
    log(f"Transcrição: {text}")

    # NOTE(review): VADER's lexicon is English, while the transcript is
    # Portuguese -- scores may be unreliable; confirm this is acceptable.
    score = analyzer.polarity_scores(text)["compound"]

    if score <= -0.3:
        sentiment = "negative"
    elif score >= 0.3:
        sentiment = "positive"
    else:
        sentiment = "neutral"

    log(f"Sentimento: {sentiment} ({score})")

    log("Áudio processado com Whisper e VADER com sucesso!")

    return text, sentiment