Spaces:

CLMARRARA
/

tech_challenge_fase_4_audio_backend

Sleeping

App Files Files Community

tech_challenge_fase_4_audio_backend / app /audio_processor.py

CLMARRARA

Adicionado POC do serviço de audio (Adicionado tradutor e emotion-english)

eb54254 about 1 month ago

raw

history blame contribute delete

3.82 kB

	import base64
	import tempfile
	import os
	from app.config import WHISPER_MODEL, WHISPER_MODEL_PATH
	# Point the XDG cache directory at the configured model path. This is done
	# before Whisper is imported and loaded below — presumably so the model
	# weights are stored under WHISPER_MODEL_PATH (confirm against Whisper's
	# default download-root resolution). Do not move this line below the
	# `import whisper` / `load_model` statements without checking that.
	os.environ["XDG_CACHE_HOME"] = str(WHISPER_MODEL_PATH) # e.g. "./models"
	from app.logger import log
	from app.sentiment_model import analyze_sentiment_portgues
	from app.translator import translate_long_text
	from app.emotion_model import analyze_emotion
	import whisper
	from moviepy.editor import VideoFileClip
	# The Whisper model is loaded once, at import time, and the resulting
	# module-level object is reused by process_audio() for every transcription.
	whisper_model = whisper.load_model(WHISPER_MODEL)


	# -------------------------
	# 🔥 UTIL: salvar base64
	# -------------------------
	def save_base64_to_file(base64_data, suffix):
	    """Decode a base64 payload and write it to a new temporary file.

	    Args:
	        base64_data: Base64-encoded content (``str`` or bytes-like).
	        suffix: File-name suffix for the temporary file, e.g. ``".wav"``.

	    Returns:
	        The path of the created file. The file is created with
	        ``delete=False``, so it is NOT removed automatically; the caller
	        is responsible for deleting it.

	    Raises:
	        ValueError: If ``base64_data`` cannot be decoded as base64.
	    """
	    try:
	        file_bytes = base64.b64decode(base64_data)
	    except (ValueError, TypeError) as err:
	        # binascii.Error (bad padding/alphabet) is a ValueError subclass;
	        # TypeError covers None and other non string/bytes inputs.
	        raise ValueError("Base64 inválido") from err

	    # Write through the handle NamedTemporaryFile already opened and let the
	    # ``with`` close it. The previous code left that handle open and opened
	    # the same path a second time, which leaks a descriptor and fails on
	    # platforms that forbid opening an already-open temporary file.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
	        temp_file.write(file_bytes)

	    return temp_file.name

	# -------------------------
	# 🔥 UTIL: extrair áudio
	# -------------------------
	def extract_audio_from_video(video_path):
	    """Write the audio track of a video to a new temporary ``.wav`` file.

	    Args:
	        video_path: Path of the video file to open with ``VideoFileClip``.

	    Returns:
	        The path of the created ``.wav`` file. The file is not removed
	        automatically; the caller is responsible for deleting it.

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    # Only a unique path is needed here: close our own handle immediately so
	    # it is not leaked and so the audio writer can open the file itself.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
	        audio_path = temp_audio.name

	    try:
	        video = VideoFileClip(video_path)
	        try:
	            if video.audio is None:
	                # Fail with a clear message instead of an AttributeError
	                # on ``None.write_audiofile``.
	                raise ValueError("Vídeo não possui faixa de áudio")
	            video.audio.write_audiofile(audio_path)
	        finally:
	            # Release the clip's resources even when extraction fails.
	            video.close()
	    except Exception:
	        # Nothing will ever be returned to the caller, so do not leave the
	        # (empty or partial) temporary file behind.
	        if os.path.exists(audio_path):
	            os.remove(audio_path)
	        raise

	    return audio_path


	# -------------------------
	# 🔥 PROCESSAMENTO PRINCIPAL
	# -------------------------
	def process_audio(video_base64=None, audio_base64=None, logger=None):
	    """Transcribe speech, translate it to English and classify its emotion.

	    The audio comes from ``audio_base64`` when it is provided; otherwise it
	    is extracted from ``video_base64``. Temporary files created along the
	    way are removed before returning, whether or not processing succeeded.

	    Args:
	        video_base64: Base64-encoded video (saved with an ``.mp4`` suffix).
	            Used only when ``audio_base64`` is empty.
	        audio_base64: Base64-encoded audio (saved with a ``.wav`` suffix).
	            Takes precedence over ``video_base64``.
	        logger: Forwarded to ``log`` for every progress message.

	    Returns:
	        A dict with the keys ``"transcription"``, ``"translation"`` and
	        ``"analysis"``; the latter holds ``"emotion"``, ``"confidence"``,
	        ``"emotion_raw"`` and ``"all_emotions"`` taken from the result of
	        ``analyze_emotion``.

	    Raises:
	        ValueError: If neither audio nor video is provided, or if the
	            base64 payload is invalid.
	    """
	    video_path = None
	    audio_path = None

	    try:
	        # 1. Audio source
	        if audio_base64:
	            log("Recebido áudio base64", logger=logger)
	            audio_path = save_base64_to_file(audio_base64, ".wav")
	        elif video_base64:
	            log("Recebido vídeo base64", logger=logger)
	            video_path = save_base64_to_file(video_base64, ".mp4")

	            log("Extraindo áudio do vídeo...", logger=logger)
	            audio_path = extract_audio_from_video(video_path)
	        else:
	            raise ValueError("Nenhum áudio ou vídeo fornecido")

	        # 2. Whisper transcription
	        log("Transcrevendo com Whisper...", logger=logger)
	        result = whisper_model.transcribe(
	            audio_path,
	            language="pt",
	            verbose=False,
	            fp16=False,  # flagged by the original author as important on CPU
	        )

	        # Segment texts carry their own surrounding whitespace; strip each
	        # one (and skip empty ones) so the joined transcription has no
	        # leading or doubled spaces.
	        stripped_segments = (seg["text"].strip() for seg in result["segments"])
	        text = " ".join(piece for piece in stripped_segments if piece)

	        # NOTE: the Portuguese sentiment step (analyze_sentiment_portgues)
	        # is currently disabled in favour of translation + emotion below.

	        # 3. Translation + emotion
	        # Use the same logging helper as the earlier steps (these two
	        # messages were previously plain print() calls that bypassed it).
	        log("Traduzindo texto para inglês...", logger=logger)
	        text_en = translate_long_text(text)

	        log("Analisando emoção...", logger=logger)
	        emotion_result = analyze_emotion(text_en)

	        return {
	            "transcription": text,
	            "translation": text_en,
	            "analysis": {
	                "emotion": emotion_result["emotion"],
	                "confidence": emotion_result["confidence"],
	                "emotion_raw": emotion_result["emotion_raw"],
	                "all_emotions": emotion_result["all_emotions"],
	            },
	        }

	    finally:
	        # Cleanup is best-effort: an error while deleting a temporary file
	        # must not replace the real exception or the computed result.
	        for temp_path in (video_path, audio_path):
	            if not temp_path:
	                continue
	            try:
	                os.remove(temp_path)
	            except FileNotFoundError:
	                pass
	            except OSError as err:
	                log(
	                    f"Falha ao remover arquivo temporário {temp_path}: {err}",
	                    logger=logger,
	                )