Spaces:

JennyGD
/

transcritor-whisper-medium

Build error

App Files Files Community

transcritor-whisper-medium / app.py

JennyGD

Upload 3 files

9666fb9 verified 7 months ago

raw

history blame contribute delete

4.55 kB

	import streamlit as st
	import tempfile
	import os
	import time
	import io
	import torch
	from transformers import pipeline

	# Page chrome: browser tab title/icon, then the visible heading and a
	# one-line usage hint shown above the uploader.
	st.set_page_config(
	    page_icon="🎙️",
	    page_title="Transcrição de Áudios",
	)
	st.title("🎙️ Transcrição de Ligações - Evolux")
	st.write("Upload de áudios OGG para transcrição automática")

	def converter_audio_para_wav(audio_bytes):
	    """Convert OGG audio bytes to an in-memory WAV stream using pydub.

	    The audio is normalized, low-pass filtered at 3000 Hz and resampled to
	    16 kHz mono before being exported.

	    Args:
	        audio_bytes: Raw bytes of an OGG audio file.

	    Returns:
	        An ``io.BytesIO`` positioned at offset 0 that contains the WAV data,
	        or ``None`` when the conversion fails (the error is reported to the
	        user through ``st.error``).
	    """
	    try:
	        from pydub import AudioSegment

	        # Optional local Windows install of ffmpeg. Adjust these two paths if
	        # the executables live elsewhere (e.g. C:\ffmpeg\ffmpeg.exe).
	        ffmpeg_local = r"C:\ffmpeg\bin\ffmpeg.exe"
	        ffprobe_local = r"C:\ffmpeg\bin\ffprobe.exe"

	        # Only override pydub's settings when the executables really exist.
	        # Forcing a missing Windows path would replace pydub's own defaults
	        # with a location that cannot work on other hosts.
	        if os.path.isfile(ffmpeg_local):
	            AudioSegment.converter = ffmpeg_local
	        if os.path.isfile(ffprobe_local):
	            AudioSegment.ffprobe = ffprobe_local

	        audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="ogg")

	        audio = audio.normalize()
	        audio = audio.low_pass_filter(3000)
	        audio = audio.set_frame_rate(16000).set_channels(1)

	        wav_io = io.BytesIO()
	        audio.export(wav_io, format="wav")
	        # Rewind so callers can read the stream from the start.
	        wav_io.seek(0)
	        return wav_io
	    except Exception as e:
	        # UI boundary: report the failure and signal it with None.
	        st.error(f"Erro na conversão: {e}")
	        return None

	@st.cache_resource
	def _carregar_pipeline():
	    """Build the speech-recognition pipeline; raise on failure.

	    Kept separate from ``carregar_modelo`` so that only a successfully
	    built pipeline is stored by ``st.cache_resource``. Catching the error
	    inside the cached function would make it return ``None``, and that
	    ``None`` would then be the cached value for later calls.
	    """
	    pipe = pipeline(
	        "automatic-speech-recognition",
	        model="jlondonobo/whisper-medium-pt",
	        device="cuda:0" if torch.cuda.is_available() else "cpu",
	    )
	    # Pin decoding to Portuguese transcription.
	    pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
	        language="pt", task="transcribe"
	    )
	    return pipe


	def carregar_modelo():
	    """Return the cached ASR pipeline, or ``None`` if it cannot be loaded.

	    Returns:
	        The pipeline built by ``_carregar_pipeline`` on success. On failure
	        the error is shown with ``st.error`` and ``None`` is returned; since
	        the failure is handled outside the cached loader, a later call tries
	        to load the model again.
	    """
	    try:
	        return _carregar_pipeline()
	    except Exception as e:
	        st.error(f"Erro ao carregar modelo: {e}")
	        return None

	# ---- Upload and transcription flow ----
	arquivo_audio = st.file_uploader(
	    "Escolha o arquivo de áudio (OGG, MP3, WAV, M4A)",
	    type=['ogg', 'mp3', 'wav', 'm4a']
	)

	if arquivo_audio is not None:
	    st.audio(arquivo_audio)

	    # Identifies the current upload so a stored transcription is only shown
	    # for the file it was produced from.
	    chave_arquivo = (arquivo_audio.name, arquivo_audio.size)

	    if st.button("🎯 Transcrever Áudio"):
	        # Drop any previous result before starting a new run.
	        st.session_state.pop("transcricao", None)

	        with st.spinner('🔄 Processando... (pode levar alguns minutos)'):
	            try:
	                # The MIME type is supplied by the browser, so also accept
	                # the file extension as evidence of an OGG upload.
	                eh_ogg = (
	                    arquivo_audio.type == 'audio/ogg'
	                    or arquivo_audio.name.lower().endswith('.ogg')
	                )
	                if eh_ogg:
	                    st.info("🔧 Convertendo OGG para WAV...")
	                    wav_io = converter_audio_para_wav(arquivo_audio.getvalue())
	                    if wav_io is None:
	                        st.error("❌ Falha na conversão do áudio")
	                        st.stop()
	                    dados_audio = wav_io.getvalue()
	                else:
	                    # Other formats are handed to the pipeline as raw bytes.
	                    dados_audio = arquivo_audio.getvalue()

	                modelo = carregar_modelo()
	                if modelo is None:
	                    st.error("❌ Modelo não carregado")
	                else:
	                    inicio = time.perf_counter()
	                    resultado = modelo(
	                        dados_audio,
	                        chunk_length_s=30,
	                        batch_size=8,
	                        generate_kwargs={
	                            "num_beams": 5,
	                            "temperature": 0.0,
	                            "do_sample": False
	                        },
	                        return_timestamps=False
	                    )
	                    tempo_processamento = time.perf_counter() - inicio

	                    # Keep the result in session state: clicking another
	                    # widget (such as the download button) reruns the script
	                    # with this button reported as not pressed.
	                    st.session_state["transcricao"] = {
	                        "arquivo": chave_arquivo,
	                        "texto": resultado["text"].strip(),
	                        "tempo": tempo_processamento,
	                    }

	            except Exception as e:
	                st.error(f"❌ Erro na transcrição: {str(e)}")
	                st.info("💡 Tente usar formato MP3 ou WAV")

	    # Render the stored transcription on every run, not only on the run in
	    # which the button was pressed.
	    transcricao = st.session_state.get("transcricao")
	    if transcricao is not None and transcricao["arquivo"] == chave_arquivo:
	        texto = transcricao["texto"]

	        st.success("✅ Transcrição concluída!")

	        col1, col2 = st.columns(2)
	        with col1:
	            st.metric("Tempo", f"{transcricao['tempo']:.2f}s")
	        with col2:
	            st.metric("Caracteres", len(texto))

	        st.subheader("📝 Transcrição:")
	        st.text_area("Texto transcrito", texto, height=300)

	        st.download_button(
	            label="📥 Baixar Transcrição",
	            data=texto,
	            file_name=f"transcricao_{arquivo_audio.name}.txt",
	            mime="text/plain"
	        )

	# Static footer: a horizontal rule followed by a short list describing the
	# supported formats, the model and the language. Each entry is emitted as
	# its own markdown element, in order.
	for linha_rodape in (
	    "---",
	    "💡 Informações:",
	    "- Formatos: OGG, MP3, WAV, M4A",
	    "- Modelo: Whisper Medium PT (fine-tuned para português via Hugging Face)",
	    "- Idioma: Português",
	):
	    st.markdown(linha_rodape)