Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration | |
| import librosa | |
| import torch | |
| import os | |
| from huggingface_hub import login | |
# Read the Hugging Face access token from the environment and log in with it.
# The app refuses to start without a token (HF_TOKEN must be set).
token = os.getenv("HF_TOKEN")
if not token:
    raise ValueError("El token de Hugging Face no está configurado en las variables de entorno.")
login(token=token)
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the processor and the model from the Hub, then place the model on the
# selected device.
repo_name = "HugoZeballos/rapa_nui_asr_2"  # Adjust to the name of your model on Hugging Face
processor = Speech2TextProcessor.from_pretrained(repo_name)
model = Speech2TextForConditionalGeneration.from_pretrained(repo_name)
model = model.to(device)
# Gradio components for the demo. The audio component is configured with
# type="filepath" and no `source` argument (original note: change `source` to
# a valid setting or remove it).
inputs = gr.Audio(type="filepath")
outputs = gr.Textbox(label="Transcripción")
def transcribe(audio_path):
    """Transcribe an audio file with the module-level speech-to-text model.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) is treated as "no audio supplied".

    Returns:
        The decoded transcription as a string, or ``""`` when no audio path
        was given.
    """
    # Nothing to load when no audio was supplied: return an empty
    # transcription instead of failing inside the audio loader.
    if not audio_path:
        return ""

    # Load the file resampled to 16 kHz.
    audio, sr = librosa.load(audio_path, sr=16000)

    # Features must live on the same device as the model; using the shared
    # `device` (rather than a hard-coded "cuda") keeps CPU-only hosts working.
    features = processor(
        audio,
        sampling_rate=sr,
        return_tensors="pt",
        padding="longest",
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        predicted_ids = model.generate(
            features["input_features"],
            attention_mask=features["attention_mask"],
        )

    # A single file was processed, so take the first decoded sequence.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Build the interface: wire the transcription callback to the audio input and
# the text output.
interface = gr.Interface(fn=transcribe, inputs=inputs, outputs=outputs, title="ASR Demo")

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()