File size: 949 Bytes
a0414ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr
import torch
from transformers import pipeline
import numpy as np

# Two fine-tuned Whisper ASR pipelines, loaded once at module import so each
# request only pays for inference (the first run downloads the model weights).
pipe_base = pipeline("automatic-speech-recognition", model="aitor-medrano/whisper-base-lara")
pipe_small = pipeline("automatic-speech-recognition", model="aitor-medrano/whisper-small-lara")

def greet(modelo, grabacion):
    """Transcribe a recording with the selected fine-tuned Whisper model.

    Args:
        modelo: "Base" selects the base pipeline; any other value selects
            the small one (matches the Dropdown choices "Base"/"Small").
        grabacion: ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio`` — samples is a NumPy int/float array, possibly
            stereo with shape ``(n, 2)``.

    Returns:
        str: the model name, a colon, and the transcribed text.
    """
    sr, y = grabacion
    # The transformers ASR pipeline expects float32 raw samples.
    y = y.astype(np.float32)
    # Stereo input arrives as (n, 2); average the channels down to mono,
    # since the pipeline only accepts a 1-D waveform.
    if y.ndim > 1:
        y = y.mean(axis=1)
    # Normalize to [-1, 1]. Guard the all-silent clip (peak == 0), which
    # previously divided by zero and filled the buffer with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    pipe = pipe_base if modelo == "Base" else pipe_small

    return modelo + ":" + pipe({"sampling_rate": sr, "raw": y})["text"]

# Web UI: a model selector plus an audio recorder, wired to greet();
# the transcription is rendered as plain text.
model_selector = gr.Dropdown(
    ["Base", "Small"],
    label="Modelo",
    info="Modelos de Lara entrenados",
)

demo = gr.Interface(
    fn=greet,
    inputs=[model_selector, gr.Audio()],
    outputs="text",
)
demo.launch()