File size: 1,044 Bytes
3d5c597 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import gradio as gr
import soundfile as sf
from transformers import AutoProcessor, pipeline
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
# Checkpoint name shared by the processor and the OpenVINO model.
model_id = "distil-whisper/distil-large-v2"

# The processor supplies the tokenizer and feature extractor handed to the
# pipeline below.
processor = AutoProcessor.from_pretrained(model_id)

# Load the speech model through optimum's OpenVINO wrapper (export=True).
ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True)

# Limit how many new tokens a single generate call may produce.
ov_model.generation_config.max_new_tokens = 128

# Module-level ASR pipeline used by the transcription function.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=ov_model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    batch_size=16,
    chunk_length_s=15,
)
# Transcription function
def transcribe(audio):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio: Path to an audio file readable by ``soundfile``, or a falsy
            value (``None`` / ``""``) when no audio was provided.

    Returns:
        The transcribed text, or an empty string when no audio was provided.
    """
    # Nothing was uploaded: return empty text instead of failing in sf.read.
    if not audio:
        return ""

    samples, sampling_rate = sf.read(audio, dtype="float32")

    # soundfile yields a (frames, channels) array for multi-channel files,
    # while the ASR pipeline only accepts 1-D mono input; average the channels.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Hand over the real sampling rate together with the samples so the
    # pipeline can resample when the file's rate differs from the one the
    # feature extractor expects. A bare array would be assumed to already be
    # at the model's rate and would be mis-transcribed otherwise.
    # NOTE(review): resampling inside the pipeline may need torchaudio
    # installed — confirm against the deployment requirements.
    result = pipe({"raw": samples, "sampling_rate": sampling_rate})
    return result["text"]
# Launch Gradio UI
# Build the web UI: one audio input delivered as a file path, one text output.
demo = gr.Interface(
    title="🧠 Distil-Whisper + OpenVINO ASR",
    description="Upload audio to transcribe using Distil-Whisper accelerated with Intel OpenVINO.",
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start serving the interface.
demo.launch()
|