Spaces:

Kaiyeee
/

MultiModal_Chat_Using_OpenVino_and_Whisper

Sleeping

Kaiyeee committed on May 31, 2025

Commit

3d5c597

verified ·

1 Parent(s): 89c92b7

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import soundfile as sf
+from transformers import AutoProcessor, pipeline
+from optimum.intel.openvino import OVModelForSpeechSeq2Seq
+# Load model + processor
+model_id = "distil-whisper/distil-large-v2"
+processor = AutoProcessor.from_pretrained(model_id)
+ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True)
+ov_model.generation_config.max_new_tokens = 128
+# Create HF pipeline
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=ov_model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    chunk_length_s=15,
+    batch_size=16,
+)
+# Transcription function
+def transcribe(audio):
+    audio_array, sampling_rate = sf.read(audio)
+    result = pipe(audio_array)
+    return result["text"]
+# Launch Gradio UI
+gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(type="filepath"),
+    outputs="text",
+    title="🧠 Distil-Whisper + OpenVINO ASR",
+    description="Upload audio to transcribe using Distil-Whisper accelerated with Intel OpenVINO.",
+).launch()