distil-small.en

Sleeping

App Files Community

reach-vb committed on Dec 7, 2023

Commit

f2b1f57

1 Parent(s): 5290d3e

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -8

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
 import tempfile
 import os
-MODEL_NAME = "openai/whisper-large-v3"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
@@ -83,7 +83,7 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
     return html_embed_str, text
@@ -94,12 +94,12 @@ mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
         gr.inputs.Audio(source="microphone", type="filepath", optional=True),
-        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
     ],
     outputs="text",
     layout="horizontal",
     theme="huggingface",
-    title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
@@ -112,12 +112,12 @@ file_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
         gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
-        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
     ],
     outputs="text",
     layout="horizontal",
     theme="huggingface",
-    title="Whisper Large V3: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
@@ -130,12 +130,12 @@ yt_transcribe = gr.Interface(
     fn=yt_transcribe,
     inputs=[
         gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe")
     ],
     outputs=["html", "text"],
     layout="horizontal",
     theme="huggingface",
-    title="Whisper Large V3: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"

 import tempfile
 import os
+MODEL_NAME = "distil-whisper/distil-small.en"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
+    text = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)["text"]
     return html_embed_str, text
     fn=transcribe,
     inputs=[
         gr.inputs.Audio(source="microphone", type="filepath", optional=True),
+        #gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
     ],
     outputs="text",
     layout="horizontal",
     theme="huggingface",
+    title="Distil-Whisper small: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
     fn=transcribe,
     inputs=[
         gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
+        # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
     ],
     outputs="text",
     layout="horizontal",
     theme="huggingface",
+    title="Distil-Whisper small: Transcribe Audio",
     description=(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
     fn=yt_transcribe,
     inputs=[
         gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
+        # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe")
     ],
     outputs=["html", "text"],
     layout="horizontal",
     theme="huggingface",
+    title="Distil-Whisper small: Transcribe YouTube",
     description=(
         "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
         f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"