Spaces:

Lenylvt
/

Whisper-API

Build error

App Files Community

Lenylvt committed on Feb 17, 2024

Commit

7752cd2

verified ·

1 Parent(s): 6cb9375

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -11

app.py CHANGED Viewed

@@ -1,23 +1,42 @@
 import gradio as gr
-import whisper
-# Load the Whisper model
-model = whisper.load_model("base")
-def transcribe(audio_file):
-    # Process the audio file directly with the file path
-    result = model.transcribe(audio_file)
-    # Return the transcription
-    return result['text']
-# Create the Gradio interface
 iface = gr.Interface(fn=transcribe,
                      inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
                      outputs="text",
-                     title="Whisper Transcription",
-                     description="Upload an audio file to transcribe it using OpenAI's Whisper model.")
 # Launch the app
 if __name__ == "__main__":
     iface.launch()

 import gradio as gr
+from faster_whisper import WhisperModel
+import logging
+# Configure logging for debugging purposes
+logging.basicConfig()
+logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
+# Initialize the Whisper model with your desired configuration
+model_size = "large-v3"  # Choose the model size
+device = "cpu"  # or "cuda" if GPU is available
+compute_type = "float16"  # Choose the compute type based on your hardware
+model = WhisperModel(model_size=model_size, device=device, compute_type=compute_type)
+def transcribe(audio_file):
+    # Enable word-level timestamps
+    segments, _ = model.transcribe(audio_file, word_timestamps=True)
+    # Format and gather transcription with timestamps
+    transcription_with_timestamps = []
+    for segment in segments:
+        segment_text = f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}\n"
+        # If word-level detail is desired
+        word_details = "\n".join(
+            f"    [{word.start:.2f}s - {word.end:.2f}s] {word.word}" for word in segment.words
+        )
+        transcription_with_timestamps.append(segment_text + word_details)
+    return "\n".join(transcription_with_timestamps)
+# Build the Gradio UI: one uploaded audio file in (handed to transcribe as a file path), plain text out
 iface = gr.Interface(fn=transcribe,
                      inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
                      outputs="text",
+                     title="Enhanced Whisper Transcription with Timestamps",
+                     description="Upload an audio file to get detailed transcription with timestamps using Faster Whisper.")
 # Launch the app only when this file is run as a script, not when it is imported
 if __name__ == "__main__":
     iface.launch()