Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
from faster_whisper import WhisperModel
|
| 3 |
import datetime
|
| 4 |
import subprocess
|
|
@@ -218,13 +218,7 @@ def get_youtube(video_url):
|
|
| 218 |
|
| 219 |
def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_speakers):
|
| 220 |
"""
|
| 221 |
-
# Transcribe youtube link using OpenAI Whisper
|
| 222 |
-
1. Using OpenAI's Whisper model to separate audio into segments and generate transcripts.
|
| 223 |
-
2. Generating speaker embeddings for each segment.
|
| 224 |
-
3. Applying agglomerative clustering on the embeddings to identify the speaker for each segment.
|
| 225 |
|
| 226 |
-
Speech Recognition is based on models from OpenAI Whisper https://github.com/openai/whisper
|
| 227 |
-
Speaker diarization model and pipeline from https://github.com/pyannote/pyannote-audio
|
| 228 |
"""
|
| 229 |
|
| 230 |
# model = whisper.load_model(whisper_model)
|
|
@@ -405,9 +399,7 @@ with demo:
|
|
| 405 |
video_in.render()
|
| 406 |
with gr.Column():
|
| 407 |
gr.Markdown('''
|
| 408 |
-
|
| 409 |
-
##### Please select the source language for transcription.
|
| 410 |
-
##### You can select a range of assumed numbers of speakers.
|
| 411 |
''')
|
| 412 |
selected_source_lang.render()
|
| 413 |
selected_whisper_model.render()
|
|
|
|
| 1 |
+
import whisper
|
| 2 |
from faster_whisper import WhisperModel
|
| 3 |
import datetime
|
| 4 |
import subprocess
|
|
|
|
| 218 |
|
| 219 |
def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_speakers):
|
| 220 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
|
|
|
|
|
|
| 222 |
"""
|
| 223 |
|
| 224 |
# model = whisper.load_model(whisper_model)
|
|
|
|
| 399 |
video_in.render()
|
| 400 |
with gr.Column():
|
| 401 |
gr.Markdown('''
|
| 402 |
+
|
|
|
|
|
|
|
| 403 |
''')
|
| 404 |
selected_source_lang.render()
|
| 405 |
selected_whisper_model.render()
|