Commit · 03195d9
1 Parent(s): db9006e
Set WhisperModel device to "auto"; remove the unused `video` bool vars left over from the audio/video interface change
Browse files
- app.py +0 -2
- src/transcriber.py +1 -1
app.py
CHANGED
|
@@ -7,7 +7,6 @@ def main():
|
|
| 7 |
gr.Markdown('An automatic speech recognition tool using [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Supports multilingual video transcription and translation to english. Users may set the max words per line.')
|
| 8 |
with gr.Tabs(selected="video") as tabs:
|
| 9 |
with gr.Tab("Video", id="video"):
|
| 10 |
-
video = True
|
| 11 |
file = gr.File(file_types=["video"],type="filepath", label="Upload a video")
|
| 12 |
file_type = gr.Radio(choices=["video"], value="video", label="File Type", visible=False)
|
| 13 |
max_words_per_line = gr.Number(value=6, label="Max words per line")
|
|
@@ -26,7 +25,6 @@ def main():
|
|
| 26 |
allow_flagging="never"
|
| 27 |
)
|
| 28 |
with gr.Tab("Audio", id = "audio"):
|
| 29 |
-
video = False
|
| 30 |
file = gr.File(file_types=["audio"],type="filepath", label="Upload an audio file")
|
| 31 |
file_type = gr.Radio(choices=["audio"], value="audio", label="File Type", visible=False)
|
| 32 |
max_words_per_line = gr.Number(value=6, label="Max words per line")
|
|
|
|
| 7 |
gr.Markdown('An automatic speech recognition tool using [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Supports multilingual video transcription and translation to english. Users may set the max words per line.')
|
| 8 |
with gr.Tabs(selected="video") as tabs:
|
| 9 |
with gr.Tab("Video", id="video"):
|
|
|
|
| 10 |
file = gr.File(file_types=["video"],type="filepath", label="Upload a video")
|
| 11 |
file_type = gr.Radio(choices=["video"], value="video", label="File Type", visible=False)
|
| 12 |
max_words_per_line = gr.Number(value=6, label="Max words per line")
|
|
|
|
| 25 |
allow_flagging="never"
|
| 26 |
)
|
| 27 |
with gr.Tab("Audio", id = "audio"):
|
|
|
|
| 28 |
file = gr.File(file_types=["audio"],type="filepath", label="Upload an audio file")
|
| 29 |
file_type = gr.Radio(choices=["audio"], value="audio", label="File Type", visible=False)
|
| 30 |
max_words_per_line = gr.Number(value=6, label="Max words per line")
|
src/transcriber.py
CHANGED
|
@@ -77,7 +77,7 @@ def transcriber(file_input:gr.File,
|
|
| 77 |
audio_input = convert_video_to_audio(file_input)
|
| 78 |
else:
|
| 79 |
audio_input = file_input
|
| 80 |
-
model = WhisperModel(model_version, device="
|
| 81 |
segments, _ = model.transcribe(
|
| 82 |
audio_input,
|
| 83 |
beam_size=5,
|
|
|
|
| 77 |
audio_input = convert_video_to_audio(file_input)
|
| 78 |
else:
|
| 79 |
audio_input = file_input
|
| 80 |
+
model = WhisperModel(model_version, device="auto", compute_type="int8")
|
| 81 |
segments, _ = model.transcribe(
|
| 82 |
audio_input,
|
| 83 |
beam_size=5,
|