DeeeeeeM
committed on
Commit
·
e8fae30
1
Parent(s):
5899607
added initial prompt
Browse files
app.py
CHANGED
|
@@ -11,11 +11,15 @@ import time
|
|
| 11 |
def process_media(
|
| 12 |
model_size, source_lang, upload, model_type,
|
| 13 |
max_chars, max_words, extend_in, extend_out, collapse_gaps,
|
| 14 |
-
max_lines_per_segment, line_penalty, longest_line_char_penalty,
|
|
|
|
|
|
|
| 15 |
):
|
|
|
|
|
|
|
|
|
|
| 16 |
start_time = time.time()
|
| 17 |
|
| 18 |
-
# ----- is file empty? checker ----- #
|
| 19 |
if upload is None:
|
| 20 |
return None, None, None, None
|
| 21 |
|
|
@@ -25,11 +29,26 @@ def process_media(
|
|
| 25 |
if model_type == "faster whisper":
|
| 26 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 27 |
model = stable_whisper.load_faster_whisper(model_size, device=device)
|
| 28 |
-
result = model.transcribe(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
else:
|
| 30 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 31 |
model = stable_whisper.load_model(model_size, device=device)
|
| 32 |
-
result = model.transcribe(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
#, batch_size=16, denoiser="demucs"
|
| 34 |
#result.save_as_json(word_transcription_path)
|
| 35 |
|
|
@@ -299,9 +318,8 @@ with gr.Blocks() as interface:
|
|
| 299 |
source_lang = gr.Dropdown(
|
| 300 |
choices=WHISPER_LANGUAGES,
|
| 301 |
label="Source Language",
|
| 302 |
-
value="tl",
|
| 303 |
-
interactive=True
|
| 304 |
-
allow_custom_value=False
|
| 305 |
)
|
| 306 |
model_type = gr.Dropdown(
|
| 307 |
choices=["faster whisper", "whisper"],
|
|
@@ -324,6 +342,12 @@ with gr.Blocks() as interface:
|
|
| 324 |
value="deepdml/faster-whisper-large-v3-turbo-ct2",
|
| 325 |
interactive=True
|
| 326 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
#Advanced Settings
|
| 329 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
|
| 11 |
def process_media(
|
| 12 |
model_size, source_lang, upload, model_type,
|
| 13 |
max_chars, max_words, extend_in, extend_out, collapse_gaps,
|
| 14 |
+
max_lines_per_segment, line_penalty, longest_line_char_penalty,
|
| 15 |
+
initial_prompt=None, #
|
| 16 |
+
*args
|
| 17 |
):
|
| 18 |
+
if not initial_prompt:
|
| 19 |
+
initial_prompt = None
|
| 20 |
+
|
| 21 |
start_time = time.time()
|
| 22 |
|
|
|
|
| 23 |
if upload is None:
|
| 24 |
return None, None, None, None
|
| 25 |
|
|
|
|
| 29 |
if model_type == "faster whisper":
|
| 30 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 31 |
model = stable_whisper.load_faster_whisper(model_size, device=device)
|
| 32 |
+
result = model.transcribe(
|
| 33 |
+
temp_path,
|
| 34 |
+
language=source_lang,
|
| 35 |
+
vad=True,
|
| 36 |
+
regroup=False,
|
| 37 |
+
no_speech_threshold=0.9,
|
| 38 |
+
initial_prompt=initial_prompt # <-- pass here
|
| 39 |
+
)
|
| 40 |
else:
|
| 41 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 42 |
model = stable_whisper.load_model(model_size, device=device)
|
| 43 |
+
result = model.transcribe(
|
| 44 |
+
temp_path,
|
| 45 |
+
language=source_lang,
|
| 46 |
+
vad=True,
|
| 47 |
+
regroup=False,
|
| 48 |
+
no_speech_threshold=0.9,
|
| 49 |
+
denoiser="demucs",
|
| 50 |
+
initial_prompt=initial_prompt # <-- pass here
|
| 51 |
+
)
|
| 52 |
#, batch_size=16, denoiser="demucs"
|
| 53 |
#result.save_as_json(word_transcription_path)
|
| 54 |
|
|
|
|
| 318 |
source_lang = gr.Dropdown(
|
| 319 |
choices=WHISPER_LANGUAGES,
|
| 320 |
label="Source Language",
|
| 321 |
+
value="tl",
|
| 322 |
+
interactive=True
|
|
|
|
| 323 |
)
|
| 324 |
model_type = gr.Dropdown(
|
| 325 |
choices=["faster whisper", "whisper"],
|
|
|
|
| 342 |
value="deepdml/faster-whisper-large-v3-turbo-ct2",
|
| 343 |
interactive=True
|
| 344 |
)
|
| 345 |
+
initial_prompt = gr.Textbox(
|
| 346 |
+
label="Initial Prompt (optional)",
|
| 347 |
+
lines=3,
|
| 348 |
+
placeholder="Add context, names, or style for the model here",
|
| 349 |
+
interactive=True
|
| 350 |
+
)
|
| 351 |
|
| 352 |
#Advanced Settings
|
| 353 |
with gr.Accordion("Advanced Settings", open=False):
|