Spaces: Running on Zero
Update gradio_mix.py
Browse files
- gradio_mix.py (+12 -3)
gradio_mix.py
CHANGED
|
@@ -642,7 +642,9 @@ def get_transcribe_state(segments):
|
|
| 642 |
"word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in segments["words"]]
|
| 643 |
}
|
| 644 |
|
| 645 |
-
|
|
|
|
|
|
|
| 646 |
def transcribe(seed, audio_info):
|
| 647 |
if transcribe_model is None:
|
| 648 |
raise gr.Error("Transcription model not loaded")
|
|
@@ -659,6 +661,9 @@ def transcribe(seed, audio_info):
|
|
| 659 |
state
|
| 660 |
]
|
| 661 |
|
|
|
|
|
|
|
|
|
|
| 662 |
def align(transcript, audio_info, state):
|
| 663 |
lang = state["segments"]["lang"]
|
| 664 |
# print("realign: ", transcript, state)
|
|
@@ -683,7 +688,9 @@ def align(transcript, audio_info, state):
|
|
| 683 |
state
|
| 684 |
]
|
| 685 |
|
| 686 |
-
|
|
|
|
|
|
|
| 687 |
def denoise(audio_info):
|
| 688 |
denoised_audio, sr = denoise_model.denoise(audio_info)
|
| 689 |
denoised_audio = denoised_audio # .squeeze().numpy()
|
|
@@ -725,7 +732,9 @@ def replace_numbers_with_words(sentence, lang="en"):
|
|
| 725 |
return num # In case num2words fails (unlikely with digits but just to be safe)
|
| 726 |
return re.sub(r'\b\d+\b', replace_with_words, sentence) # Regular expression that matches numbers
|
| 727 |
|
| 728 |
-
|
|
|
|
|
|
|
| 729 |
def run(seed, nfe_step, speed, cfg_strength, sway_sampling_coef, ref_ratio,
|
| 730 |
audio_info, denoised_audio, transcribe_state, transcript, smart_transcript,
|
| 731 |
mode, start_time, end_time,
|
|
|
|
| 642 |
"word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in segments["words"]]
|
| 643 |
}
|
| 644 |
|
| 645 |
+
@spaces.GPU
|
| 646 |
+
@torch.no_grad()
|
| 647 |
+
@torch.inference_mode()
|
| 648 |
def transcribe(seed, audio_info):
|
| 649 |
if transcribe_model is None:
|
| 650 |
raise gr.Error("Transcription model not loaded")
|
|
|
|
| 661 |
state
|
| 662 |
]
|
| 663 |
|
| 664 |
+
@spaces.GPU
|
| 665 |
+
@torch.no_grad()
|
| 666 |
+
@torch.inference_mode()
|
| 667 |
def align(transcript, audio_info, state):
|
| 668 |
lang = state["segments"]["lang"]
|
| 669 |
# print("realign: ", transcript, state)
|
|
|
|
| 688 |
state
|
| 689 |
]
|
| 690 |
|
| 691 |
+
@spaces.GPU
|
| 692 |
+
@torch.no_grad()
|
| 693 |
+
@torch.inference_mode()
|
| 694 |
def denoise(audio_info):
|
| 695 |
denoised_audio, sr = denoise_model.denoise(audio_info)
|
| 696 |
denoised_audio = denoised_audio # .squeeze().numpy()
|
|
|
|
| 732 |
return num # In case num2words fails (unlikely with digits but just to be safe)
|
| 733 |
return re.sub(r'\b\d+\b', replace_with_words, sentence) # Regular expression that matches numbers
|
| 734 |
|
| 735 |
+
@spaces.GPU
|
| 736 |
+
@torch.no_grad()
|
| 737 |
+
@torch.inference_mode()
|
| 738 |
def run(seed, nfe_step, speed, cfg_strength, sway_sampling_coef, ref_ratio,
|
| 739 |
audio_info, denoised_audio, transcribe_state, transcript, smart_transcript,
|
| 740 |
mode, start_time, end_time,
|