remove (comment out the whisperX transcription feature)
Browse files
- app.py +52 -52
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -11,7 +11,7 @@ from PIL import Image, ImageOps
|
|
| 11 |
import numpy as np
|
| 12 |
from simple_lama_inpainting import SimpleLama
|
| 13 |
from contextlib import contextmanager
|
| 14 |
-
import whisperx
|
| 15 |
import gc
|
| 16 |
|
| 17 |
@contextmanager
|
|
@@ -174,61 +174,61 @@ def erase(image=None, mask=None):
|
|
| 174 |
return simple_lama(image, mask)
|
| 175 |
|
| 176 |
|
| 177 |
-
def transcribe(audio):
|
| 178 |
-
|
| 179 |
-
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
|
| 224 |
-
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
|
| 233 |
|
| 234 |
@spaces.GPU(duration=120)
|
|
@@ -245,8 +245,8 @@ def main(*args):
|
|
| 245 |
# return mask_generation(*args)
|
| 246 |
elif api_num == 5:
|
| 247 |
return erase(*args)
|
| 248 |
-
elif api_num == 6:
|
| 249 |
-
|
| 250 |
|
| 251 |
|
| 252 |
rmbg_tab = gr.Interface(
|
|
@@ -367,7 +367,7 @@ demo = gr.TabbedInterface(
|
|
| 367 |
"inpainting",
|
| 368 |
# "sam2",
|
| 369 |
"erase",
|
| 370 |
-
"transcribe",
|
| 371 |
],
|
| 372 |
title="Utilities that require GPU",
|
| 373 |
)
|
|
|
|
| 11 |
import numpy as np
|
| 12 |
from simple_lama_inpainting import SimpleLama
|
| 13 |
from contextlib import contextmanager
|
| 14 |
+
# import whisperx
|
| 15 |
import gc
|
| 16 |
|
| 17 |
@contextmanager
|
|
|
|
| 174 |
return simple_lama(image, mask)
|
| 175 |
|
| 176 |
|
| 177 |
+
# def transcribe(audio):
|
| 178 |
+
# if audio is None:
|
| 179 |
+
# raise gr.Error("No audio file submitted!")
|
| 180 |
|
| 181 |
+
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 182 |
+
# compute_type = "float16"
|
| 183 |
+
# batch_size = 8 # reduced batch size to be conservative with memory
|
| 184 |
|
| 185 |
+
# try:
|
| 186 |
+
# # 1. Load model and transcribe
|
| 187 |
+
# model = whisperx.load_model("large-v2", device, compute_type=compute_type)
|
| 188 |
+
# audio_input = whisperx.load_audio(audio)
|
| 189 |
+
# result = model.transcribe(audio_input, batch_size=batch_size)
|
| 190 |
|
| 191 |
+
# # Clear GPU memory
|
| 192 |
+
# del model
|
| 193 |
+
# gc.collect()
|
| 194 |
+
# torch.cuda.empty_cache()
|
| 195 |
+
|
| 196 |
+
# # 2. Align whisper output
|
| 197 |
+
# model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
|
| 198 |
+
# result = whisperx.align(result["segments"], model_a, metadata, audio_input, device, return_char_alignments=False)
|
| 199 |
+
|
| 200 |
+
# # Clear GPU memory
|
| 201 |
+
# del model_a
|
| 202 |
+
# gc.collect()
|
| 203 |
+
# torch.cuda.empty_cache()
|
| 204 |
+
|
| 205 |
+
# # 3. Assign speaker labels
|
| 206 |
+
# diarize_model = whisperx.DiarizationPipeline(device=device)
|
| 207 |
+
# diarize_segments = diarize_model(audio_input)
|
| 208 |
|
| 209 |
+
# # Combine transcription with speaker diarization
|
| 210 |
+
# result = whisperx.assign_word_speakers(diarize_segments, result)
|
| 211 |
+
|
| 212 |
+
# # Format output with speaker labels and timestamps
|
| 213 |
+
# formatted_text = []
|
| 214 |
+
# for segment in result["segments"]:
|
| 215 |
+
# if not isinstance(segment, dict):
|
| 216 |
+
# continue
|
| 217 |
|
| 218 |
+
# speaker = f"[Speaker {segment.get('speaker', 'Unknown')}]"
|
| 219 |
+
# start_time = f"{float(segment.get('start', 0)):.2f}"
|
| 220 |
+
# end_time = f"{float(segment.get('end', 0)):.2f}"
|
| 221 |
+
# text = segment.get('text', '').strip()
|
| 222 |
+
# formatted_text.append(f"[{start_time}s - {end_time}s] {speaker}: {text}")
|
| 223 |
|
| 224 |
+
# return "\n".join(formatted_text)
|
| 225 |
|
| 226 |
+
# except Exception as e:
|
| 227 |
+
# raise gr.Error(f"Transcription failed: {str(e)}")
|
| 228 |
+
# finally:
|
| 229 |
+
# # Ensure GPU memory is cleared even if an error occurs
|
| 230 |
+
# gc.collect()
|
| 231 |
+
# torch.cuda.empty_cache()
|
| 232 |
|
| 233 |
|
| 234 |
@spaces.GPU(duration=120)
|
|
|
|
| 245 |
# return mask_generation(*args)
|
| 246 |
elif api_num == 5:
|
| 247 |
return erase(*args)
|
| 248 |
+
# elif api_num == 6:
|
| 249 |
+
# return transcribe(*args)
|
| 250 |
|
| 251 |
|
| 252 |
rmbg_tab = gr.Interface(
|
|
|
|
| 367 |
"inpainting",
|
| 368 |
# "sam2",
|
| 369 |
"erase",
|
| 370 |
+
# "transcribe",
|
| 371 |
],
|
| 372 |
title="Utilities that require GPU",
|
| 373 |
)
|
requirements.txt
CHANGED
|
@@ -22,4 +22,4 @@ einops
|
|
| 22 |
# git+https://github.com/facebookresearch/sam2.git
|
| 23 |
matplotlib
|
| 24 |
simple-lama-inpainting
|
| 25 |
-
git+https://github.com/m-bain/whisperX.git
|
|
|
|
| 22 |
# git+https://github.com/facebookresearch/sam2.git
|
| 23 |
matplotlib
|
| 24 |
simple-lama-inpainting
|
| 25 |
+
# git+https://github.com/m-bain/whisperX.git
|