Spaces:
Sleeping
Sleeping
chore: tidy Space repository structure
#1
by Blazestorm001 - opened
- README.md +0 -8
- packages.txt +2 -0
- requirements.txt +3 -2
- src/amuseme/transcriber.py +12 -3
README.md
CHANGED
|
@@ -20,11 +20,6 @@ tags:
|
|
| 20 |
- music
|
| 21 |
- hackathon
|
| 22 |
- build-small
|
| 23 |
-
- track:wood
|
| 24 |
-
- sponsor:openbmb
|
| 25 |
-
- sponsor:openai
|
| 26 |
-
- achievement:offgrid
|
| 27 |
-
- achievement:fieldnotes
|
| 28 |
short_description: AI-powered kinetic typography lyric video generator
|
| 29 |
---
|
| 30 |
|
|
@@ -150,6 +145,3 @@ https://youtu.be/6RJwgFu6LHQ
|
|
| 150 |
|
| 151 |
## Tested on:
|
| 152 |
RTX 5060 Ti 16 GB
|
| 153 |
-
|
| 154 |
-
## SOCIAL MEDIA POST:
|
| 155 |
-
https://dev.to/blazestorm/amuseme-when-small-models-compose-a-visual-symphony-50fc
|
|
|
|
| 20 |
- music
|
| 21 |
- hackathon
|
| 22 |
- build-small
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
short_description: AI-powered kinetic typography lyric video generator
|
| 24 |
---
|
| 25 |
|
|
|
|
| 145 |
|
| 146 |
## Tested on:
|
| 147 |
RTX 5060 Ti 16 GB
|
|
|
|
|
|
|
|
|
packages.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fonts-dejavu-core
|
| 2 |
+
fonts-liberation
|
requirements.txt
CHANGED
|
@@ -3,10 +3,11 @@
|
|
| 3 |
# torch/torchaudio come from the ZeroGPU runtime + the demucs dependency,
|
| 4 |
# so they are intentionally not pinned here.
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
faster-whisper
|
| 7 |
-
ctranslate2==4.3.1
|
| 8 |
demucs
|
| 9 |
-
torchcodec
|
| 10 |
pillow
|
| 11 |
pydantic
|
| 12 |
spaces
|
|
|
|
| 3 |
# torch/torchaudio come from the ZeroGPU runtime + the demucs dependency,
|
| 4 |
# so they are intentionally not pinned here.
|
| 5 |
|
| 6 |
+
# ctranslate2: do NOT pin — let pip resolve the wheel matching ZeroGPU's
|
| 7 |
+
# CUDA/cuDNN version. Pinning 4.3.1 (cuDNN 8 build) breaks on ZeroGPU's cuDNN 9
|
| 8 |
+
# with "libcudnn_ops_infer.so.8: cannot open shared object file".
|
| 9 |
faster-whisper
|
|
|
|
| 10 |
demucs
|
|
|
|
| 11 |
pillow
|
| 12 |
pydantic
|
| 13 |
spaces
|
src/amuseme/transcriber.py
CHANGED
|
@@ -70,15 +70,24 @@ def _load_model(model_size: str = "large-v3"):
|
|
| 70 |
if _model is None:
|
| 71 |
if model_size == "turbo":
|
| 72 |
model_size = "large-v3-turbo"
|
| 73 |
-
|
| 74 |
device = "cpu" if os.environ.get("FORCE_CPU") == "1" else "cuda"
|
| 75 |
logger.info(f"Loading Whisper {model_size} on {device}...")
|
| 76 |
compute_type = "float16" if device == "cuda" else "int8"
|
| 77 |
try:
|
| 78 |
_model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
| 79 |
except Exception as e:
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
return _model
|
| 83 |
|
| 84 |
|
|
|
|
| 70 |
if _model is None:
|
| 71 |
if model_size == "turbo":
|
| 72 |
model_size = "large-v3-turbo"
|
| 73 |
+
|
| 74 |
device = "cpu" if os.environ.get("FORCE_CPU") == "1" else "cuda"
|
| 75 |
logger.info(f"Loading Whisper {model_size} on {device}...")
|
| 76 |
compute_type = "float16" if device == "cuda" else "int8"
|
| 77 |
try:
|
| 78 |
_model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
| 79 |
except Exception as e:
|
| 80 |
+
if device == "cuda":
|
| 81 |
+
# CUDA/cuDNN library mismatch (e.g. on ZeroGPU). Retry on CPU.
|
| 82 |
+
logger.warning(f"CUDA load failed ({e}). Falling back to CPU int8.")
|
| 83 |
+
try:
|
| 84 |
+
_model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 85 |
+
except Exception as e2:
|
| 86 |
+
logger.error(f"CPU fallback also failed: {e2}")
|
| 87 |
+
raise
|
| 88 |
+
else:
|
| 89 |
+
logger.warning(f"Failed to load with {compute_type}: {e}. Retrying with float32.")
|
| 90 |
+
_model = WhisperModel(model_size, device=device, compute_type="float32")
|
| 91 |
return _model
|
| 92 |
|
| 93 |
|