Spaces:
Runtime error
Runtime error
use latest torch
Browse files
- pyproject.toml +0 -11
- requirements.txt +38 -7
- sample.py +13 -10
pyproject.toml
CHANGED
|
@@ -11,15 +11,4 @@ dependencies = [
|
|
| 11 |
"soundfile>=0.12.1",
|
| 12 |
"torch>=2.5.1",
|
| 13 |
"vocos>=0.1.0",
|
| 14 |
-
]
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
[[tool.uv.index]]
|
| 18 |
-
name = "pytorch-cpu"
|
| 19 |
-
url = "https://download.pytorch.org/whl/cpu"
|
| 20 |
-
explicit = true
|
| 21 |
-
|
| 22 |
-
[tool.uv.sources]
|
| 23 |
-
torch = [
|
| 24 |
-
{ index = "pytorch-cpu", marker = "platform_system != 'Darwin'"},
|
| 25 |
]
|
|
|
|
| 11 |
"soundfile>=0.12.1",
|
| 12 |
"torch>=2.5.1",
|
| 13 |
"vocos>=0.1.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
]
|
requirements.txt
CHANGED
|
@@ -1,9 +1,5 @@
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
-
# uv pip compile pyproject.toml -o requirements.txt --python-platform x86_64-unknown-linux-gnu
|
| 3 |
-
--index-url https://pypi.org/simple
|
| 4 |
-
--extra-index-url https://download.pytorch.org/whl/cu113
|
| 5 |
-
|
| 6 |
-
|
| 7 |
aiofiles==23.2.1
|
| 8 |
# via gradio
|
| 9 |
annotated-types==0.7.0
|
|
@@ -43,6 +39,7 @@ filelock==3.13.1
|
|
| 43 |
# via
|
| 44 |
# huggingface-hub
|
| 45 |
# torch
|
|
|
|
| 46 |
fsspec==2024.2.0
|
| 47 |
# via
|
| 48 |
# gradio-client
|
|
@@ -109,6 +106,38 @@ numpy==1.26.3
|
|
| 109 |
# pandas
|
| 110 |
# scipy
|
| 111 |
# vocos
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
orjson==3.10.12
|
| 113 |
# via gradio
|
| 114 |
packaging==24.1
|
|
@@ -175,13 +204,13 @@ sympy==1.13.1
|
|
| 175 |
# via torch
|
| 176 |
tomlkit==0.13.2
|
| 177 |
# via gradio
|
| 178 |
-
torch
|
| 179 |
# via
|
| 180 |
# diffusion-speech-360h (pyproject.toml)
|
| 181 |
# encodec
|
| 182 |
# torchaudio
|
| 183 |
# vocos
|
| 184 |
-
torchaudio
|
| 185 |
# via
|
| 186 |
# encodec
|
| 187 |
# vocos
|
|
@@ -189,6 +218,8 @@ tqdm==4.66.5
|
|
| 189 |
# via
|
| 190 |
# huggingface-hub
|
| 191 |
# nltk
|
|
|
|
|
|
|
| 192 |
typeguard==4.2.0
|
| 193 |
# via inflect
|
| 194 |
typer==0.15.1
|
|
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv pip compile pyproject.toml -o requirements.txt --python-platform x86_64-unknown-linux-gnu
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
aiofiles==23.2.1
|
| 4 |
# via gradio
|
| 5 |
annotated-types==0.7.0
|
|
|
|
| 39 |
# via
|
| 40 |
# huggingface-hub
|
| 41 |
# torch
|
| 42 |
+
# triton
|
| 43 |
fsspec==2024.2.0
|
| 44 |
# via
|
| 45 |
# gradio-client
|
|
|
|
| 106 |
# pandas
|
| 107 |
# scipy
|
| 108 |
# vocos
|
| 109 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 110 |
+
# via
|
| 111 |
+
# nvidia-cudnn-cu12
|
| 112 |
+
# nvidia-cusolver-cu12
|
| 113 |
+
# torch
|
| 114 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 115 |
+
# via torch
|
| 116 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 117 |
+
# via torch
|
| 118 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 119 |
+
# via torch
|
| 120 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 121 |
+
# via torch
|
| 122 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 123 |
+
# via torch
|
| 124 |
+
nvidia-curand-cu12==10.3.5.147
|
| 125 |
+
# via torch
|
| 126 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 127 |
+
# via torch
|
| 128 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 129 |
+
# via
|
| 130 |
+
# nvidia-cusolver-cu12
|
| 131 |
+
# torch
|
| 132 |
+
nvidia-nccl-cu12==2.21.5
|
| 133 |
+
# via torch
|
| 134 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 135 |
+
# via
|
| 136 |
+
# nvidia-cusolver-cu12
|
| 137 |
+
# nvidia-cusparse-cu12
|
| 138 |
+
# torch
|
| 139 |
+
nvidia-nvtx-cu12==12.4.127
|
| 140 |
+
# via torch
|
| 141 |
orjson==3.10.12
|
| 142 |
# via gradio
|
| 143 |
packaging==24.1
|
|
|
|
| 204 |
# via torch
|
| 205 |
tomlkit==0.13.2
|
| 206 |
# via gradio
|
| 207 |
+
torch==2.5.1
|
| 208 |
# via
|
| 209 |
# diffusion-speech-360h (pyproject.toml)
|
| 210 |
# encodec
|
| 211 |
# torchaudio
|
| 212 |
# vocos
|
| 213 |
+
torchaudio==2.5.1
|
| 214 |
# via
|
| 215 |
# encodec
|
| 216 |
# vocos
|
|
|
|
| 218 |
# via
|
| 219 |
# huggingface-hub
|
| 220 |
# nltk
|
| 221 |
+
triton==3.1.0
|
| 222 |
+
# via torch
|
| 223 |
typeguard==4.2.0
|
| 224 |
# via inflect
|
| 225 |
typer==0.15.1
|
sample.py
CHANGED
|
@@ -174,11 +174,12 @@ def sample(
|
|
| 174 |
embedding_vocab_size=model_config["embedding_vocab_size"],
|
| 175 |
learn_sigma=model_config["learn_sigma"],
|
| 176 |
in_channels=data_config["data_dim"],
|
| 177 |
-
).to(device)
|
| 178 |
|
| 179 |
state_dict = find_model(ckpt_path)
|
| 180 |
model.load_state_dict(state_dict)
|
| 181 |
model.eval() # important!
|
|
|
|
| 182 |
model_cache[ckpt_path] = model
|
| 183 |
else:
|
| 184 |
model = model_cache[ckpt_path]
|
|
@@ -206,15 +207,17 @@ def sample(
|
|
| 206 |
attn_mask=attn_mask,
|
| 207 |
)
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
| 218 |
samples = [s.chunk(2, dim=0)[0] for s in samples] # Remove null class samples
|
| 219 |
return samples
|
| 220 |
|
|
|
|
| 174 |
embedding_vocab_size=model_config["embedding_vocab_size"],
|
| 175 |
learn_sigma=model_config["learn_sigma"],
|
| 176 |
in_channels=data_config["data_dim"],
|
| 177 |
+
).to(device).bfloat16()
|
| 178 |
|
| 179 |
state_dict = find_model(ckpt_path)
|
| 180 |
model.load_state_dict(state_dict)
|
| 181 |
model.eval() # important!
|
| 182 |
+
model = model.bfloat16()
|
| 183 |
model_cache[ckpt_path] = model
|
| 184 |
else:
|
| 185 |
model = model_cache[ckpt_path]
|
|
|
|
| 207 |
attn_mask=attn_mask,
|
| 208 |
)
|
| 209 |
|
| 210 |
+
with torch.no_grad():
|
| 211 |
+
with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
|
| 212 |
+
samples = diffusion.p_sample_loop(
|
| 213 |
+
model.forward_with_cfg,
|
| 214 |
+
z.shape,
|
| 215 |
+
z,
|
| 216 |
+
clip_denoised=False,
|
| 217 |
+
model_kwargs=model_kwargs,
|
| 218 |
+
progress=True,
|
| 219 |
+
device=device,
|
| 220 |
+
)
|
| 221 |
samples = [s.chunk(2, dim=0)[0] for s in samples] # Remove null class samples
|
| 222 |
return samples
|
| 223 |
|