Spaces:
Sleeping
Sleeping
mrq
committed on
Commit
·
181ff0a
1
Parent(s):
a5c1bd1
- app.py +13 -2
- requirements.txt +1 -2
app.py
CHANGED
|
@@ -99,8 +99,7 @@ def get_model_paths( paths=[Path("./training/"), Path("./models/"), Path("./data
|
|
| 99 |
continue
|
| 100 |
configs.append( sft )
|
| 101 |
|
| 102 |
-
|
| 103 |
-
configs = [ str(p) for p in configs ]
|
| 104 |
|
| 105 |
return configs
|
| 106 |
|
|
@@ -205,6 +204,8 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
| 205 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
| 206 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 207 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
|
|
|
|
|
|
| 208 |
parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
|
| 209 |
parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
|
| 210 |
parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
|
|
@@ -258,11 +259,18 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
| 258 |
if kwargs.pop("refine-on-stop", False):
|
| 259 |
args.refine_on_stop = True
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
tts = init_tts()
|
| 262 |
|
| 263 |
gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
|
| 264 |
|
| 265 |
sampling_kwargs = dict(
|
|
|
|
|
|
|
| 266 |
max_steps=args.max_steps,
|
| 267 |
max_levels=args.max_levels,
|
| 268 |
max_duration=args.max_duration,
|
|
@@ -438,6 +446,9 @@ with ui:
|
|
| 438 |
layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
|
| 439 |
layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
|
| 440 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
|
|
|
|
|
|
|
|
|
|
| 441 |
with gr.Tab("Sampler Settings"):
|
| 442 |
with gr.Row():
|
| 443 |
layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
|
|
|
|
| 99 |
continue
|
| 100 |
configs.append( sft )
|
| 101 |
|
| 102 |
+
configs = [ str(p) for p in configs ]
|
|
|
|
| 103 |
|
| 104 |
return configs
|
| 105 |
|
|
|
|
| 204 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
| 205 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 206 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
| 207 |
+
parser.add_argument("--split-text-by", type=str, default=kwargs["split-text-by"])
|
| 208 |
+
parser.add_argument("--context-history", type=int, default=kwargs["context-history"])
|
| 209 |
parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
|
| 210 |
parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
|
| 211 |
parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
|
|
|
|
| 259 |
if kwargs.pop("refine-on-stop", False):
|
| 260 |
args.refine_on_stop = True
|
| 261 |
|
| 262 |
+
if args.split_text_by == "lines":
|
| 263 |
+
args.split_text_by = "\n"
|
| 264 |
+
elif args.split_text_by == "none":
|
| 265 |
+
args.split_text_by = None
|
| 266 |
+
|
| 267 |
tts = init_tts()
|
| 268 |
|
| 269 |
gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
|
| 270 |
|
| 271 |
sampling_kwargs = dict(
|
| 272 |
+
split_text_by=args.split_text_by,
|
| 273 |
+
context_history=args.context_history,
|
| 274 |
max_steps=args.max_steps,
|
| 275 |
max_levels=args.max_levels,
|
| 276 |
max_duration=args.max_duration,
|
|
|
|
| 446 |
layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
|
| 447 |
layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
|
| 448 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
|
| 449 |
+
with gr.Row():
|
| 450 |
+
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="Splits the text into pieces.", value="sentences")
|
| 451 |
+
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
| 452 |
with gr.Tab("Sampler Settings"):
|
| 453 |
with gr.Row():
|
| 454 |
layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
|
requirements.txt
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
torch
|
| 3 |
torchaudio
|
| 4 |
-
sageattention==1.0.6
|
| 5 |
|
| 6 |
-
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@
|
|
|
|
| 1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
torch
|
| 3 |
torchaudio
|
|
|
|
| 4 |
|
| 5 |
+
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@c66a53492c98222f2087de7af7e12da228d29534
|