Spaces:
Sleeping
Sleeping
mrq
committed on
Commit
·
181ff0a
1
Parent(s):
a5c1bd1
- app.py +13 -2
- requirements.txt +1 -2
app.py
CHANGED
|
@@ -99,8 +99,7 @@ def get_model_paths( paths=[Path("./training/"), Path("./models/"), Path("./data
|
|
| 99 |
continue
|
| 100 |
configs.append( sft )
|
| 101 |
|
| 102 |
-
|
| 103 |
-
configs = [ str(p) for p in configs ]
|
| 104 |
|
| 105 |
return configs
|
| 106 |
|
|
@@ -205,6 +204,8 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
| 205 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
| 206 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 207 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
|
|
|
|
|
|
| 208 |
parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
|
| 209 |
parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
|
| 210 |
parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
|
|
@@ -258,11 +259,18 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
| 258 |
if kwargs.pop("refine-on-stop", False):
|
| 259 |
args.refine_on_stop = True
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
tts = init_tts()
|
| 262 |
|
| 263 |
gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
|
| 264 |
|
| 265 |
sampling_kwargs = dict(
|
|
|
|
|
|
|
| 266 |
max_steps=args.max_steps,
|
| 267 |
max_levels=args.max_levels,
|
| 268 |
max_duration=args.max_duration,
|
|
@@ -438,6 +446,9 @@ with ui:
|
|
| 438 |
layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
|
| 439 |
layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
|
| 440 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
|
|
|
|
|
|
|
|
|
|
| 441 |
with gr.Tab("Sampler Settings"):
|
| 442 |
with gr.Row():
|
| 443 |
layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
|
|
|
|
| 99 |
continue
|
| 100 |
configs.append( sft )
|
| 101 |
|
| 102 |
+
configs = [ str(p) for p in configs ]
|
|
|
|
| 103 |
|
| 104 |
return configs
|
| 105 |
|
|
|
|
| 204 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
| 205 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 206 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
| 207 |
+
parser.add_argument("--split-text-by", type=str, default=kwargs["split-text-by"])
|
| 208 |
+
parser.add_argument("--context-history", type=int, default=kwargs["context-history"])
|
| 209 |
parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
|
| 210 |
parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
|
| 211 |
parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
|
|
|
|
| 259 |
if kwargs.pop("refine-on-stop", False):
|
| 260 |
args.refine_on_stop = True
|
| 261 |
|
| 262 |
+
if args.split_text_by == "lines":
|
| 263 |
+
args.split_text_by = "\n"
|
| 264 |
+
elif args.split_text_by == "none":
|
| 265 |
+
args.split_text_by = None
|
| 266 |
+
|
| 267 |
tts = init_tts()
|
| 268 |
|
| 269 |
gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
|
| 270 |
|
| 271 |
sampling_kwargs = dict(
|
| 272 |
+
split_text_by=args.split_text_by,
|
| 273 |
+
context_history=args.context_history,
|
| 274 |
max_steps=args.max_steps,
|
| 275 |
max_levels=args.max_levels,
|
| 276 |
max_duration=args.max_duration,
|
|
|
|
| 446 |
layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
|
| 447 |
layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
|
| 448 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
|
| 449 |
+
with gr.Row():
|
| 450 |
+
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="Splits the text into pieces.", value="sentences")
|
| 451 |
+
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
| 452 |
with gr.Tab("Sampler Settings"):
|
| 453 |
with gr.Row():
|
| 454 |
layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
|
requirements.txt
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
torch
|
| 3 |
torchaudio
|
| 4 |
-
sageattention==1.0.6
|
| 5 |
|
| 6 |
-
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@
|
|
|
|
| 1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
torch
|
| 3 |
torchaudio
|
|
|
|
| 4 |
|
| 5 |
+
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@c66a53492c98222f2087de7af7e12da228d29534
|