Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
tiny_tts/__init__.py
CHANGED
|
@@ -38,7 +38,7 @@ class TinyTTS:
|
|
| 38 |
|
| 39 |
self.model = load_engine(checkpoint_path, self.device)
|
| 40 |
|
| 41 |
-
def speak(self, text, output_path="output.wav", speaker="[… rest of removed line truncated in extraction]
|
| 42 |
"""Synthesize text to speech and save to output_path."""
|
| 43 |
print(f"Synthesizing: {text}")
|
| 44 |
|
|
|
|
| 38 |
|
| 39 |
self.model = load_engine(checkpoint_path, self.device)
|
| 40 |
|
| 41 |
+
def speak(self, text, output_path="output.wav", speaker="female"):
|
| 42 |
"""Synthesize text to speech and save to output_path."""
|
| 43 |
print(f"Synthesizing: {text}")
|
| 44 |
|
tiny_tts/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (3.03 kB). View file
|
|
|
tiny_tts/__pycache__/infer.cpython-310.pyc
ADDED
|
Binary file (5.61 kB). View file
|
|
|
tiny_tts/infer.py
CHANGED
|
@@ -123,13 +123,25 @@ def get_latest_checkpoint(checkpoint_dir):
|
|
| 123 |
def main():
|
| 124 |
parser = argparse.ArgumentParser(description="TinyTTS — English Text-to-Speech Inference")
|
| 125 |
parser.add_argument("--text", "-t", type=str, default="The weather is nice today, and I feel very relaxed.", help="Text to synthesize")
|
| 126 |
-
parser.add_argument("--checkpoint", "-c", type=str, [… rest of removed line truncated in extraction]
|
| 127 |
parser.add_argument("--output", "-o", type=str, default="english_test.wav", help="Output audio file path")
|
| 128 |
parser.add_argument("--speaker", "-s", type=str, default="female", help="Speaker ID")
|
| 129 |
parser.add_argument("--device", type=str, default="cuda", help="Device to use (cuda or cpu)")
|
| 130 |
|
| 131 |
args = parser.parse_args()
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
if not os.path.exists(args.checkpoint):
|
| 134 |
print(f"Error: Checkpoint or directory not found at {args.checkpoint}")
|
| 135 |
sys.exit(1)
|
|
@@ -167,6 +179,5 @@ def main():
|
|
| 167 |
final_output = os.path.join(out_dir, f"{name}_step{step_str}_spk{args.speaker}{ext}")
|
| 168 |
synthesize(args.text, final_output, model, speaker=args.speaker, device=args.device)
|
| 169 |
|
| 170 |
-
|
| 171 |
if __name__ == "__main__":
|
| 172 |
main()
|
|
|
|
| 123 |
def main():
|
| 124 |
parser = argparse.ArgumentParser(description="TinyTTS — English Text-to-Speech Inference")
|
| 125 |
parser.add_argument("--text", "-t", type=str, default="The weather is nice today, and I feel very relaxed.", help="Text to synthesize")
|
| 126 |
+
parser.add_argument("--checkpoint", "-c", type=str, default=None, help="Path to checkpoint. Auto-downloads if not provided.")
|
| 127 |
parser.add_argument("--output", "-o", type=str, default="english_test.wav", help="Output audio file path")
|
| 128 |
parser.add_argument("--speaker", "-s", type=str, default="female", help="Speaker ID")
|
| 129 |
parser.add_argument("--device", type=str, default="cuda", help="Device to use (cuda or cpu)")
|
| 130 |
|
| 131 |
args = parser.parse_args()
|
| 132 |
|
| 133 |
+
if args.checkpoint is None:
|
| 134 |
+
try:
|
| 135 |
+
from huggingface_hub import hf_hub_download
|
| 136 |
+
print("Downloading/Loading checkpoint from Hugging Face Hub (backtracking/tiny-tts)...")
|
| 137 |
+
args.checkpoint = hf_hub_download(repo_id="backtracking/tiny-tts", filename="G.pth")
|
| 138 |
+
except ImportError:
|
| 139 |
+
print("Error: huggingface_hub is required for auto-download. Run: pip install huggingface_hub")
|
| 140 |
+
sys.exit(1)
|
| 141 |
+
except Exception as e:
|
| 142 |
+
print(f"Error downloading checkpoint: {e}")
|
| 143 |
+
sys.exit(1)
|
| 144 |
+
|
| 145 |
if not os.path.exists(args.checkpoint):
|
| 146 |
print(f"Error: Checkpoint or directory not found at {args.checkpoint}")
|
| 147 |
sys.exit(1)
|
|
|
|
| 179 |
final_output = os.path.join(out_dir, f"{name}_step{step_str}_spk{args.speaker}{ext}")
|
| 180 |
synthesize(args.text, final_output, model, speaker=args.speaker, device=args.device)
|
| 181 |
|
|
|
|
| 182 |
if __name__ == "__main__":
|
| 183 |
main()
|
tiny_tts/utils/config.py
CHANGED
|
@@ -8,7 +8,7 @@ SPEC_CHANNELS = FILTER_LENGTH // 2 + 1 # 1025
|
|
| 8 |
|
| 9 |
# Speakers
|
| 10 |
N_SPEAKERS = 1
|
| 11 |
-
SPK2ID = {"[… rest of removed line truncated in extraction]
|
| 12 |
|
| 13 |
# Model
|
| 14 |
MODEL_PARAMS = dict(
|
|
|
|
| 8 |
|
| 9 |
# Speakers
|
| 10 |
N_SPEAKERS = 1
|
| 11 |
+
SPK2ID = {"female": 0}
|
| 12 |
|
| 13 |
# Model
|
| 14 |
MODEL_PARAMS = dict(
|