from __future__ import annotations

import argparse
import json
from pathlib import Path


def _add_synthesis_arguments(subparser: argparse.ArgumentParser) -> None:
    """Register the options shared by the ``synthesize`` and ``speak`` commands.

    Both commands accept exactly the same generation options, so they are
    declared once here to keep the two subparsers from drifting apart.
    """
    subparser.add_argument("--text", required=True)
    subparser.add_argument("--output", required=True)
    # Optional string options; all default to None when omitted.
    for flag in ("--language", "--ref-audio", "--ref-text", "--instruct"):
        subparser.add_argument(flag, default=None)
    subparser.add_argument("--duration", type=float, default=None)
    subparser.add_argument("--speed", type=float, default=None)
    subparser.add_argument("--num-step", type=int, default=8)
    subparser.add_argument("--guidance-scale", type=float, default=1.5)
    subparser.add_argument("--denoise", action="store_true")


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the audio command-line interface.

    The parser has one global option, ``--root-dir`` (defaulting to the
    directory containing this file), and four required subcommands stored in
    ``args.command``: ``indicators``, ``synthesize``, ``speak`` and
    ``transcribe``.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Phill Swarm Audio CLI")
    parser.add_argument(
        "--root-dir",
        type=str,
        default=str(Path(__file__).resolve().parent),
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    subparsers.add_parser(
        "indicators", help="Show Audio runtime and terminal indicators"
    )

    synth = subparsers.add_parser("synthesize", help="Generate speech to a wav file")
    _add_synthesis_arguments(synth)

    speak = subparsers.add_parser(
        "speak", help="Generate speech and try native OS playback"
    )
    _add_synthesis_arguments(speak)
    # Only ``speak`` has a playback step, so only it takes this flag.
    speak.add_argument("--non-blocking", action="store_true")

    transcribe = subparsers.add_parser("transcribe", help="Transcribe an audio file")
    transcribe.add_argument("--input", required=True)

    return parser


def _synthesis_kwargs(args: argparse.Namespace) -> dict[str, object]:
    """Map parsed ``synthesize``/``speak`` options to pipeline keyword arguments.

    The CLI option ``--output`` is passed to the pipeline as ``output_path``;
    every other option keeps its (underscored) name.
    """
    return {
        "text": args.text,
        "output_path": args.output,
        "language": args.language,
        "ref_audio": args.ref_audio,
        "ref_text": args.ref_text,
        "instruct": args.instruct,
        "duration": args.duration,
        "speed": args.speed,
        "num_step": args.num_step,
        "guidance_scale": args.guidance_scale,
        "denoise": args.denoise,
    }


def main() -> None:
    """Parse the command line and run the selected subcommand.

    Each command prints its result to stdout:

    * ``indicators`` prints the indicator payload as indented JSON.
    * ``synthesize`` prints the value returned by ``synthesize_to_file``.
    * ``speak`` prints a dict with the output path and the ``played`` value.
    * ``transcribe`` prints the value returned by ``transcribe_file``.
    """
    args = build_parser().parse_args()

    if args.command == "indicators":
        # Imported here rather than at module top, as in the original layout.
        from Audio.indicators import build_audio_indicator_payload

        print(json.dumps(build_audio_indicator_payload(args.root_dir), indent=2))
        return

    if args.command in ("synthesize", "speak", "transcribe"):
        # The pipeline module is imported only for the commands that use it,
        # so ``indicators`` never triggers this import.
        from Audio.Pipeline import load_audio_pipeline

        pipeline = load_audio_pipeline(args.root_dir)

        if args.command == "synthesize":
            print(pipeline.synthesize_to_file(**_synthesis_kwargs(args)))
        elif args.command == "speak":
            output, played = pipeline.speak(
                **_synthesis_kwargs(args),
                block=not args.non_blocking,
            )
            # NOTE(review): this prints a Python dict repr, not JSON (unlike
            # ``indicators``). Kept unchanged so existing consumers of the
            # output are not broken; confirm whether JSON was intended.
            print({"output": str(output), "played": played})
        else:
            print(pipeline.transcribe_file(args.input))


if __name__ == "__main__":
    main()