# Phillnet-2 / Audio / audio_cli.py
# ayjays132's picture
# Upload 478 files
# 101858b verified
from __future__ import annotations
import argparse
import json
from pathlib import Path
def _add_synthesis_arguments(subparser: argparse.ArgumentParser) -> None:
    """Register the speech-generation options shared by ``synthesize`` and ``speak``.

    Keeping the option set in one place guarantees both sub-commands accept
    exactly the same flags, types and defaults.
    """
    subparser.add_argument("--text", required=True)
    subparser.add_argument("--output", required=True)
    # Optional free-form string options; each defaults to None when omitted.
    for flag in ("--language", "--ref-audio", "--ref-text", "--instruct"):
        subparser.add_argument(flag, default=None)
    subparser.add_argument("--duration", type=float, default=None)
    subparser.add_argument("--speed", type=float, default=None)
    subparser.add_argument("--num-step", type=int, default=8)
    subparser.add_argument("--guidance-scale", type=float, default=1.5)
    subparser.add_argument("--denoise", action="store_true")


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the Phill Swarm Audio CLI.

    The parser takes a global ``--root-dir`` option (defaulting to the
    directory that contains this file) followed by one required sub-command,
    stored in ``args.command``:

    * ``indicators`` -- no extra options.
    * ``synthesize`` -- the shared speech-generation options.
    * ``speak`` -- the shared speech-generation options plus ``--non-blocking``.
    * ``transcribe`` -- a required ``--input`` path.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Phill Swarm Audio CLI")
    parser.add_argument(
        "--root-dir",
        type=str,
        default=str(Path(__file__).resolve().parent),
    )

    subparsers = parser.add_subparsers(dest="command", required=True)

    subparsers.add_parser(
        "indicators",
        help="Show Audio runtime and terminal indicators",
    )

    synth = subparsers.add_parser("synthesize", help="Generate speech to a wav file")
    _add_synthesis_arguments(synth)

    speak = subparsers.add_parser(
        "speak",
        help="Generate speech and try native OS playback",
    )
    _add_synthesis_arguments(speak)
    speak.add_argument("--non-blocking", action="store_true")

    transcribe = subparsers.add_parser("transcribe", help="Transcribe an audio file")
    transcribe.add_argument("--input", required=True)

    return parser
def _synthesis_kwargs(args: argparse.Namespace) -> dict[str, object]:
    """Map the parsed speech options onto the keyword arguments the pipeline calls take.

    ``synthesize`` and ``speak`` forward the same eleven values, so the
    mapping lives here instead of being spelled out twice.
    """
    return {
        "text": args.text,
        "output_path": args.output,
        "language": args.language,
        "ref_audio": args.ref_audio,
        "ref_text": args.ref_text,
        "instruct": args.instruct,
        "duration": args.duration,
        "speed": args.speed,
        "num_step": args.num_step,
        "guidance_scale": args.guidance_scale,
        "denoise": args.denoise,
    }


def main(argv: list[str] | None = None) -> None:
    """Parse the command line and run the selected audio sub-command.

    Args:
        argv: Argument list to parse, without the program name. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so a bare
            ``main()`` call behaves as before.

    The result of each sub-command is written to stdout with ``print``.
    """
    args = build_parser().parse_args(argv)

    if args.command == "indicators":
        # Imported lazily so this sub-command never imports Audio.Pipeline.
        from Audio.indicators import build_audio_indicator_payload

        print(json.dumps(build_audio_indicator_payload(args.root_dir), indent=2))
        return

    # The parser requires a sub-command, so anything that reaches this point
    # is one of the three pipeline-backed commands; load the pipeline once.
    from Audio.Pipeline import load_audio_pipeline

    pipeline = load_audio_pipeline(args.root_dir)

    if args.command == "synthesize":
        print(pipeline.synthesize_to_file(**_synthesis_kwargs(args)))
    elif args.command == "speak":
        output, played = pipeline.speak(
            **_synthesis_kwargs(args),
            block=not args.non_blocking,
        )
        # NOTE: this prints the Python dict repr, not JSON; kept as-is so the
        # existing output format does not change.
        print({"output": str(output), "played": played})
    elif args.command == "transcribe":
        print(pipeline.transcribe_file(args.input))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()