| import os
|
| from argparse import ArgumentParser
|
| from pathlib import Path
|
|
|
| import httpx
|
| import ormsgpack
|
|
|
| from tools.schema import ServeReferenceAudio, ServeTTSRequest
|
|
|
# Bearer token sent with the API requests below. Read from the FISH_API_KEY
# environment variable; a placeholder string is used when it is unset.
api_key = os.getenv("FISH_API_KEY", "YOUR_API_KEY")
|
|
|
|
|
def audio_request():
    """Stream a TTS synthesis from the local server into ``hello.wav``.

    Builds a ``ServeTTSRequest`` with a fixed text and a single reference
    voice (``lengyue.wav`` plus its transcript ``lengyue.lab``, both read
    from the current working directory), POSTs it msgpack-encoded to
    ``http://127.0.0.1:8080/v1/tts`` and writes the streamed response bytes
    to ``hello.wav``.

    Raises:
        OSError: If a reference file cannot be read or ``hello.wav`` cannot
            be written.
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # read_bytes()/read_text() close the underlying file handles right away,
    # unlike a bare ``open(...).read()``.
    reference = ServeReferenceAudio(
        audio=Path("lengyue.wav").read_bytes(),
        text=Path("lengyue.lab").read_text(encoding="utf-8"),
    )
    request = ServeTTSRequest(
        text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
        references=[reference],
        streaming=True,
    )

    # Looked up at call time, so the environment variable is honoured even if
    # it was set after this module was imported.
    api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")

    with (
        httpx.Client() as client,
        client.stream(
            "POST",
            "http://127.0.0.1:8080/v1/tts",
            content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
            headers={
                "authorization": f"Bearer {api_key}",
                "content-type": "application/msgpack",
            },
            # No client-side timeout for this request.
            timeout=None,
        ) as response,
    ):
        # Fail loudly on an error status instead of saving the error body as
        # if it were audio.
        response.raise_for_status()

        # Open the output only once the request has succeeded, so a failed
        # call does not truncate an existing hello.wav.
        with open("hello.wav", "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)
|
|
|
|
|
def asr_request(audio_path: Path):
    """Send an audio file to the ASR endpoint and print the transcription.

    The file contents are packed with msgpack together with the fixed options
    ``language="en"`` and ``ignore_timestamps=False`` and POSTed to
    ``https://api.fish.audio/v1/asr``. The JSON reply is printed: the full
    text, the duration, and each segment with its start and end time.

    Args:
        audio_path: Location of the audio file to upload.

    Raises:
        OSError: If the audio file cannot be read.
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    # Path(...) accepts both str and Path callers; read_bytes() closes the file.
    audio_data = Path(audio_path).read_bytes()

    request_data = {
        "audio": audio_data,
        "language": "en",
        "ignore_timestamps": False,
    }

    with httpx.Client() as client:
        response = client.post(
            "https://api.fish.audio/v1/asr",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/msgpack",
            },
            content=ormsgpack.packb(request_data),
        )

    # Surface HTTP errors directly; otherwise an error payload would only show
    # up later as a KeyError or a JSON decoding failure.
    response.raise_for_status()
    result = response.json()

    print(f"Transcribed text: {result['text']}")
    print(f"Audio duration: {result['duration']} seconds")

    for segment in result["segments"]:
        print(f"Segment: {segment['text']}")
        print(f"Start time: {segment['start']}, End time: {segment['end']}")
|
|
|
|
|
def parse_args():
    """Parse the command-line options of this script.

    Returns:
        The argparse namespace; ``audio_path`` holds the value of
        ``--audio_path`` (default ``audio/ref/trump.mp3``) converted by
        ``Path``.
    """
    cli = ArgumentParser()
    cli.add_argument(
        "--audio_path",
        type=Path,
        default="audio/ref/trump.mp3",
    )
    options = cli.parse_args()
    return options
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: transcribe the file selected with --audio_path.
    asr_request(parse_args().audio_path)
|
|
|