| """VOX ANI TTS — FastAPI + HTML UI |
| =================================== |
| - HTML UI served from static/index.html |
| - All voices & synthesis logic preserved |
| - REST endpoints for Vox Player app |
| """ |
|
|
| import os |
| import sys |
| import json |
| import time |
| import torch |
| import numpy as np |
| import soundfile as sf |
| import tempfile |
|
|
| from fastapi import FastAPI, Query, HTTPException, UploadFile, File as FastFile, BackgroundTasks |
| from fastapi.responses import FileResponse, HTMLResponse |
| from fastapi.staticfiles import StaticFiles |
|
|
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) |
|
|
| from config import CODEC_SAMPLE_RATE, CODEC_FRAME_RATE |
| from tokenizer import TTSTokenizer |
| from codec import CodecV6 |
| from model import load_for_inference |
| from inference import generate, _split_text |
| from audio_enhance import enhance_voice_for_cloning |
|
|
| |
# Relative paths: both are resolved against the process working directory.
CHECKPOINT_PATH = "checkpoint_inference.pt"
VOICES_FILE = "voices.json"

# Use the GPU whenever torch reports CUDA support; otherwise stay on the CPU.
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"
|
|
| |
| STATIC_VOICES = { |
| "NOVA": [1.1905542612075806, 0.911335289478302, 0.017048384994268417, 0.6219748854637146, -3.8700151443481445, 0.5901893377304077, 0.2003730833530426, 0.07304413616657257, 0.3560754358768463, -4.402383327484131, 0.13412430882453918, 0.7333290576934814, 0.6954804062843323, 0.03965197131037712, 0.4772234857082367, -2.9969065189361572, 0.14260149002075195, 0.6045278906822205, 0.43753159046173096, 0.27066364884376526, 0.05965322256088257, -7.528304576873779, 0.061316393315792084, 0.37170031666755676, 0.0899418294429779, -3.191102981567383, -0.10583972930908203, -0.34356924891471863, 0.6052097678184509, 0.8864829540252686, -0.12419029325246811, 0.18624518811702728, 0.5465328693389893, 0.10085536539554596, 0.361403226852417, 0.28294241428375244, 0.11407288908958435, 0.4020424485206604, 0.318211168050766, 0.18416491150856018, 1.2316043376922607, 0.05566386878490448, -3.0626754760742188, 0.39995479583740234, 0.1184023767709732, 0.5414358973503113, 0.24752962589263916, 0.3401140570640564, 0.03436635807156563, 0.06832876801490784, 0.005995089188218117, 0.9363076686859131, 0.05009560286998749, 0.10749686509370804, -3.1572816371917725, 0.014406569302082062, 0.033463407307863235, 0.8389100432395935, 0.38054540753364563, 0.12472259253263474, -0.13591259717941284, 0.06685292720794678, 0.20993970334529877, 0.05220950022339821, 0.285030335187912, 0.23420803248882294, 0.001779097132384777, -2.928344249725342, 0.420032799243927, 0.5976344347000122, 1.2419675588607788, -0.013005070388317108, -2.794372797012329, 0.6308440566062927, 0.37192124128341675, 0.26056531071662903, 0.8862340450286865, -0.010409781709313393, 0.19720959663391113, -3.4644970893859863, 0.5564914345741272, 0.30465129017829895, -2.8717682361602783, 0.6245219111442566, 0.1030757948756218, 0.05254669114947319, 0.6154380440711975, 0.3203871548175812, 0.5704132318496704, -0.001082802191376686, 0.11111843585968018, -2.4022271633148193, 0.05973700061440468, 0.32718172669410706, 0.46028679609298706, 0.6836906671524048, 
0.49810439348220825, 0.26284804940223694, 0.5748746991157532, 0.40610945224761963, 0.8076421618461609, 0.31935280561447144, 0.03156723827123642, 1.0723943710327148, 0.5207588076591492, 1.5836009979248047, 0.21744099259376526, 0.2677614390850067, 0.48335105180740356, 0.17183977365493774, -2.487086296081543, 0.22324232757091522, 0.1885831356048584, 0.4070374667644501, 0.006237425841391087, -3.7607340812683105, -0.1341061145067215, 0.3640291094779968, 0.3908931016921997, 0.4327312111854553, 0.3751571774482727, -0.14889493584632874, 0.4219122529029846, 0.5423245429992676, 0.18098433315753937, 0.041179634630680084, 0.09048353135585785, 0.1900213211774826], |
| "NOVA2": [1.1983299255371094, 0.7553510069847107, -0.11643315851688385, 0.6848059892654419, -3.4123072624206543, 0.3823966383934021, 0.020973416045308113, -0.041541289538145065, 0.1298651099205017, -4.320456504821777, 0.1328410804271698, 0.7798321843147278, 0.9192888140678406, -0.011441987007856369, 0.5021658539772034, -3.01277232170105, 0.15069840848445892, 0.5135632753372192, 0.5072751641273499, 0.10088983178138733, 0.07536688446998596, -7.504648208618164, 0.1982572376728058, 0.2028168886899948, 0.1208561509847641, -3.351240873336792, 0.10814803093671799, -0.2574847936630249, 0.5949290990829468, 0.8897058963775635, -0.011263539083302021, 0.023030906915664673, 0.5989617705345154, 0.25227615237236023, 0.3036550283432007, 0.097237728536129, 0.3288447856903076, 0.4038790166378021, 0.28024664521217346, 0.1414487659931183, 1.276529312133789, 0.09527754038572311, -3.2896828651428223, 0.4307906925678253, 0.1465688943862915, 0.6483601331710815, 0.45327043533325195, 0.535084068775177, 0.004426241852343082, -0.023835983127355576, -0.09964805841445923, 0.9329249858856201, 0.03744696453213692, 0.018313033506274223, -3.1105291843414307, 0.03548780828714371, 0.13072998821735382, 1.0241966247558594, 0.42775759100914, 0.2272561490535736, -0.18610148131847382, 0.10477077960968018, 0.1976785957813263, 0.016407163813710213, 0.31298208236694336, 0.4097185432910919, 0.07735035568475723, -3.1821649074554443, 0.2845577895641327, 0.39520949125289917, 1.1905566453933716, 0.19482173025608063, -2.7022228240966797, 0.7844187021255493, 0.3867405951023102, 0.22514104843139648, 1.0072884559631348, 0.10878886282444, 0.15838348865509033, -3.617748498916626, 0.26376873254776, 0.3570598363876343, -2.396841049194336, 0.6372708082199097, 0.01997438631951809, 0.07147836685180664, 0.46764785051345825, 0.2363276183605194, 0.5287986993789673, 0.16327831149101257, 0.11173143982887268, -2.901160478591919, -0.0006287320284172893, 0.21265800297260284, 0.4581712782382965, 0.5663840770721436, 
0.46456241607666016, 0.3096385598182678, 0.5768164396286011, 0.5899262428283691, 0.9144637584686279, 0.1793370097875595, 0.09171684086322784, 0.9268653392791748, 0.6438857316970825, 1.475677728652954, 0.1277070939540863, 0.13146352767944336, 0.9435262680053711, 0.3426448702812195, -2.267172336578369, 0.06779059767723083, 0.162134051322937, 0.286209374666214, -0.05769478157162666, -3.8586134910583496, -0.05524313449859619, 0.34964698553085327, 0.39856162667274475, 0.4654121696949005, 0.3936040997505188, 0.027396317571401596, 0.39761143922805786, 0.4053165316581726, 0.08136938512325287, -0.011603720486164093, 0.027974925935268402, 0.17831583321094513], |
| "YANY": [0.7595553994178772, 0.7045170068740845, 0.14025861024856567, 0.5667456984519958, -3.617363452911377, 0.31423935294151306, 0.19483143091201782, -0.021618135273456573, 0.47987812757492065, -4.3643341064453125, 0.1844087541103363, 0.7400225400924683, 0.6076151728630066, 0.17821498215198517, 0.6499994993209839, -3.3450357913970947, 0.33548033237457275, 0.48264598846435547, 0.6536094546318054, 0.0376361720263958, 0.09048639237880707, -7.516693592071533, 0.08222998678684235, 0.2344668209552765, 0.11646643280982971, -3.2252886295318604, 0.11130928248167038, -0.14717638492584229, 0.3747222423553467, 0.7822909355163574, 0.019589057192206383, 0.24496370553970337, 1.0580699443817139, 0.5673164129257202, 0.24417510628700256, 0.29432353377342224, 0.18497471511363983, 0.5119978785514832, 0.4962784945964813, 0.204768568277359, 1.2384358644485474, -0.062021948397159576, -3.1774840354919434, 0.4962097108364105, -0.13075096905231476, 0.2981692850589752, 0.4086250364780426, 0.3752974569797516, 0.07090616226196289, 0.14261071383953094, -0.14197185635566711, 0.8166291117668152, -0.0609249472618103, 0.18801508843898773, -3.2127737998962402, 0.43553850054740906, -0.07682569324970245, 0.7805266976356506, 0.34974756836891174, 0.33446505665779114, -0.19968514144420624, 0.18937693536281586, 0.4269423186779022, -0.045752011239528656, -0.019833002239465714, 0.260649174451828, 0.006719403900206089, -3.4137356281280518, 0.47937801480293274, 0.6114392876625061, 1.1895595788955688, 0.29007431864738464, -2.403169870376587, 0.44408389925956726, 0.43230104446411133, 0.2233371138572693, 0.8427040576934814, 0.0887276902794838, 0.11937491595745087, -3.386258363723755, 0.6230071187019348, 0.2838999032974243, -3.1078875064849854, 0.2723325490951538, 0.20863571763038635, 0.09951550513505936, 0.5134825110435486, 0.026908542960882187, 0.5447674989700317, 0.18483781814575195, -0.028836730867624283, -2.662815570831299, 0.23732498288154602, 0.3241783678531647, 0.6850618124008179, 0.7286363840103149, 
0.3241086006164551, 0.34012338519096375, 0.6306040287017822, 0.5372657179832458, 0.6698591709136963, 0.3421519100666046, 0.11022952944040298, 0.8070170283317566, 0.6347618699073792, 1.2677627801895142, 0.023278236389160156, 0.15844547748565674, 0.7308670282363892, 0.08875919133424759, -2.8425047397613525, 0.026972733438014984, 0.2932690978050232, 0.1280515342950821, 0.4489481449127197, -3.5902676582336426, -0.06417408585548401, 0.19549356400966644, 0.3790775239467621, 0.3419957160949707, 0.23203779757022858, 0.03513122349977493, 0.527247428894043, 0.5583801865577698, 0.22111022472381592, 0.09699676930904388, 0.17534780502319336, 0.1823458969593048], |
| "ANITA": [0.5489174276590347, 0.8563072681427002, 0.015058575198054314, 0.5856767892837524, -3.474443793296814, 0.5685910433530807, 0.05540411360561848, -0.166514509357512, 0.32931193709373474, -4.220456838607788, 0.17830145359039307, 0.7940778732299805, 0.41199035942554474, 0.07260656729340553, 0.7391091883182526, -2.992477297782898, 0.33138880133628845, 0.7154046595096588, 0.6319634020328522, 0.11274447292089462, 0.13320110738277435, -7.617172002792358, 0.24857618659734726, 0.26255226135253906, 0.08399171382188797, -2.8611263036727905, 0.13354498147964478, -0.002969544380903244, 0.3499854579567909, 0.5311120748519897, -0.025399386882781982, 0.2828158661723137, 0.5750554352998734, 0.4820759743452072, 0.4567323178052902, 0.4035782665014267, 0.3425174504518509, 0.306240051984787, 0.5308757424354553, 0.3264385610818863, 1.0148829519748688, -0.07871465012431145, -3.2808687686920166, 0.5336374640464783, -0.065285908523947, 0.08356216922402382, 0.36565399169921875, 0.3154626786708832, 0.156748715788126, 0.36649923026561737, -0.22774440050125122, 0.6688017547130585, -0.050320989452302456, 0.17112083733081818, -3.0628098249435425, 0.23470847308635712, 0.21637441217899323, 0.8258635103702545, 0.5496575832366943, 0.3798123002052307, -0.18623936921358109, 0.17447946220636368, 0.4036127179861069, 0.15702290832996368, 0.31793907284736633, 0.33534564077854156, -0.0962473526597023, -3.4386789798736572, 0.3713282197713852, 0.6002452671527863, 1.0634905099868774, 0.15481910854578018, -2.9156216382980347, 0.5021517276763916, 0.5440895110368729, 0.4653082937002182, 0.6940016746520996, 0.14119910448789597, 0.4195473939180374, -3.6648422479629517, 0.6860649287700653, 0.2642555832862854, -3.0756865739822388, 0.33001116663217545, 0.1546030193567276, 0.11629177257418633, 0.6103253066539764, 0.02144426666200161, 0.42899811267852783, -0.006054788827896118, 0.22657296806573868, -2.8145543336868286, 0.15966206416487694, 0.47316767275333405, 0.6700464189052582, 1.0120139420032501, 
0.34442101418972015, 0.04423576220870018, 0.9130581915378571, 0.3285454958677292, 0.6877541542053223, 0.061741845682263374, 0.10550222545862198, 0.7509118616580963, 0.6574697494506836, 0.8685739040374756, 0.14616264775395393, 0.2814873680472374, 0.7580173015594482, 0.028720788657665253, -3.7125461101531982, 0.09411222487688065, 0.19545741379261017, 0.3242332637310028, 0.20917727798223495, -3.281902551651001, 0.07898347079753876, 0.3505653291940689, 0.5302634239196777, 0.24469570070505142, 0.3834524601697922, -0.12796197086572647, 0.4154924005270004, 0.43273375928401947, 0.35387393832206726, 0.15660029649734497, -0.021274873986840248, 0.23377800732851028] |
| } |
|
|
def decode_key(encoded: str) -> str:
    """Decode a key stored as reversed base64 text.

    The input is reversed, base64-decoded and then decoded to text.
    Any failure along the way yields an empty string instead of raising.
    """
    from base64 import b64decode

    try:
        unreversed = encoded[::-1]
        raw = b64decode(unreversed)
        return raw.decode()
    except Exception:
        return ""
|
|
|
|
| |
# NOTE(review): fallback key stored as reversed base64 (see decode_key). This is
# obfuscation, not secrecy -- anyone with the source can recover it. Prefer
# setting VOX_API_KEY in the environment and rotating this value.
ENCODED_API_KEY = "0IDMy81czV2YjF2XlRXY2lmcw9VauF2X49md"

# A non-empty VOX_API_KEY from the environment wins; otherwise the embedded key
# is decoded; with neither available API_KEY is None.
API_KEY = os.environ.get("VOX_API_KEY") or (
    decode_key(ENCODED_API_KEY) if ENCODED_API_KEY else None
)
|
|
|
|
| |
# Both default to "" when unset; the repo sync in save_voices_to_repo and
# load_voices only runs when both are non-empty.
HF_TOKEN = os.getenv("HF_TOKEN", "")
SPACE_ID = os.getenv("SPACE_ID", "")
|
|
|
|
def save_voices_to_repo(voices_data: dict):
    """Persist the cloned voices locally and, when configured, to the Space repo.

    The JSON is first written to a sibling temporary file and then moved over
    VOICES_FILE with os.replace, so a failure while serialising cannot leave a
    truncated voices file behind.

    The upload to the Hugging Face Space only happens when both HF_TOKEN and
    SPACE_ID are set; upload failures are reported with a warning and do not
    raise.

    Args:
        voices_data: mapping to serialise as JSON.

    Raises:
        Whatever ``open``/``json.dump``/``os.replace`` raise for the local write.
    """
    # Unique per call so concurrent saves never share a scratch file.
    scratch_path = f"{VOICES_FILE}.{os.getpid()}.{time.time_ns()}.tmp"
    try:
        with open(scratch_path, "w", encoding="utf-8") as f:
            json.dump(voices_data, f, ensure_ascii=False, indent=2)
        os.replace(scratch_path, VOICES_FILE)
    finally:
        # Only still present when the write or the replace failed.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

    if not HF_TOKEN or not SPACE_ID:
        return
    try:
        from huggingface_hub import HfApi
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=VOICES_FILE,
            path_in_repo=VOICES_FILE,
            repo_id=SPACE_ID,
            repo_type="space",
            commit_message="Update voices.json",
        )
    except Exception as e:
        # Best effort: the local copy is already saved.
        print(f"Warning: could not save to repo: {e}")
|
|
|
|
def load_voices() -> dict:
    """Return the cloned voices stored in VOICES_FILE, or {} when unavailable.

    When HF_TOKEN and SPACE_ID are both set, the file is first pulled from the
    Space repo into the current directory; a failed pull is only reported.
    A missing or unreadable local file results in an empty dict.
    """
    repo_configured = bool(HF_TOKEN and SPACE_ID)
    if repo_configured:
        try:
            from huggingface_hub import hf_hub_download
            hf_hub_download(
                repo_id=SPACE_ID,
                repo_type="space",
                filename=VOICES_FILE,
                local_dir=".",
                token=HF_TOKEN,
            )
        except Exception as pull_error:
            print(f"Could not pull {VOICES_FILE} from repo: {pull_error}")

    if not os.path.exists(VOICES_FILE):
        return {}

    try:
        with open(VOICES_FILE, "r", encoding="utf-8") as handle:
            voices = json.load(handle)
            print(f" Loaded {len(voices)} cloned voices from JSON")
            return voices
    except Exception as read_error:
        print(f" Error reading {VOICES_FILE}: {read_error}")
    return {}
|
|
|
|
| |
# Runtime state, filled in by load_model() at startup.
MODEL = TOKENIZER = CODEC = DEFAULT_SPEAKER_EMB = None
VOICE_EMBEDDINGS = dict()  # voice id -> embedding tensor for preset voices
CLONED_VOICES = dict()     # voice id -> {"name": ..., "embedding": [...]}

# Preset voice id -> reference WAV file; load_model() skips files that are missing.
VOICE_WAV_MAP = {
    "ani-bg-female": "sample_female_bg1.wav",
    "ani-bg-male": "sample_male2_bg1.wav",
    "ani-en-female": "sample_female_en1.wav",
    "ani-en-male": "sample_male2_en1.wav",
}
|
|
|
|
def load_model():
    """Load the model, tokenizer and codec and populate the voice tables.

    Steps, in order:
      1. Load the inference checkpoint, tokenizer and codec on DEVICE.
      2. Encode every WAV listed in VOICE_WAV_MAP that exists on disk and
         store its "global_embedding" under the preset id.
      3. Register each STATIC_VOICES entry as ``static-<name lowercased>``.
      4. Load the cloned voices via load_voices().
      5. Pick the default speaker: "static-nova", else "ani-bg-female".

    All results are stored in module-level globals.
    """
    global MODEL, TOKENIZER, CODEC, DEFAULT_SPEAKER_EMB, VOICE_EMBEDDINGS, CLONED_VOICES
    print(f"Loading model on {DEVICE}...")
    MODEL = load_for_inference(CHECKPOINT_PATH, device=DEVICE)
    TOKENIZER = TTSTokenizer()
    CODEC = CodecV6(device=DEVICE)

    # Presets derived from reference recordings (only those present on disk).
    for voice_id, wav_file in VOICE_WAV_MAP.items():
        if os.path.exists(wav_file):
            result = CODEC.encode(wav_file)
            VOICE_EMBEDDINGS[voice_id] = result["global_embedding"].to(DEVICE)
            print(f" Loaded WAV preset: {voice_id}")

    # Presets shipped as literal embedding vectors.
    for v_name, emb_list in STATIC_VOICES.items():
        v_id = f"static-{v_name.lower()}"
        VOICE_EMBEDDINGS[v_id] = torch.tensor(emb_list, dtype=torch.float32).to(DEVICE)
        print(f" Loaded static preset: {v_id}")

    CLONED_VOICES = load_voices()

    # Compare against None explicitly: `a or b` would truth-test the tensor,
    # and torch raises RuntimeError for tensors with more than one element.
    DEFAULT_SPEAKER_EMB = VOICE_EMBEDDINGS.get("static-nova")
    if DEFAULT_SPEAKER_EMB is None:
        DEFAULT_SPEAKER_EMB = VOICE_EMBEDDINGS.get("ani-bg-female")
    print("Model ready!")
|
|
|
|
def get_speaker_emb(voice_id: str):
    """Resolve a voice id to a speaker embedding.

    Lookup order: preset embeddings, then cloned voices (whose stored list is
    turned into a float32 tensor on DEVICE), then DEFAULT_SPEAKER_EMB.
    """
    if voice_id in VOICE_EMBEDDINGS:
        return VOICE_EMBEDDINGS[voice_id]
    if voice_id not in CLONED_VOICES:
        return DEFAULT_SPEAKER_EMB
    stored_values = CLONED_VOICES[voice_id]["embedding"]
    cloned_emb = torch.tensor(stored_values, dtype=torch.float32)
    return cloned_emb.to(DEVICE)
|
|
|
|
# Number of codec frames in 0.15 s.
# NOTE(review): not referenced by any other code visible in this module.
_SILENCE_FRAMES = int(0.15 * CODEC_FRAME_RATE)
|
|
|
|
def synthesize_text(text: str, speaker_emb=None) -> np.ndarray:
    """Synthesise *text* and return the audio as one numpy array.

    The text is split into chunks of at most 250 (as measured by _split_text),
    each chunk is generated and decoded separately, and for multi-chunk input
    a 0.15 s block of zeros is placed after every decoded chunk except the
    last. Chunks for which generation returns nothing are skipped.

    Args:
        text: input text.
        speaker_emb: speaker embedding; DEFAULT_SPEAKER_EMB when None.

    Returns:
        The concatenated audio, or 1000 float32 zeros when nothing was produced.
    """
    voice = DEFAULT_SPEAKER_EMB if speaker_emb is None else speaker_emb
    pieces = _split_text(text, TOKENIZER, max_len=250)
    multi_chunk = len(pieces) > 1

    segments = []
    for piece in pieces:
        codes = generate(
            MODEL, TOKENIZER, piece, voice,
            max_new_tokens=512, temperature=0.3,
            top_k=250, top_p=0.95, rep_penalty=1.3, device=DEVICE,
        )
        if codes is None or len(codes) == 0:
            continue
        segments.append(CODEC.decode(codes, voice).cpu().numpy())
        if multi_chunk:
            pause = np.zeros(int(CODEC_SAMPLE_RATE * 0.15), dtype=np.float32)
            segments.append(pause)

    if not segments:
        return np.zeros(1000, dtype=np.float32)
    if multi_chunk and len(segments) > 1:
        # Drop the pause that follows the final decoded chunk.
        segments = segments[:-1]
    return np.concatenate(segments)
|
|
|
|
| |
def require_key(api_key: str):
    """Reject the request unless *api_key* matches the configured API_KEY.

    No check is performed when API_KEY is None. The comparison uses
    hmac.compare_digest so that it does not leak, through timing, how much of
    the supplied key matched.

    Args:
        api_key: key supplied by the client.

    Raises:
        HTTPException: 403 when a key is configured and *api_key* differs.
    """
    if API_KEY is None:
        return

    import hmac

    matches = False
    if isinstance(api_key, str):
        # Compare as bytes: compare_digest only accepts ASCII-only str values.
        supplied = api_key.encode("utf-8", "surrogatepass")
        expected = API_KEY.encode("utf-8", "surrogatepass")
        matches = hmac.compare_digest(supplied, expected)
    if not matches:
        raise HTTPException(status_code=403, detail="Invalid API key")
|
|
|
|
| |
# ASGI application object; routes are registered on it by the decorators below.
app = FastAPI(title="VOX ANI TTS")

# Serve the contents of the local "static" directory under /static.
app.mount(
    "/static",
    app=StaticFiles(directory="static"),
    name="static",
)
|
|
|
|
@app.on_event("startup")
def startup():
    """Load the model when the application starts.

    A failure is reported on stdout and otherwise ignored, so the server still
    comes up when loading fails.
    """
    try:
        load_model()
    except Exception as load_error:
        print(f"⚠️ Model not loaded: {load_error}")
|
|
|
|
def remove_file(path: str):
    """Delete *path*, doing nothing when it does not exist.

    The removal is attempted directly rather than after an existence check, so
    a file that disappears between the check and the removal cannot cause an
    error.

    Args:
        path: file to delete.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
|
|
|
|
@app.get("/", response_class=HTMLResponse)
def serve_ui():
    """Return the contents of static/index.html as the HTML response body."""
    with open("static/index.html", encoding="utf-8") as page:
        html = page.read()
    return html
|
|
|
|
@app.get("/voices")
def api_get_voices(api_key: str = Query(default="")):
    """List all voices: presets first, then cloned voices.

    Preset entries use their id as the name; cloned entries also carry the
    stored embedding.

    Raises:
        HTTPException: 403 from require_key on a bad API key.
    """
    require_key(api_key)
    voices = [
        {"id": preset_id, "name": preset_id, "type": "preset"}
        for preset_id in VOICE_EMBEDDINGS
    ]
    for clone_id, info in CLONED_VOICES.items():
        voices.append(
            {
                "id": clone_id,
                "name": info["name"],
                "type": "cloned",
                "embedding": info["embedding"],
            }
        )
    return {"voices": voices}
|
|
|
|
@app.get("/synthesize")
def api_synthesize(
    text: str = Query(...),
    api_key: str = Query(default=""),
    voice: str = Query(default="static-nova"),
    background_tasks: BackgroundTasks = BackgroundTasks(),
):
    """Synthesise *text* with the requested voice and return it as a WAV file.

    Args:
        text: text to speak.
        api_key: client API key, checked by require_key.
        voice: voice id resolved by get_speaker_emb.
        background_tasks: used to delete the temporary WAV once the response
            has been sent. NOTE(review): FastAPI is expected to inject a
            per-request instance based on the annotation; the default only
            applies to direct calls.

    Raises:
        HTTPException: 403 on a bad API key, 503 when the model is not loaded.
    """
    require_key(api_key)
    if MODEL is None or TOKENIZER is None or CODEC is None:
        # startup() swallows load failures, so this state is reachable.
        raise HTTPException(status_code=503, detail="Model not loaded")

    speaker_emb = get_speaker_emb(voice)
    wav = synthesize_text(text, speaker_emb)

    # Reserve a path and close the handle right away so no descriptor leaks.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    try:
        sf.write(wav_path, wav, CODEC_SAMPLE_RATE)
    except Exception:
        # Do not leave the reserved file behind when writing fails.
        remove_file(wav_path)
        raise
    background_tasks.add_task(remove_file, wav_path)
    return FileResponse(wav_path, media_type="audio/wav")
|
|
|
|
@app.get("/synthesize_with_embedding")
def api_synthesize_with_embedding(
    text: str = Query(...),
    api_key: str = Query(default=""),
    embedding: str = Query(...),
    background_tasks: BackgroundTasks = BackgroundTasks(),
):
    """Synthesise *text* with a caller-supplied speaker embedding.

    Args:
        text: text to speak.
        api_key: client API key, checked by require_key.
        embedding: JSON-encoded list of numbers used as the speaker embedding.
        background_tasks: used to delete the temporary WAV once the response
            has been sent. NOTE(review): FastAPI is expected to inject a
            per-request instance based on the annotation; the default only
            applies to direct calls.

    Raises:
        HTTPException: 403 on a bad API key, 400 when *embedding* is not a
            JSON list of numbers, 503 when the model is not loaded.
    """
    require_key(api_key)
    if MODEL is None or TOKENIZER is None or CODEC is None:
        # startup() swallows load failures, so this state is reachable.
        raise HTTPException(status_code=503, detail="Model not loaded")

    # The embedding comes straight from the client: report bad input as 400
    # instead of letting the parse error surface as a server error.
    try:
        emb_list = json.loads(embedding)
        if not isinstance(emb_list, list) or not emb_list:
            raise ValueError("embedding must be a non-empty JSON list")
        emb_cpu = torch.tensor(emb_list, dtype=torch.float32)
    except (ValueError, TypeError, RuntimeError) as exc:
        raise HTTPException(
            status_code=400,
            detail="embedding must be a JSON list of numbers",
        ) from exc
    speaker_emb = emb_cpu.to(DEVICE)

    wav = synthesize_text(text, speaker_emb)

    # Reserve a path and close the handle right away so no descriptor leaks.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    try:
        sf.write(wav_path, wav, CODEC_SAMPLE_RATE)
    except Exception:
        # Do not leave the reserved file behind when writing fails.
        remove_file(wav_path)
        raise
    background_tasks.add_task(remove_file, wav_path)
    return FileResponse(wav_path, media_type="audio/wav")
|
|
|
|
def _embed_uploaded_audio(audio_bytes, enhance, denoise_strength, deess_db, warm_db):
    """Compute a speaker embedding (list of floats) from uploaded audio bytes.

    The bytes are written to a temporary file, read back, converted to float32,
    averaged over channels when multi-channel, optionally passed through
    enhance_voice_for_cloning, written back, and encoded by the codec. The
    temporary file is always removed.

    Raises:
        HTTPException: 503 when the codec is not loaded, 400 when the upload is
            empty or cannot be decoded as audio.
    """
    if CODEC is None:
        # startup() swallows load failures, so this state is reachable.
        raise HTTPException(status_code=503, detail="Model not loaded")
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name
    try:
        try:
            audio, sr = sf.read(tmp_path)
        except RuntimeError as exc:
            # soundfile signals unreadable/unsupported input with RuntimeError
            # (or a subclass); that is a client error, not a server fault.
            raise HTTPException(
                status_code=400, detail="Could not decode uploaded audio"
            ) from exc
        audio = audio.astype(np.float32)
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        if audio.size == 0:
            raise HTTPException(status_code=400, detail="Uploaded audio has no samples")
        if enhance:
            audio = enhance_voice_for_cloning(
                audio, sr,
                denoise_strength=denoise_strength,
                deess_reduction_db=deess_db,
                warm_boost_db=warm_db,
            )
        # The codec reads from disk, so write the processed audio back first.
        sf.write(tmp_path, audio, sr)
        result = CODEC.encode(tmp_path)
        return result["global_embedding"].squeeze().cpu().tolist()
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


@app.post("/encode_voice")
async def api_encode_voice(
    api_key: str = Query(default=""),
    file: UploadFile = FastFile(...),
    enhance: bool = Query(default=True),
    denoise_strength: float = Query(default=0.75),
    deess_db: float = Query(default=6.0),
    warm_db: float = Query(default=2.5),
):
    """Return the speaker embedding of an uploaded recording without storing it.

    Args:
        api_key: client API key, checked by require_key.
        file: uploaded audio file.
        enhance: whether to run enhance_voice_for_cloning before encoding.
        denoise_strength, deess_db, warm_db: forwarded to the enhancer.

    Returns:
        ``{"embedding": [...]}``.

    Raises:
        HTTPException: 403 on a bad API key; 400/503 as raised while embedding.
    """
    require_key(api_key)
    audio_bytes = await file.read()
    embedding = _embed_uploaded_audio(
        audio_bytes, enhance, denoise_strength, deess_db, warm_db
    )
    return {"embedding": embedding}




@app.post("/clone_voice")
async def api_clone_voice(
    api_key: str = Query(default=""),
    name: str = Query(default=""),
    file: UploadFile = FastFile(...),
    enhance: bool = Query(default=True),
    denoise_strength: float = Query(default=0.75),
    deess_db: float = Query(default=6.0),
    warm_db: float = Query(default=2.5),
):
    """Create a cloned voice from an uploaded recording and persist it.

    Args:
        api_key: client API key, checked by require_key.
        name: display name; ``Cloned_<unix seconds>`` when blank.
        file: uploaded audio file.
        enhance: whether to run enhance_voice_for_cloning before encoding.
        denoise_strength, deess_db, warm_db: forwarded to the enhancer.

    Returns:
        ``{"id": ..., "name": ...}`` for the new voice.

    Raises:
        HTTPException: 403 on a bad API key, 400 when 100 cloned voices already
            exist; 400/503 as raised while embedding.
    """
    require_key(api_key)
    if len(CLONED_VOICES) >= 100:
        raise HTTPException(status_code=400, detail="Max 100 cloned voices")

    audio_bytes = await file.read()
    embedding = _embed_uploaded_audio(
        audio_bytes, enhance, denoise_strength, deess_db, warm_db
    )

    created_at = int(time.time())
    voice_name = name.strip() or f"Cloned_{created_at}"
    # Ids are second-granular; add a counter so two clones made within the same
    # second do not overwrite each other.
    base_id = f"clone_{created_at}"
    voice_id = base_id
    attempt = 1
    while voice_id in CLONED_VOICES:
        attempt += 1
        voice_id = f"{base_id}_{attempt}"

    CLONED_VOICES[voice_id] = {"name": voice_name, "embedding": embedding}
    save_voices_to_repo(CLONED_VOICES)
    return {"id": voice_id, "name": voice_name}
|
|
|
|
@app.delete("/voices/{voice_id}")
def api_delete_voice(voice_id: str, api_key: str = Query(default="")):
    """Remove a cloned voice and persist the updated collection.

    Returns:
        ``{"deleted": <id>, "name": <name of the removed voice>}``.

    Raises:
        HTTPException: 403 on a bad API key, 404 when the id is not a cloned voice.
    """
    require_key(api_key)
    try:
        removed = CLONED_VOICES.pop(voice_id)
    except KeyError:
        raise HTTPException(status_code=404, detail="Voice not found")
    removed_name = removed["name"]
    save_voices_to_repo(CLONED_VOICES)
    return {"deleted": voice_id, "name": removed_name}
|
|
|
|
@app.get("/voices/{voice_id}/download")
def api_download_voice(voice_id: str, api_key: str = Query(default="")):
    """Download one voice (cloned or preset) as a JSON file.

    The payload is ``{voice_id: {"name": ..., "embedding": [...]}}``. Cloned
    voices take precedence over presets with the same id. The download is
    named ``voice_<name with spaces replaced by underscores>.json``.

    Raises:
        HTTPException: 403 on a bad API key, 404 when the id is unknown.
    """
    require_key(api_key)
    if voice_id in CLONED_VOICES:
        v = CLONED_VOICES[voice_id]
        display_name = v["name"]
        embedding = v["embedding"]
    elif voice_id in VOICE_EMBEDDINGS:
        display_name = voice_id
        embedding = VOICE_EMBEDDINGS[voice_id].cpu().tolist()
    else:
        raise HTTPException(status_code=404, detail="Voice not found")

    data = {voice_id: {"name": display_name, "embedding": embedding}}
    safe = display_name.replace(" ", "_")

    # The on-disk name is never shown to the client, so keep the user-supplied
    # voice name out of it: a name containing path separators must not
    # influence where the temporary file is created.
    tmp = tempfile.NamedTemporaryFile(
        suffix=".json", prefix="voice_",
        delete=False, mode="w", encoding="utf-8",
    )
    tmp_path = tmp.name
    try:
        with tmp:
            json.dump(data, tmp, ensure_ascii=False, indent=2)
    except Exception:
        remove_file(tmp_path)
        raise

    # Delete the temporary file once the response has been sent; without this
    # every download leaves a file behind.
    cleanup = BackgroundTasks()
    cleanup.add_task(remove_file, tmp_path)
    return FileResponse(
        tmp_path,
        media_type="application/json",
        filename=f"voice_{safe}.json",
        background=cleanup,
    )
|
|
|
|
if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)