aryo100 committed on
Commit
22909b3
·
1 Parent(s): 33a7f5c

update docker & add api

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. api.py +167 -0
Dockerfile CHANGED
@@ -11,4 +11,4 @@ EXPOSE 7860
11
  ENV NUMBA_CACHE_DIR=/tmp/numba_cache
12
  ENV NUMBA_DISABLE_CACHING=1
13
 
14
- CMD ["uvicorn", "webui:app", "--host", "0.0.0.0", "--port", "7860"]
 
11
  ENV NUMBA_CACHE_DIR=/tmp/numba_cache
12
  ENV NUMBA_DISABLE_CACHING=1
13
 
14
+ CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
api.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import edge_tts
4
+ import soundfile as sf
5
+ import torch
6
+ import fairseq
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.responses import FileResponse
9
+ from pydantic import BaseModel
10
+ # from modules import models
11
+ from uuid import uuid4
12
+ import requests
13
+ from modules.core import preload
14
+ from modules.models import load_model
15
+
16
# ASGI application object; uvicorn is pointed at it as `api:app`.
app = FastAPI()

# Call modules.core.preload() once at import time, before any request is handled.
preload()

# Voice-conversion models that a request can select through its `name` field.
# Each entry carries the checkpoint path and the matching index path.
path_models = [
    dict(
        name="zeta",
        label="Zeta",
        ckpt_path="weights/zet_test1.pth",
        index_path="weights/zet_test1.0.index",
    ),
]

# edge_tts voices, described by name, label, gender and language.
edge_tts_voices = [
    dict(name=voice_name, label=voice_label, gender=voice_gender, language=voice_language)
    for voice_name, voice_label, voice_gender, voice_language in (
        ("id-ID-GadisNeural", "Indonesian Female (Gadis)", "Female", "Indonesian"),
        ("id-ID-ArdiNeural", "Indonesian Male (Ardi)", "Male", "Indonesian"),
        ("en-US-JennyNeural", "English US Female (Jenny)", "Female", "English"),
        ("en-US-GuyNeural", "English US Male (Guy)", "Male", "English"),
        ("ja-JP-NanamiNeural", "Japanese Female (Nanami)", "Female", "Japanese"),
        ("ja-JP-KeitaNeural", "Japanese Male (Keita)", "Male", "Japanese"),
    )
]

# Base URL of the external Back4App TTS service; None when the variable is unset.
BACK4APP_TTS_URL = os.environ.get("BACK4APP_TTS_URL")
40
+
41
async def generate_tts_with_back4app(text: str, voice: str, tts_wav: str):
    """Synthesize *text* through the Back4App TTS service and save it to *tts_wav*.

    The service is asked to render the text via ``POST {BACK4APP_TTS_URL}/tts``;
    the ``"file"`` field of its JSON reply is then appended to the base URL and
    downloaded to *tts_wav* in 8 KiB chunks.

    Args:
        text: Text to synthesize.
        voice: Voice identifier forwarded to the service.
        tts_wav: Destination path for the downloaded audio.

    Raises:
        HTTPException: status 500 when the base URL is not configured, the
            service answers with a non-200 status, or any request / file
            operation fails.
    """
    if not BACK4APP_TTS_URL:
        # Without this guard the request would go to the literal URL "None/tts".
        raise HTTPException(
            status_code=500,
            detail="TTS error via Back4App: BACK4APP_TTS_URL is not set",
        )

    def _fetch_to_file() -> None:
        # 1. Ask the service to render the text.
        response = requests.post(
            f"{BACK4APP_TTS_URL}/tts",
            json={"text": text, "voice": voice},
            timeout=60,
        )
        if response.status_code != 200:
            raise HTTPException(
                status_code=500,
                detail=f"Back4App TTS failed: {response.text}",
            )

        # 2. Take the file URL from the response and stream it to disk.
        tts_url = response.json()["file"]
        with requests.get(
            f"{BACK4APP_TTS_URL}{tts_url}", stream=True, timeout=60
        ) as download:
            download.raise_for_status()
            with open(tts_wav, "wb") as f:
                for chunk in download.iter_content(8192):
                    f.write(chunk)

    try:
        # `requests` is blocking; run it in the default executor so the event
        # loop keeps serving other requests meanwhile.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, _fetch_to_file)
    except HTTPException:
        # Already carries a precise status and detail; do not wrap it again.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"TTS error via Back4App: {e}"
        ) from e
63
+
64
# Request body accepted by the POST /tts endpoint.
class TTSRequest(BaseModel):
    # Text to be synthesized.
    text: str
    # Model name; must match the "name" of an entry in `path_models`.
    name: str
    # edge_tts voice used for the intermediate speech synthesis.
    tts_voice: str = "id-ID-GadisNeural"
    # Pitch shift passed to the voice-conversion step as `f0_up_key`.
    f0_up_key: int = 0
69
+
70
def limit_tts_files(output_dir, max_files=10):
    """Delete the oldest files in *output_dir* until at most *max_files* remain.

    Only regular files are counted and removed; sub-directories are left
    untouched. Files are ordered by modification time, oldest first. A file
    that disappears while the directory is being processed (for example,
    removed by a concurrent request) is skipped instead of raising.

    Args:
        output_dir: Directory to prune. It must exist.
        max_files: Maximum number of files to keep. Values below zero are
            treated as zero.

    Raises:
        FileNotFoundError: If *output_dir* does not exist.
    """
    keep = max(max_files, 0)

    dated_paths = []
    with os.scandir(output_dir) as entries:
        for entry in entries:
            try:
                if entry.is_file():
                    dated_paths.append((entry.stat().st_mtime, entry.path))
            except FileNotFoundError:
                # Entry vanished between listing and stat; nothing to prune.
                continue

    dated_paths.sort()
    excess = max(len(dated_paths) - keep, 0)
    for _mtime, path in dated_paths[:excess]:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already removed by someone else; the goal is met either way.
            pass
78
+
79
@app.post("/tts")
async def tts_api(req: TTSRequest):
    """Synthesize speech with edge_tts, convert it with an RVC model, return its URL.

    Steps:
        1. Look up the requested model in ``path_models`` and check that its
           checkpoint and index files exist.
        2. Stream edge_tts audio for ``req.text`` into a file under /tmp/tts.
        3. Load the model and run ``vc.single`` on that file.
        4. Write the converted audio with soundfile and return a URL that is
           served by the ``/file-tmp`` endpoint.

    Returns:
        ``{"result": <url>}``. The URL is absolute when the ``SPACE_ID``
        environment variable is set, otherwise relative.

    Raises:
        HTTPException: 404 when the model name is unknown or a model / index
            file is missing; 500 when synthesis or conversion fails.
    """
    # Find the model by name.
    model = next((m for m in path_models if m["name"] == req.name), None)
    if not model:
        raise HTTPException(status_code=404, detail=f"Model '{req.name}' not found.")

    ckpt_path = model["ckpt_path"]
    index_path = model["index_path"]

    # Check that the model and index files exist.
    if not os.path.isfile(ckpt_path):
        raise HTTPException(status_code=404, detail=f"Model file not found: {ckpt_path}")
    if not os.path.isfile(index_path):
        raise HTTPException(status_code=404, detail=f"Index file not found: {index_path}")

    # Output paths. Old files are pruned before the new ones are created.
    output_dir = "/tmp/tts"
    os.makedirs(output_dir, exist_ok=True)
    limit_tts_files(output_dir, max_files=10)
    tts_wav = f"{output_dir}/{uuid4().hex}_tts.wav"
    output_wav = f"{output_dir}/{uuid4().hex}_rvc.wav"
    index_rate = 0.75

    # 1. Generate TTS with edge_tts (generate_tts_with_back4app is the
    #    alternative backend).
    # NOTE(review): the audio chunks are written verbatim; the .wav suffix does
    # not by itself guarantee WAV-encoded content. Confirm the RVC loader
    # accepts whatever edge_tts emits.
    try:
        communicate = edge_tts.Communicate(req.text, req.tts_voice)
        with open(tts_wav, "wb") as f:
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    f.write(chunk["data"])
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS error: {e}") from e

    # 2. Voice conversion.
    try:
        vc = load_model(ckpt_path, config_json="configs/48k-768.json")
        if vc is None:
            raise RuntimeError("Failed to load model")

        result = vc.single(
            sid=0,                          # speaker id
            input_audio=tts_wav,            # input audio path
            embedder_model_name="auto",     # auto-detect embedder
            embedding_output_layer="auto",  # auto-detect layer
            f0_up_key=req.f0_up_key,        # pitch shift
            f0_file="",                     # f0 curve file (none)
            f0_method="harvest",            # f0 method
            auto_load_index=True,           # auto-load index
            faiss_index_file=index_path,    # index file path
            index_rate=index_rate,          # index rate
            output_dir=output_dir,          # output directory
        )

        # Anything other than (info, (sample_rate, audio)) is treated as an
        # error value and reported as such.
        well_formed = (
            isinstance(result, tuple)
            and len(result) == 2
            and isinstance(result[1], tuple)
        )
        if not well_formed:
            raise HTTPException(status_code=500, detail=f"RVC error: {result}")
        _info, (tgt_sr, audio_opt) = result
        sf.write(output_wav, audio_opt, tgt_sr)
    except HTTPException:
        # Keep the detail as raised above instead of wrapping it a second time.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"RVC error: {e}") from e

    # Build the public URL from the Hugging Face environment when available.
    space_id = os.environ.get("SPACE_ID")
    if space_id:
        # Only the owner part is used; tolerate a value without a "/".
        username = space_id.split("/", 1)[0]
        # NOTE(review): the Space name is hard-coded as "rvc-tts" rather than
        # taken from SPACE_ID. Confirm this is intended.
        space_url = f"https://{username}-rvc-tts.hf.space"
        public_url = f"{space_url}/file-tmp?path={output_wav}"
    else:
        public_url = f"/file-tmp?path={output_wav}"

    return {"result": public_url}
157
+
158
@app.get("/file-tmp")
def get_tmp_file(path: str):
    """Serve a generated audio file from /tmp/tts.

    Args:
        path: Absolute path of the requested file; must begin with
            ``/tmp/tts/`` and resolve to a location inside that directory.

    Returns:
        A ``FileResponse`` for the resolved file.

    Raises:
        HTTPException: 403 when the path lies outside /tmp/tts (including
            ``..`` segments or symlinks that escape it); 404 when the file
            does not exist.
    """
    # Security: only allow access to files under /tmp/tts.
    if not path.startswith("/tmp/tts/"):
        raise HTTPException(status_code=403, detail="Forbidden")

    # A prefix check alone accepts "/tmp/tts/../../etc/passwd"; resolve ".."
    # and symlinks, then require the result to stay inside the base directory.
    try:
        base_dir = os.path.realpath("/tmp/tts")
        resolved = os.path.realpath(path)
        inside = (
            resolved != base_dir
            and os.path.commonpath([base_dir, resolved]) == base_dir
        )
    except ValueError:
        # e.g. an embedded NUL byte in the supplied path.
        inside = False
    if not inside:
        raise HTTPException(status_code=403, detail="Forbidden")

    if not os.path.isfile(resolved):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(resolved)
166
+
167
+ # Run with: uvicorn api:app --reload