Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -61,8 +61,8 @@ if not hasattr(torchaudio, "info"):
|
|
| 61 |
|
| 62 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 63 |
|
| 64 |
-
# FORCE BUILD TRIGGER:
|
| 65 |
-
#
|
| 66 |
|
| 67 |
# 🛠️ Monkeypatch torchaudio.load
|
| 68 |
try:
|
|
@@ -93,7 +93,7 @@ MODELS = {"stt": None, "translate": None, "tts": None, "tokenizer": None, "denoi
|
|
| 93 |
def load_models():
|
| 94 |
global MODELS
|
| 95 |
if MODELS["stt"] is None:
|
| 96 |
-
print("ποΈ Loading Faster-Whisper large-v3...")
|
| 97 |
from faster_whisper import WhisperModel
|
| 98 |
if torch.cuda.is_available():
|
| 99 |
print(f"π GPU Detected: {torch.cuda.get_device_name(0)}")
|
|
@@ -108,7 +108,6 @@ def load_models():
|
|
| 108 |
torch.cuda.empty_cache()
|
| 109 |
|
| 110 |
# Initialize Chatterbox ONNX (High-Speed Fallback)
|
| 111 |
-
# This will load the model if not already loaded internally by chatterbox_utils
|
| 112 |
chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
|
| 113 |
|
| 114 |
if MODELS["translate"] is None:
|
|
@@ -123,7 +122,7 @@ def load_models():
|
|
| 123 |
except: pass
|
| 124 |
|
| 125 |
if MODELS["tts"] is None:
|
| 126 |
-
print("π Loading XTTS-v2...")
|
| 127 |
from TTS.api import TTS
|
| 128 |
try:
|
| 129 |
MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
|
|
@@ -132,6 +131,35 @@ def load_models():
|
|
| 132 |
print(f"β Failed to load XTTS: {e}")
|
| 133 |
raise e
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
def _stt_logic(request_dict):
|
| 136 |
"""STT Logic (Runs on GPU when called via core_process)"""
|
| 137 |
audio_bytes = base64.b64decode(request_dict.get("file"))
|
|
@@ -158,8 +186,6 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 158 |
if not text or not text.strip():
|
| 159 |
return {"error": "TTS Error: Input text is empty"}
|
| 160 |
|
| 161 |
-
# π XTTS-v2 COMPLETE 16-LANGUAGE MAPPING (v79)
|
| 162 |
-
# This dictionary ensures every officially supported XTTS language code is correctly matched.
|
| 163 |
XTTS_MAP = {
|
| 164 |
"en": "en", "en-us": "en", "en-gb": "en",
|
| 165 |
"de": "de", "de-de": "de",
|
|
@@ -180,18 +206,15 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 180 |
}
|
| 181 |
|
| 182 |
XTTS_LANG_CODES = set(XTTS_MAP.values())
|
| 183 |
-
|
| 184 |
mapped_lang = None
|
| 185 |
if lang:
|
| 186 |
lang_key = lang.strip().lower()
|
| 187 |
mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
|
| 188 |
|
| 189 |
-
print(f"[
|
| 190 |
|
| 191 |
-
# π£οΈ INTELLIGENT ROUTING
|
| 192 |
-
# Case A: XTTS Support (Voice Cloning)
|
| 193 |
if mapped_lang and mapped_lang in XTTS_LANG_CODES:
|
| 194 |
-
print(f"[
|
| 195 |
speaker_wav_path = None
|
| 196 |
if speaker_wav_b64:
|
| 197 |
sb = base64.b64decode(speaker_wav_b64)
|
|
@@ -205,7 +228,6 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 205 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
| 206 |
output_path = output_file.name
|
| 207 |
|
| 208 |
-
# ποΈ XTTS Inference
|
| 209 |
MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=output_path, speaker_wav=speaker_wav_path)
|
| 210 |
|
| 211 |
with open(output_path, "rb") as f:
|
|
@@ -216,22 +238,17 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 216 |
if os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
|
| 217 |
if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
|
| 218 |
|
| 219 |
-
|
| 220 |
-
print(f"[v84] Using Chatterbox ONNX Fallback for '{lang}'")
|
| 221 |
try:
|
| 222 |
-
# Use local file if available for cloning in Chatterbox too
|
| 223 |
temp_ref = None
|
| 224 |
if speaker_wav_b64:
|
| 225 |
sb = base64.b64decode(speaker_wav_b64)
|
| 226 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
|
| 227 |
f.write(sb); temp_ref = f.name
|
| 228 |
|
| 229 |
-
# Chatterbox supports codes like 'fi', 'el', 'da', etc.
|
| 230 |
chatter_lang = lang.strip().lower().split('-')[0]
|
| 231 |
audio_bytes = chatterbox_utils.run_chatterbox_inference(text, chatter_lang, speaker_wav_path=temp_ref)
|
| 232 |
-
|
| 233 |
if temp_ref and os.path.exists(temp_ref): os.unlink(temp_ref)
|
| 234 |
-
|
| 235 |
audio_b64 = base64.b64encode(audio_bytes).decode()
|
| 236 |
return {"audio": audio_b64}
|
| 237 |
except Exception as e:
|
|
@@ -240,14 +257,10 @@ def _tts_logic(text, lang, speaker_wav_b64):
|
|
| 240 |
|
| 241 |
@spaces.GPU
|
| 242 |
def core_process(request_dict):
|
| 243 |
-
"""
|
| 244 |
-
Unified GPU Entry Point (v84).
|
| 245 |
-
This function handles all high-speed tasks inside a single GPU allocation.
|
| 246 |
-
The container stays resident on CPU but triggers GPU on demand.
|
| 247 |
-
"""
|
| 248 |
action = request_dict.get("action")
|
| 249 |
t0 = time.time()
|
| 250 |
-
print(f"--- [
|
| 251 |
load_models()
|
| 252 |
|
| 253 |
try:
|
|
@@ -258,26 +271,21 @@ def core_process(request_dict):
|
|
| 258 |
elif action == "tts":
|
| 259 |
res = _tts_logic(request_dict.get("text"), request_dict.get("lang"), request_dict.get("speaker_wav"))
|
| 260 |
elif action == "s2st":
|
| 261 |
-
# π FULL PIPELINE (Single GPU Call)
|
| 262 |
stt_res = _stt_logic({"file": request_dict.get("file"), "lang": request_dict.get("source_lang")})
|
| 263 |
text = stt_res.get("text", "")
|
| 264 |
if not text: return {"error": "No speech detected"}
|
| 265 |
-
|
| 266 |
translated = _translate_logic(text, request_dict.get("target_lang"))
|
| 267 |
-
|
| 268 |
tts_res = _tts_logic(translated, request_dict.get("target_lang"), request_dict.get("speaker_wav"))
|
| 269 |
res = {"text": text, "translated": translated, "audio": tts_res.get("audio")}
|
| 270 |
elif action == "health":
|
| 271 |
res = {"status": "awake", "time": time.ctime()}
|
| 272 |
else:
|
| 273 |
res = {"error": f"Unknown action: {action}"}
|
| 274 |
-
|
| 275 |
finally:
|
| 276 |
-
print(f"--- [
|
| 277 |
gc.collect()
|
| 278 |
if torch.cuda.is_available():
|
| 279 |
torch.cuda.empty_cache()
|
| 280 |
-
|
| 281 |
return res
|
| 282 |
|
| 283 |
def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
|
@@ -296,13 +304,11 @@ def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
|
| 296 |
header.extend((0xFFFFFFFF).to_bytes(4, 'little'))
|
| 297 |
return bytes(header)
|
| 298 |
|
| 299 |
-
# π Sync Generator for ZeroGPU
|
| 300 |
@spaces.GPU
|
| 301 |
def gpu_tts_generator(text, lang, speaker_wav_path):
|
| 302 |
load_models()
|
| 303 |
try:
|
| 304 |
yield bytes(create_wav_header(sample_rate=24000))
|
| 305 |
-
# inference_stream is a generator
|
| 306 |
for chunk in MODELS["tts"].synthesizer.tts_model.inference_stream(
|
| 307 |
text,
|
| 308 |
lang,
|
|
@@ -320,15 +326,12 @@ def gpu_tts_generator(text, lang, speaker_wav_path):
|
|
| 320 |
if torch.cuda.is_available():
|
| 321 |
torch.cuda.empty_cache()
|
| 322 |
|
| 323 |
-
# --- FastAPI Entry Points ---
|
| 324 |
app = FastAPI()
|
| 325 |
|
| 326 |
@app.post("/api/v1/process")
|
| 327 |
async def api_process(request: Request):
|
| 328 |
-
"""Async endpoint. Routes to CPU (STT/Translate) or Hybrid (S2ST/TTS)"""
|
| 329 |
try:
|
| 330 |
data = await request.json()
|
| 331 |
-
# Direct call to the hybrid process
|
| 332 |
result = core_process(data)
|
| 333 |
return result
|
| 334 |
except Exception as e:
|
|
@@ -337,7 +340,6 @@ async def api_process(request: Request):
|
|
| 337 |
|
| 338 |
@app.post("/api/v1/tts_stream")
|
| 339 |
async def api_tts_stream(request: Request):
|
| 340 |
-
"""Async entry point for StreamingResponse"""
|
| 341 |
try:
|
| 342 |
data = await request.json()
|
| 343 |
speaker_wav_b64 = data.get("speaker_wav")
|
|
@@ -349,11 +351,7 @@ async def api_tts_stream(request: Request):
|
|
| 349 |
speaker_wav_path = f.name
|
| 350 |
else:
|
| 351 |
speaker_wav_path = "default_speaker.wav"
|
| 352 |
-
|
| 353 |
-
return StreamingResponse(
|
| 354 |
-
gpu_tts_generator(data.get("text"), data.get("lang"), speaker_wav_path),
|
| 355 |
-
media_type="audio/wav"
|
| 356 |
-
)
|
| 357 |
except Exception as e:
|
| 358 |
return {"error": str(e)}
|
| 359 |
|
|
@@ -363,46 +361,27 @@ def health():
|
|
| 363 |
|
| 364 |
@app.post("/api/v1/clear_cache")
|
| 365 |
async def clear_cache():
|
| 366 |
-
"""Manual deep cleanup of memory and caches"""
|
| 367 |
try:
|
| 368 |
t0 = time.time()
|
| 369 |
-
print("π§Ή Manual Cache Clearing Triggered...")
|
| 370 |
-
|
| 371 |
-
# 1. GC collect
|
| 372 |
gc.collect()
|
| 373 |
-
|
| 374 |
-
# 2. CUDA cache
|
| 375 |
-
if torch.cuda.is_available():
|
| 376 |
-
torch.cuda.empty_cache()
|
| 377 |
-
|
| 378 |
-
# 3. Clean temp files
|
| 379 |
temp_dir = tempfile.gettempdir()
|
| 380 |
count = 0
|
| 381 |
for f in os.listdir(temp_dir):
|
| 382 |
if f.endswith(".wav") or f.startswith("tm"):
|
| 383 |
-
try:
|
| 384 |
-
os.unlink(os.path.join(temp_dir, f))
|
| 385 |
-
count += 1
|
| 386 |
except: pass
|
| 387 |
-
|
| 388 |
-
return {
|
| 389 |
-
"status": "success",
|
| 390 |
-
"cleaned_files": count,
|
| 391 |
-
"duration": f"{time.time()-t0:.2f}s",
|
| 392 |
-
"gpu_memory": f"{torch.cuda.memory_allocated() / 1024**2:.2f}MB" if torch.cuda.is_available() else "N/A"
|
| 393 |
-
}
|
| 394 |
except Exception as e:
|
| 395 |
return {"status": "error", "message": str(e)}
|
| 396 |
|
| 397 |
-
# --- Gradio UI ---
|
| 398 |
def gradio_fn(req_json):
|
| 399 |
-
try:
|
| 400 |
-
|
| 401 |
-
except Exception as e:
|
| 402 |
-
return json.dumps({"error": str(e)})
|
| 403 |
|
| 404 |
demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="π AI Engine")
|
| 405 |
app = gr.mount_gradio_app(app, demo, path="/")
|
| 406 |
|
| 407 |
if __name__ == "__main__":
|
|
|
|
| 408 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 61 |
|
| 62 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 63 |
|
| 64 |
+
# FORCE BUILD TRIGGER: 10:00:00 Jan 21 2026
|
| 65 |
+
# v85: ZeroGPU Warmup & Pre-caching (Prevents session timeouts)
|
| 66 |
|
| 67 |
# 🛠️ Monkeypatch torchaudio.load
|
| 68 |
try:
|
|
|
|
| 93 |
def load_models():
|
| 94 |
global MODELS
|
| 95 |
if MODELS["stt"] is None:
|
| 96 |
+
print("ποΈ Loading Faster-Whisper large-v3 into Engine...")
|
| 97 |
from faster_whisper import WhisperModel
|
| 98 |
if torch.cuda.is_available():
|
| 99 |
print(f"π GPU Detected: {torch.cuda.get_device_name(0)}")
|
|
|
|
| 108 |
torch.cuda.empty_cache()
|
| 109 |
|
| 110 |
# Initialize Chatterbox ONNX (High-Speed Fallback)
|
|
|
|
| 111 |
chatterbox_utils.load_chatterbox(device="cuda" if torch.cuda.is_available() else "cpu")
|
| 112 |
|
| 113 |
if MODELS["translate"] is None:
|
|
|
|
| 122 |
except: pass
|
| 123 |
|
| 124 |
if MODELS["tts"] is None:
|
| 125 |
+
print("π Loading XTTS-v2 into Engine...")
|
| 126 |
from TTS.api import TTS
|
| 127 |
try:
|
| 128 |
MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
|
|
|
|
| 131 |
print(f"β Failed to load XTTS: {e}")
|
| 132 |
raise e
|
| 133 |
|
| 134 |
+
def warmup_models():
    """Download and cache all models on CPU at startup (prevents GPU timeouts).

    Each model is instantiated once on CPU purely to force its weights to be
    fetched; the instance is not kept. Every step is guarded on its own, so a
    failure in one step is printed as a warning and the remaining steps still
    run. Nothing is returned and no exception is propagated to the caller.
    """

    def _warm_whisper():
        # Imported lazily so a missing package only fails this step.
        from faster_whisper import WhisperModel
        # Simple CPU init is enough to trigger the download.
        WhisperModel("large-v3", device="cpu", compute_type="int8")

    def _warm_xtts():
        from TTS.api import TTS
        # Initialize once on CPU just to force the download.
        TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)

    def _warm_deepfilternet():
        init_df()

    def _warm_chatterbox():
        chatterbox_utils.warmup_chatterbox()

    # (progress message or None, step) in the order they are executed.
    steps = (
        ("📥 Initializing Whisper large-v3 cache...", _warm_whisper),
        # XTTS-v2 takes the longest.
        ("📥 Initializing XTTS-v2 cache...", _warm_xtts),
        ("📥 Initializing DeepFilterNet cache...", _warm_deepfilternet),
        (None, _warm_chatterbox),
    )

    print("\n🔥 --- SYSTEM WARMUP STARTING (CPU) ---")
    start = time.time()
    for message, step in steps:
        if message:
            print(message)
        try:
            step()
        except Exception as e:
            # Warmup is best-effort: report and continue with the next model
            # instead of aborting the remaining downloads.
            print(f"⚠️ Warmup warning: {e}")
    print(f"✅ --- SYSTEM WARMUP COMPLETE (Time: {time.time()-start:.2f}s) --- \n")
|
| 162 |
+
|
| 163 |
def _stt_logic(request_dict):
|
| 164 |
"""STT Logic (Runs on GPU when called via core_process)"""
|
| 165 |
audio_bytes = base64.b64decode(request_dict.get("file"))
|
|
|
|
| 186 |
if not text or not text.strip():
|
| 187 |
return {"error": "TTS Error: Input text is empty"}
|
| 188 |
|
|
|
|
|
|
|
| 189 |
XTTS_MAP = {
|
| 190 |
"en": "en", "en-us": "en", "en-gb": "en",
|
| 191 |
"de": "de", "de-de": "de",
|
|
|
|
| 206 |
}
|
| 207 |
|
| 208 |
XTTS_LANG_CODES = set(XTTS_MAP.values())
|
|
|
|
| 209 |
mapped_lang = None
|
| 210 |
if lang:
|
| 211 |
lang_key = lang.strip().lower()
|
| 212 |
mapped_lang = XTTS_MAP.get(lang_key) or XTTS_MAP.get(lang_key.split('-')[0])
|
| 213 |
|
| 214 |
+
print(f"[v85] TTS Request - Original: {lang}, Mapped: {mapped_lang}")
|
| 215 |
|
|
|
|
|
|
|
| 216 |
if mapped_lang and mapped_lang in XTTS_LANG_CODES:
|
| 217 |
+
print(f"[v85] Using XTTS-v2 for '{mapped_lang}'")
|
| 218 |
speaker_wav_path = None
|
| 219 |
if speaker_wav_b64:
|
| 220 |
sb = base64.b64decode(speaker_wav_b64)
|
|
|
|
| 228 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
| 229 |
output_path = output_file.name
|
| 230 |
|
|
|
|
| 231 |
MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=output_path, speaker_wav=speaker_wav_path)
|
| 232 |
|
| 233 |
with open(output_path, "rb") as f:
|
|
|
|
| 238 |
if os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
|
| 239 |
if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
|
| 240 |
|
| 241 |
+
print(f"[v85] Using Chatterbox ONNX Fallback for '{lang}'")
|
|
|
|
| 242 |
try:
|
|
|
|
| 243 |
temp_ref = None
|
| 244 |
if speaker_wav_b64:
|
| 245 |
sb = base64.b64decode(speaker_wav_b64)
|
| 246 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
|
| 247 |
f.write(sb); temp_ref = f.name
|
| 248 |
|
|
|
|
| 249 |
chatter_lang = lang.strip().lower().split('-')[0]
|
| 250 |
audio_bytes = chatterbox_utils.run_chatterbox_inference(text, chatter_lang, speaker_wav_path=temp_ref)
|
|
|
|
| 251 |
if temp_ref and os.path.exists(temp_ref): os.unlink(temp_ref)
|
|
|
|
| 252 |
audio_b64 = base64.b64encode(audio_bytes).decode()
|
| 253 |
return {"audio": audio_b64}
|
| 254 |
except Exception as e:
|
|
|
|
| 257 |
|
| 258 |
@spaces.GPU
|
| 259 |
def core_process(request_dict):
|
| 260 |
+
"""Unified GPU Entry Point (v85)"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
action = request_dict.get("action")
|
| 262 |
t0 = time.time()
|
| 263 |
+
print(f"--- [v85] π GPU SESSION START: {action} at {time.ctime()} ---")
|
| 264 |
load_models()
|
| 265 |
|
| 266 |
try:
|
|
|
|
| 271 |
elif action == "tts":
|
| 272 |
res = _tts_logic(request_dict.get("text"), request_dict.get("lang"), request_dict.get("speaker_wav"))
|
| 273 |
elif action == "s2st":
|
|
|
|
| 274 |
stt_res = _stt_logic({"file": request_dict.get("file"), "lang": request_dict.get("source_lang")})
|
| 275 |
text = stt_res.get("text", "")
|
| 276 |
if not text: return {"error": "No speech detected"}
|
|
|
|
| 277 |
translated = _translate_logic(text, request_dict.get("target_lang"))
|
|
|
|
| 278 |
tts_res = _tts_logic(translated, request_dict.get("target_lang"), request_dict.get("speaker_wav"))
|
| 279 |
res = {"text": text, "translated": translated, "audio": tts_res.get("audio")}
|
| 280 |
elif action == "health":
|
| 281 |
res = {"status": "awake", "time": time.ctime()}
|
| 282 |
else:
|
| 283 |
res = {"error": f"Unknown action: {action}"}
|
|
|
|
| 284 |
finally:
|
| 285 |
+
print(f"--- [v85] β¨ SESSION END: {action} (Total: {time.time()-t0:.2f}s) ---")
|
| 286 |
gc.collect()
|
| 287 |
if torch.cuda.is_available():
|
| 288 |
torch.cuda.empty_cache()
|
|
|
|
| 289 |
return res
|
| 290 |
|
| 291 |
def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
|
|
|
| 304 |
header.extend((0xFFFFFFFF).to_bytes(4, 'little'))
|
| 305 |
return bytes(header)
|
| 306 |
|
|
|
|
| 307 |
@spaces.GPU
|
| 308 |
def gpu_tts_generator(text, lang, speaker_wav_path):
|
| 309 |
load_models()
|
| 310 |
try:
|
| 311 |
yield bytes(create_wav_header(sample_rate=24000))
|
|
|
|
| 312 |
for chunk in MODELS["tts"].synthesizer.tts_model.inference_stream(
|
| 313 |
text,
|
| 314 |
lang,
|
|
|
|
| 326 |
if torch.cuda.is_available():
|
| 327 |
torch.cuda.empty_cache()
|
| 328 |
|
|
|
|
| 329 |
app = FastAPI()
|
| 330 |
|
| 331 |
@app.post("/api/v1/process")
|
| 332 |
async def api_process(request: Request):
|
|
|
|
| 333 |
try:
|
| 334 |
data = await request.json()
|
|
|
|
| 335 |
result = core_process(data)
|
| 336 |
return result
|
| 337 |
except Exception as e:
|
|
|
|
| 340 |
|
| 341 |
@app.post("/api/v1/tts_stream")
|
| 342 |
async def api_tts_stream(request: Request):
|
|
|
|
| 343 |
try:
|
| 344 |
data = await request.json()
|
| 345 |
speaker_wav_b64 = data.get("speaker_wav")
|
|
|
|
| 351 |
speaker_wav_path = f.name
|
| 352 |
else:
|
| 353 |
speaker_wav_path = "default_speaker.wav"
|
| 354 |
+
return StreamingResponse(gpu_tts_generator(data.get("text"), data.get("lang"), speaker_wav_path), media_type="audio/wav")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
except Exception as e:
|
| 356 |
return {"error": str(e)}
|
| 357 |
|
|
|
|
| 361 |
|
| 362 |
@app.post("/api/v1/clear_cache")
async def clear_cache():
    """Manual deep cleanup of memory and caches.

    Runs the garbage collector, empties the CUDA cache when a GPU is
    available, then deletes every entry in the system temp directory whose
    name ends with ".wav" or starts with "tm".

    Returns:
        On success, a dict with "status", "cleaned_files" (number of entries
        actually removed) and "duration"; on any unexpected failure, a dict
        with "status" set to "error" and the exception text in "message".
    """
    try:
        t0 = time.time()
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        temp_dir = tempfile.gettempdir()
        count = 0
        for f in os.listdir(temp_dir):
            if f.endswith(".wav") or f.startswith("tm"):
                try:
                    os.unlink(os.path.join(temp_dir, f))
                except OSError:
                    # Best-effort cleanup: entries that cannot be unlinked
                    # (directories, already-removed or busy files) are skipped.
                    continue
                count += 1
        return {"status": "success", "cleaned_files": count, "duration": f"{time.time()-t0:.2f}s"}
    except Exception as e:
        return {"status": "error", "message": str(e)}
|
| 377 |
|
|
|
|
| 378 |
def gradio_fn(req_json):
    """Gradio text handler: run a JSON-encoded request through core_process.

    The input string is parsed as JSON, handed to core_process, and the result
    is serialized back to a JSON string. Any exception raised along the way is
    returned as a JSON object of the form {"error": "<message>"}.
    """
    try:
        payload = json.loads(req_json)
        outcome = core_process(payload)
        reply = json.dumps(outcome)
    except Exception as err:
        reply = json.dumps({"error": str(err)})
    return reply
|
|
|
|
|
|
|
| 381 |
|
| 382 |
demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="π AI Engine")
|
| 383 |
app = gr.mount_gradio_app(app, demo, path="/")
|
| 384 |
|
| 385 |
if __name__ == "__main__":
|
| 386 |
+
warmup_models()
|
| 387 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|