Spaces:

CDOM201
/

chatterbox_dimabot

Paused

App Files Files Community

CDOM201 committed on Dec 25, 2025

Commit

15a3086

verified ·

1 Parent(s): e8ceee6

Upload 2 files

Browse files

Files changed (2) hide show

download_model.py +1 -1
main.py +18 -3

download_model.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from huggingface_hub import snapshot_download
 print("Downloading model weights (cache only)...")
-# The correct repository for chatterbox-tts
 snapshot_download(
     repo_id="ResembleAI/chatterbox",
     allow_patterns=["ve.pt", "t3_mtl23ls_v2.safetensors", "s3gen.pt", "grapheme_mtl_merged_expanded_v1.json", "conds.pt", "Cangjie5_TC.json"]

 from huggingface_hub import snapshot_download
 print("Downloading model weights (cache only)...")
+# The correct repository for chatterbox-tts (Multilingual)
 snapshot_download(
     repo_id="ResembleAI/chatterbox",
     allow_patterns=["ve.pt", "t3_mtl23ls_v2.safetensors", "s3gen.pt", "grapheme_mtl_merged_expanded_v1.json", "conds.pt", "Cangjie5_TC.json"]

main.py CHANGED Viewed

@@ -7,6 +7,7 @@ from pydantic import BaseModel
 from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import functools
 import uvicorn
 # Patch torch.load for CPU if necessary (as in app.py)
 # torch.load = functools.partial(torch.load, map_location='cpu')
@@ -16,11 +17,21 @@ app = FastAPI()
 # 1. Determine device dynamically
 device_map = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"CUDA Available: {torch.cuda.is_available()}")
 print(f"Using device: {device_map} with name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 print("Loading TTS model...")
 tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)
 print("Model loaded.")
 class TTSRequest(BaseModel):
@@ -43,6 +54,7 @@ def generate_audio(req: TTSRequest) -> str:
     """Generates audio and returns the filename."""
     os.makedirs("outputs", exist_ok=True)
     filename = os.path.join("outputs", f"{req.channelID}-{req.username}-{req.messageid}.wav")
     try:
         audio_tensor = tts_model.generate(req.message, language_id=req.language)
         ta.save(filename, audio_tensor, tts_model.sr)
@@ -52,20 +64,23 @@ def generate_audio(req: TTSRequest) -> str:
 @app.post("/tts")
 async def tts_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
-    filename = generate_audio(req)
     background_tasks.add_task(cleanup_file, filename)
     return FileResponse(path=filename, filename=filename, media_type='audio/wav')
 @app.post("/stream")
 async def stream_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
-    filename = generate_audio(req)
     background_tasks.add_task(cleanup_file, filename)
     # FileResponse handles streaming efficiently for large files
     return FileResponse(path=filename, media_type='audio/wav')
 @app.post("/test")
 async def test_endpoint(req: TTSRequest):
-    filename = generate_audio(req)
     # For /test, we don't delete the file and just return "ok"
     return {"status": "ok", "filename": filename}

 from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import functools
 import uvicorn
+import asyncio
 # Patch torch.load for CPU if necessary (as in app.py)
 # torch.load = functools.partial(torch.load, map_location='cpu')
 # 1. Determine device dynamically
 device_map = "cuda" if torch.cuda.is_available() else "cpu"
+# Create a lock to ensure only one generation happens at a time (important for GPU)
+model_lock = asyncio.Lock()
 print(f"CUDA Available: {torch.cuda.is_available()}")
 print(f"Using device: {device_map} with name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 print("Loading TTS model...")
+# Using Multilingual model as requested
 tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)
+# Optimize for T4 GPU using half-precision (FP16)
+# FP16 provides a significant speed boost with negligible quality loss
+if device_map == "cuda":
+    tts_model.to(torch.float16)
 print("Model loaded.")
 class TTSRequest(BaseModel):
     """Generates audio and returns the filename."""
     os.makedirs("outputs", exist_ok=True)
     filename = os.path.join("outputs", f"{req.channelID}-{req.username}-{req.messageid}.wav")
     try:
         audio_tensor = tts_model.generate(req.message, language_id=req.language)
         ta.save(filename, audio_tensor, tts_model.sr)
 @app.post("/tts")
 async def tts_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
+    async with model_lock:
+        filename = await asyncio.to_thread(generate_audio, req)
     background_tasks.add_task(cleanup_file, filename)
     return FileResponse(path=filename, filename=filename, media_type='audio/wav')
 @app.post("/stream")
 async def stream_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
+    async with model_lock:
+        filename = await asyncio.to_thread(generate_audio, req)
     background_tasks.add_task(cleanup_file, filename)
     # FileResponse handles streaming efficiently for large files
     return FileResponse(path=filename, media_type='audio/wav')
 @app.post("/test")
 async def test_endpoint(req: TTSRequest):
+    async with model_lock:
+        filename = await asyncio.to_thread(generate_audio, req)
     # For /test, we don't delete the file and just return "ok"
     return {"status": "ok", "filename": filename}