Upload 6 files
- .dockerignore +9 -0
- Dockerfile +42 -0
- app.py +23 -0
- download_model.py +8 -0
- main.py +74 -0
- requirements.txt +11 -0
.dockerignore
ADDED
@@ -0,0 +1,9 @@
+venv/
+__pycache__/
+*.wav
+.git/
+.gitignore
+.env
+testing.js
+node_modules/
+
Dockerfile
ADDED
@@ -0,0 +1,42 @@
+# Use an official PyTorch image with CUDA support
+FROM pytorch/pytorch:2.5.1-cuda11.8-cudnn9-runtime
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PORT=7860
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    libsndfile1 \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Create a non-root user and switch to it
+# Hugging Face Spaces runs as user 1000
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Copy the rest of the application code
+COPY --chown=user . .
+
+# Pre-download the model weights during build time
+RUN python download_model.py
+
+# Expose the port (Hugging Face Spaces expects 7860)
+EXPOSE 7860
+
+# Command to run the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+
app.py
ADDED
@@ -0,0 +1,23 @@
+import torchaudio as ta
+import torch
+from chatterbox.mtl_tts import ChatterboxMultilingualTTS
+import functools
+
+# torch.load = functools.partial(torch.load, map_location='cpu')
+
+# device_map = torch.device('cpu')
+device_map = None
+if torch.cuda.is_available():
+    device_map = torch.device('cuda')
+else:
+    device_map = torch.device('cpu')
+
+print(f"Using device: {device_map}")
+
+tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)
+streamer_lang = "es"
+
+msg = "CDOM201 dice: Como estas pandita, igual de puto como siempre?"
+audio_file = tts_model.generate(msg, language_id=streamer_lang)
+
+ta.save("sleeplespanda.wav", audio_file, tts_model.sr)
download_model.py
ADDED
@@ -0,0 +1,8 @@
+import torch
+from chatterbox.mtl_tts import ChatterboxMultilingualTTS
+
+print("Downloading model...")
+# We use cpu here just to download the weights to the cache during build time
+ChatterboxMultilingualTTS.from_pretrained(device="cpu")
+print("Model downloaded successfully.")
+
main.py
ADDED
@@ -0,0 +1,74 @@
+import os
+import torch
+import torchaudio as ta
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+from chatterbox.mtl_tts import ChatterboxMultilingualTTS
+import functools
+import uvicorn
+
+# Patch torch.load for CPU if necessary (as in app.py)
+# torch.load = functools.partial(torch.load, map_location='cpu')
+
+app = FastAPI()
+
+# 1. Determine device dynamically
+device_map = "cuda" if torch.cuda.is_available() else "cpu"
+
+print(f"CUDA Available: {torch.cuda.is_available()}")
+print(f"Using device: {device_map}" + (f" with name: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else ""))
+
+print("Loading TTS model...")
+tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)
+print("Model loaded.")
+
+class TTSRequest(BaseModel):
+    message: str
+    language: str
+    channelID: str
+    username: str
+    messageid: str
+
+def cleanup_file(filepath: str):
+    """Deletes the file after it has been sent."""
+    try:
+        if os.path.exists(filepath):
+            os.remove(filepath)
+            print(f"Deleted temporary file: {filepath}")
+    except Exception as e:
+        print(f"Error deleting file {filepath}: {e}")
+
+def generate_audio(req: TTSRequest) -> str:
+    """Generates audio and returns the filename."""
+    filename = f"{req.channelID}-{req.username}-{req.messageid}.wav"
+    try:
+        audio_tensor = tts_model.generate(req.message, language_id=req.language)
+        ta.save(filename, audio_tensor, tts_model.sr)
+        return filename
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"TTS Generation failed: {str(e)}")
+
+@app.post("/tts")
+async def tts_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
+    filename = generate_audio(req)
+    background_tasks.add_task(cleanup_file, filename)
+    return FileResponse(path=filename, filename=filename, media_type='audio/wav')
+
+@app.post("/stream")
+async def stream_endpoint(req: TTSRequest, background_tasks: BackgroundTasks):
+    filename = generate_audio(req)
+    background_tasks.add_task(cleanup_file, filename)
+    # FileResponse handles streaming efficiently for large files
+    return FileResponse(path=filename, media_type='audio/wav')
+
+@app.post("/test")
+async def test_endpoint(req: TTSRequest):
+    filename = generate_audio(req)
+    # For /test, we don't delete the file and just return "ok"
+    return {"status": "ok", "filename": filename}
+
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port)
+
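For reference, a minimal client sketch for the /tts endpoint defined in main.py above. It is not part of the upload: the base URL, field values, and output filename are placeholder assumptions (a server running locally on port 7860), and it assumes the requests package is installed.

# Hypothetical client for POST /tts; adjust the URL to the deployed Space as needed.
import requests

payload = {
    "message": "Hola, esto es una prueba.",   # text to synthesize (placeholder)
    "language": "es",                          # language_id passed to the model
    "channelID": "demo-channel",               # used to build the temp filename
    "username": "demo-user",
    "messageid": "0001",
}

# The endpoint returns the generated WAV file as the response body.
resp = requests.post("http://localhost:7860/tts", json=payload, timeout=300)
resp.raise_for_status()

with open("output.wav", "wb") as f:
    f.write(resp.content)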
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+fastapi==0.127.0
+uvicorn==0.40.0
+pydantic==2.11.10
+chatterbox-tts==0.1.6
+python-multipart==0.0.21
+numpy==1.25.2
+scipy==1.16.3
+librosa==0.11.0
+soundfile==0.13.1
+aiofiles==24.1.0
+