Gaoussin committed on
Commit
7642f85
·
verified ·
1 Parent(s): 23b28bf

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +71 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Python 3.10 keeps the final image small.
FROM python:3.10-slim

# System dependencies:
#   git        — needed for pip VCS installs / model hub tooling
#   ffmpeg     — audio transcoding
#   libsndfile1 — soundfile backend for audio I/O
# --no-install-recommends avoids pulling optional packages, and removing
# the apt lists in the same layer keeps the image size down.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git ffmpeg libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Set workdir
WORKDIR /code

# Install Python dependencies first so this layer is cached
# while only app code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy FastAPI app
COPY app.py .

# Expose default HF Spaces port
EXPOSE 7860

# Start FastAPI via uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Redirect every Hugging Face cache to /tmp, which is writable inside the
# container (the default home directory on HF Spaces is not). These must be
# set before transformers is imported so the library picks them up.
_HF_CACHE_DIR = "/tmp/hf"
for _cache_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE"):
    os.environ[_cache_var] = _HF_CACHE_DIR
os.makedirs(_HF_CACHE_DIR, exist_ok=True)
8
import io

import edge_tts
import torch
import scipy.io.wavfile as wavfile
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from transformers import VitsModel, AutoTokenizer
14
+
15
+
16
# FastAPI application object served by uvicorn (see Dockerfile CMD).
app = FastAPI(title="Bambara TTS API")

# Load model once at startup so every request reuses the same weights.
# from_pretrained downloads to the /tmp/hf cache configured above on the
# first run; this blocks startup until the model is available.
model = VitsModel.from_pretrained("facebook/mms-tts-bam")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-bam")
# Sample rate of the waveforms the VITS model emits; used when writing WAV.
sampling_rate = model.config.sampling_rate
22
+
23
+
24
@app.get("/tts/")
async def tts(text: str = Query(..., description="Bambara text to synthesize")):
    """Synthesize Bambara speech for *text* and stream it back as WAV.

    Runs the MMS Bambara VITS model loaded at startup and returns the
    generated waveform as an in-memory ``audio/wav`` stream (never
    touching disk).
    """
    # Tokenize and pin every tensor to CPU in one pass.
    encoded = {
        name: tensor.to("cpu")
        for name, tensor in tokenizer(text, return_tensors="pt").items()
    }

    # Inference only — no gradients needed.
    with torch.no_grad():
        audio = model(**encoded).waveform[0]

    # Serialize to WAV in memory and stream it to the client.
    wav_buffer = io.BytesIO()
    wavfile.write(wav_buffer, rate=sampling_rate, data=audio.numpy())
    wav_buffer.seek(0)
    return StreamingResponse(wav_buffer, media_type="audio/wav")
40
+
41
+
42
@app.get("/noneBmTts/")
async def noneBmTts(
    text: str = Query(..., description="Text to synthesize"),
    voice: str = Query(
        "fr-FR-DeniseNeural", description="Voice ID (e.g., en-US-GuyNeural)"
    ),
):
    """Synthesize non-Bambara speech with Microsoft Edge TTS.

    Streams *text* through edge-tts using the given neural *voice* and
    returns the audio as an in-memory ``audio/mpeg`` stream.

    Raises:
        HTTPException: 400 if synthesis produces no audio, or if edge-tts
            fails (e.g. an invalid voice name).
    """
    try:
        # Create the Communicate object with the requested text and voice
        communicate = edge_tts.Communicate(text, voice)

        buffer = io.BytesIO()

        # The stream yields both audio and metadata events; keep audio only.
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                buffer.write(chunk["data"])

        # An empty buffer means the service returned no audio for this input.
        if buffer.tell() == 0:
            raise HTTPException(
                status_code=400, detail="Synthesis failed to produce audio."
            )

        buffer.seek(0)
        return StreamingResponse(buffer, media_type="audio/mpeg")

    except HTTPException:
        # Re-raise our own 400 untouched so its detail message is preserved
        # instead of being re-wrapped by the generic handler below.
        raise
    except Exception as e:
        # Catch errors like invalid voice names and surface them as a 400.
        raise HTTPException(status_code=400, detail=str(e)) from e
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers==4.44.2
4
+ accelerate
5
+ torch
6
+ scipy
7
+ edge-tts