Gaoussin committed on
Commit
7642f85
·
verified ·
1 Parent(s): 23b28bf

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +71 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Python 3.10 keeps the final image small.
FROM python:3.10-slim

# System dependencies:
#   git        — needed for pip VCS installs / model hub tooling
#   ffmpeg     — audio transcoding
#   libsndfile1 — soundfile backend for audio I/O
# --no-install-recommends avoids pulling optional packages, and removing
# the apt lists in the same layer keeps the image size down.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git ffmpeg libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Set workdir
WORKDIR /code

# Install Python dependencies first so this layer is cached
# while only app code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy FastAPI app
COPY app.py .

# Expose default HF Spaces port
EXPOSE 7860

# Start FastAPI via uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Redirect every Hugging Face cache to /tmp, which is writable inside the
# container (the default home directory on HF Spaces is not). These must be
# set before transformers is imported so the library picks them up.
_HF_CACHE_DIR = "/tmp/hf"
for _cache_var in ("HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE"):
    os.environ[_cache_var] = _HF_CACHE_DIR
os.makedirs(_HF_CACHE_DIR, exist_ok=True)
8
import io

import edge_tts
import torch
import scipy.io.wavfile as wavfile
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from transformers import VitsModel, AutoTokenizer
14
+
15
+
16
# FastAPI application object served by uvicorn (see Dockerfile CMD).
app = FastAPI(title="Bambara TTS API")

# Load model once at startup so every request reuses the same weights.
# from_pretrained downloads to the /tmp/hf cache configured above on the
# first run; this blocks startup until the model is available.
model = VitsModel.from_pretrained("facebook/mms-tts-bam")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-bam")
# Sample rate of the waveforms the VITS model emits; used when writing WAV.
sampling_rate = model.config.sampling_rate
22
+
23
+
24
@app.get("/tts/")
async def tts(text: str = Query(..., description="Bambara text to synthesize")):
    """Synthesize Bambara speech for *text* and stream it back as WAV.

    Runs the MMS Bambara VITS model loaded at startup and returns the
    generated waveform as an in-memory ``audio/wav`` stream (never
    touching disk).
    """
    # Tokenize and pin every tensor to CPU in one pass.
    encoded = {
        name: tensor.to("cpu")
        for name, tensor in tokenizer(text, return_tensors="pt").items()
    }

    # Inference only — no gradients needed.
    with torch.no_grad():
        audio = model(**encoded).waveform[0]

    # Serialize to WAV in memory and stream it to the client.
    wav_buffer = io.BytesIO()
    wavfile.write(wav_buffer, rate=sampling_rate, data=audio.numpy())
    wav_buffer.seek(0)
    return StreamingResponse(wav_buffer, media_type="audio/wav")
40
+
41
+
42
@app.get("/noneBmTts/")
async def noneBmTts(
    text: str = Query(..., description="Text to synthesize"),
    voice: str = Query(
        "fr-FR-DeniseNeural", description="Voice ID (e.g., en-US-GuyNeural)"
    ),
):
    """Synthesize non-Bambara speech with Microsoft Edge TTS.

    Streams *text* through edge-tts using the given neural *voice* and
    returns the audio as an in-memory ``audio/mpeg`` stream.

    Raises:
        HTTPException: 400 if synthesis produces no audio, or if edge-tts
            fails (e.g. an invalid voice name).
    """
    try:
        # Create the Communicate object with the requested text and voice
        communicate = edge_tts.Communicate(text, voice)

        buffer = io.BytesIO()

        # The stream yields both audio and metadata events; keep audio only.
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                buffer.write(chunk["data"])

        # An empty buffer means the service returned no audio for this input.
        if buffer.tell() == 0:
            raise HTTPException(
                status_code=400, detail="Synthesis failed to produce audio."
            )

        buffer.seek(0)
        return StreamingResponse(buffer, media_type="audio/mpeg")

    except HTTPException:
        # Re-raise our own 400 untouched so its detail message is preserved
        # instead of being re-wrapped by the generic handler below.
        raise
    except Exception as e:
        # Catch errors like invalid voice names and surface them as a 400.
        raise HTTPException(status_code=400, detail=str(e)) from e
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers==4.44.2
4
+ accelerate
5
+ torch
6
+ scipy
7
+ edge-tts