wolf1997 committed on
Commit
157a15c
·
verified ·
1 Parent(s): 74d6f76

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +36 -0
  2. app.py +87 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
# Use Python 3.11.7 slim image as base
FROM python:3.11.7-slim

# Install system dependencies required for soundfile.
# This has to happen while the build is still running as root: apt-get
# cannot write to /var/lib/apt or /var/lib/dpkg as an unprivileged user,
# so it must come before the USER instruction below.
RUN apt-get update && apt-get install -y \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user; everything from here on runs unprivileged
USER user

# Set home to the user's home directory and expose pip's per-user bin dir
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy requirements first to leverage Docker cache.
# --chown keeps the copied files owned by the runtime user instead of root.
COPY --chown=user requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY --chown=user . .

# Expose the port the app runs on
EXPOSE 8000

# Command to run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import Response
3
+ from pydantic import BaseModel
4
+ from kokoro import KPipeline
5
+
6
+ import soundfile as sf
7
+ import torch
8
+ import os
9
+ import uuid
10
+ import numpy as np
11
+ import io
12
+ from typing import Optional
13
+
14
+
15
+
16
# ASGI application object; the route handlers defined below register on it.
app = FastAPI(title="Text to Speech API")

# Single Kokoro pipeline created at import time and shared by all requests.
# NOTE(review): lang_code='a' presumably selects American English - confirm
# against the kokoro documentation.
pipeline = KPipeline(lang_code='a')
18
+
19
class TextToSpeechRequest(BaseModel):
    # Request body schema for POST /tts.

    # Text to synthesize; the only required field.
    text: str
    # Language code; optional, defaults to "en".
    language: str | None = "en"
    # Slow-speech flag; optional, defaults to False.
    slow: bool | None = False
23
+
24
+
25
+
26
def tensor_to_audio_bytes(audio_tensor: torch.Tensor, sample_rate: int = 24000) -> bytes:
    """
    Convert a float audio tensor to WAV-encoded bytes.

    Args:
        audio_tensor (torch.Tensor): Input audio tensor of shape (samples,) or (channels, samples)
        sample_rate (int): Sample rate of the audio in Hz. Default is 24000.

    Returns:
        bytes: Audio data in bytes format (a complete WAV file)

    Raises:
        ValueError: If the tensor is not 1D or 2D (this includes 0-D scalars).
    """
    # Ensure tensor is on CPU and convert to numpy
    audio_np = audio_tensor.detach().cpu().numpy()

    # Reject every unsupported rank up front. A 0-D tensor would otherwise
    # skip both shape branches and fail later inside the writer.
    if audio_np.ndim not in (1, 2):
        raise ValueError(f"Expected 1D or 2D tensor, got shape {audio_np.shape}")

    if audio_np.ndim == 1:
        # Mono audio (samples,) -> (1, samples) so the transpose below is uniform
        audio_np = audio_np.reshape(1, -1)

    # The data is transposed to (samples, channels) before being written.
    # The context manager releases the buffer even if writing raises.
    with io.BytesIO() as buffer:
        sf.write(buffer, audio_np.T, sample_rate, format='WAV')
        return buffer.getvalue()
58
+
59
@app.post("/tts")
async def text_to_speech(request: TextToSpeechRequest):
    """
    Synthesize speech for the request text and return it as a WAV download.

    Args:
        request (TextToSpeechRequest): Parsed JSON body; only `text` is used.

    Returns:
        Response: WAV bytes with media type audio/wav, sent as an attachment
        named speech.wav.

    Raises:
        HTTPException: 400 if no audio was produced for the text, 500 for any
        other failure during synthesis or encoding.
    """
    # NOTE(review): request.language and request.slow are accepted by the
    # schema but are not applied here; the voice is fixed to 'af_heart'.
    try:
        # The pipeline is iterated as a stream of (graphemes, phonemes, audio)
        # items. Keep every audio chunk: using only the last one would drop
        # all earlier audio for longer texts.
        chunks = [
            audio
            for _, _, audio in pipeline(request.text, voice='af_heart')
            if audio is not None
        ]

        if not chunks:
            # Nothing was synthesized (e.g. empty or whitespace-only text):
            # report a client error rather than failing on an unbound variable.
            raise HTTPException(
                status_code=400,
                detail="No audio could be generated from the provided text",
            )

        # Join the chunks along the sample axis into one continuous waveform.
        audio_tensor = chunks[0] if len(chunks) == 1 else torch.cat(chunks, dim=-1)

        audio_bytes = tensor_to_audio_bytes(audio_tensor)

        # Return audio bytes directly with appropriate headers
        return Response(
            content=audio_bytes,
            media_type="audio/wav",
            headers={
                "Content-Disposition": "attachment; filename=speech.wav"
            }
        )
    except HTTPException:
        # Deliberate HTTP errors must keep their status code, not become 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
80
+
81
@app.get("/")
async def root():
    """Return a welcome message describing how to call POST /tts."""
    usage = (
        "Welcome to the Text to Speech API. "
        "Use POST /tts to convert text to speech. "
        "the body should be a json with the following fields: "
        "{'text': 'text to convert to speech', "
        "'language': 'language code (optional, default is en)', "
        "'slow': 'boolean (optional, default is False)'}"
    )
    return {"message": usage}
84
+
85
if __name__ == "__main__":
    # Only needed when this module is executed directly as a script.
    from uvicorn import run

    # Listen on all interfaces, port 8000.
    run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.12
2
+ kokoro==0.9.4
3
+ numpy==2.2.6
4
+ pydantic==2.11.4
5
+ soundfile==0.13.1
6
+ torch==2.7.0
7
+ uvicorn==0.34.2