DP27 committed on
Commit
c402391
·
verified ·
1 Parent(s): a3e3fb3

upload fullcode

Browse files
Files changed (4) hide show
  1. app.py +133 -0
  2. dockerfile +24 -0
  3. requirements.txt +6 -0
  4. static/index.html +249 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Query
2
+ from fastapi.responses import FileResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from pydantic import BaseModel
6
+ import os
7
+ import uuid
8
+ import tempfile
9
+ from typing import Optional
10
+ import soundfile as sf
11
+
12
+ # Import your TTS dependencies
13
+ from kokoro import KPipeline
14
+
15
# Initialize the TTS pipeline once at import time so every request reuses it.
# The lang_code must match the language of the voices that are requested.
pipeline = KPipeline(lang_code='a')

# Initialize FastAPI app
app = FastAPI(title="Kokoro TTS API Service")

# Add CORS middleware to allow frontend requests.
# A wildcard origin must not be combined with allow_credentials=True: that
# would let any site issue credentialed requests against this API. The
# service uses no cookies or auth headers, so credentials are disabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with your domains
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files directory (path is relative to the working directory)
app.mount("/static", StaticFiles(directory="static"), name="static")

# Directory where generated audio files are written before being served
TEMP_DIR = tempfile.gettempdir()
os.makedirs(TEMP_DIR, exist_ok=True)
36
+
37
def tts(text, file_name, voice='af_bella', speed=0.9):
    """
    Generate speech from text using Kokoro TTS and save it as one audio file.

    The pipeline may yield the audio in several chunks; all chunks are
    joined in order so the output file contains the complete utterance
    rather than only the last chunk.

    Args:
        text (str): Text to convert to speech
        file_name (str): Path to save the output .wav file
        voice (str): Voice to use for TTS
        speed (float): Speed of speech

    Returns:
        str: Path to the generated audio file

    Raises:
        Exception: If synthesis or writing fails, or if no audio was
            produced; the original error is attached as ``__cause__``.
    """
    # Local import keeps this function self-contained; numpy is already a
    # runtime dependency of soundfile.
    import numpy as np

    try:
        generator = pipeline(
            text, voice=voice,
            speed=speed, split_pattern=None
        )

        # Each item unpacks as (graphemes, phonemes, audio); only audio is used.
        chunks = [
            np.asarray(audio)
            for _, _, audio in generator
            if audio is not None
        ]
        if not chunks:
            raise ValueError("no audio was produced for the given text")

        # Write once, after joining, so earlier chunks are not overwritten.
        sf.write(file_name, np.concatenate(chunks), 24000)

        return file_name
    except Exception as e:
        raise Exception(f"TTS generation failed: {str(e)}") from e
62
+
63
class TTSRequest(BaseModel):
    """JSON body accepted by the POST /tts/ endpoint."""

    # Text to convert to speech (required).
    text: str
    # Voice identifier forwarded to the TTS pipeline.
    voice: str = "af_bella"
    # Speech speed forwarded to the TTS pipeline.
    speed: float = 0.9
67
+
68
def _remove_quietly(path):
    """Delete *path*; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _synthesize_response(text, voice, speed):
    """Run TTS into a unique temp file and return it as a WAV FileResponse.

    The file is deleted after the response has been sent (or immediately if
    synthesis fails), so generated audio does not accumulate on disk.

    Raises:
        HTTPException: 400 for empty text, 500 when synthesis fails.
    """
    # Local import: BackgroundTasks ships with fastapi, already a dependency.
    from fastapi import BackgroundTasks

    # Validate before the try block so the 400 is not re-wrapped as a 500.
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty")

    # Generate a unique filename
    filename = f"{uuid.uuid4()}.wav"
    output_path = os.path.join(TEMP_DIR, filename)

    try:
        # NOTE(review): tts() is blocking and runs inside an async handler.
        tts(text, output_path, voice, speed)
        if not os.path.exists(output_path):
            raise RuntimeError("no audio file was produced")
    except Exception as e:
        _remove_quietly(output_path)
        detail = str(e)
        # tts() already prefixes its message; avoid repeating the prefix.
        if not detail.startswith("TTS generation failed"):
            detail = f"TTS generation failed: {detail}"
        raise HTTPException(status_code=500, detail=detail) from e

    # Remove the temp file once the response body has been streamed.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, output_path)

    return FileResponse(
        path=output_path,
        filename=filename,
        media_type="audio/wav",
        background=cleanup,
    )


@app.post("/tts/")
async def text_to_speech(request: TTSRequest):
    """
    Convert text to speech and return a .wav file
    """
    return _synthesize_response(request.text, request.voice, request.speed)


@app.get("/tts-get/")
async def text_to_speech_get(
    text: str = Query(..., description="Text to convert to speech"),
    voice: str = Query("af_bella", description="Voice to use for TTS"),
    speed: float = Query(0.9, description="Speed of speech (0.5-1.5)")
):
    """
    GET endpoint for text-to-speech conversion
    """
    return _synthesize_response(text, voice, speed)
115
+
116
@app.get("/voices/")
async def available_voices():
    """
    List the voice identifiers offered by this service and the default one
    """
    # Placeholder list: extend it with the voices actually available
    # from the kokoro library.
    default_voice = "af_bella"
    voices = [default_voice]  # Add other available voices here
    return {"voices": voices, "default": default_voice}
127
+
128
@app.get("/")
async def root():
    """
    Return the frontend page as the response for the site root
    """
    index_page = 'static/index.html'
    return FileResponse(index_page)
dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): confirm that the unpinned `kokoro` requirement supports this
# Python version; newer releases may need a more recent base image.
FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .

# Install system dependencies for soundfile, install the Python requirements,
# then drop the build-only packages in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gcc \
    libc6-dev \
    libsndfile1 \
    && pip install --no-cache-dir -r requirements.txt \
    && apt-get remove -y gcc libc6-dev \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

COPY . .

# Make sure to expose the port your app uses
EXPOSE 8000

# Command to run the app. The FastAPI instance lives in app.py, so the
# import string is "app:app" (module:attribute), not "main:app".
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn==0.23.2
3
+ pydantic==2.4.2
4
+ python-multipart==0.0.6
5
+ kokoro
6
+ soundfile==0.12.1
static/index.html ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Text-to-Speech Service</title>
7
+ <style>
8
+ body {
9
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
10
+ max-width: 800px;
11
+ margin: 0 auto;
12
+ padding: 20px;
13
+ background-color: #f8f9fa;
14
+ color: #333;
15
+ }
16
+ .container {
17
+ background-color: white;
18
+ border-radius: 8px;
19
+ padding: 30px;
20
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
21
+ }
22
+ h1 {
23
+ text-align: center;
24
+ color: #2c3e50;
25
+ margin-bottom: 30px;
26
+ }
27
+ .form-group {
28
+ margin-bottom: 20px;
29
+ }
30
+ label {
31
+ display: block;
32
+ margin-bottom: 8px;
33
+ font-weight: 600;
34
+ }
35
+ textarea, select, input {
36
+ width: 100%;
37
+ padding: 10px;
38
+ border: 1px solid #ddd;
39
+ border-radius: 4px;
40
+ font-size: 16px;
41
+ box-sizing: border-box;
42
+ }
43
+ textarea {
44
+ height: 120px;
45
+ resize: vertical;
46
+ }
47
+ button {
48
+ background-color: #4CAF50;
49
+ color: white;
50
+ border: none;
51
+ padding: 12px 20px;
52
+ border-radius: 4px;
53
+ cursor: pointer;
54
+ font-size: 16px;
55
+ display: block;
56
+ width: 100%;
57
+ transition: background-color 0.3s;
58
+ }
59
+ button:hover {
60
+ background-color: #45a049;
61
+ }
62
+ button:disabled {
63
+ background-color: #cccccc;
64
+ cursor: not-allowed;
65
+ }
66
+ .audio-container {
67
+ margin-top: 30px;
68
+ text-align: center;
69
+ display: none;
70
+ }
71
+ .audio-container audio {
72
+ width: 100%;
73
+ margin-top: 10px;
74
+ }
75
+ .status {
76
+ text-align: center;
77
+ margin-top: 20px;
78
+ font-style: italic;
79
+ color: #666;
80
+ }
81
+ .error {
82
+ color: #e74c3c;
83
+ text-align: center;
84
+ margin-top: 20px;
85
+ }
86
+ .loading {
87
+ display: none;
88
+ text-align: center;
89
+ margin: 20px 0;
90
+ }
91
+ .loading-spinner {
92
+ display: inline-block;
93
+ width: 30px;
94
+ height: 30px;
95
+ border: 3px solid rgba(0, 0, 0, 0.1);
96
+ border-radius: 50%;
97
+ border-top-color: #4CAF50;
98
+ animation: spin 1s linear infinite;
99
+ }
100
+ @keyframes spin {
101
+ to { transform: rotate(360deg); }
102
+ }
103
+ </style>
104
+ </head>
105
+ <body>
106
+ <div class="container">
107
+ <h1>Text-to-Speech Service</h1>
108
+
109
+ <div class="form-group">
110
+ <label for="text">Enter text to convert to speech:</label>
111
+ <textarea id="text" placeholder="Type your text here..."></textarea>
112
+ </div>
113
+
114
<div class="form-group">
    <label for="voice">Select voice:</label>
    <select id="voice">
        <!-- Fallback entry shown if the /voices/ request fails.
             In Kokoro voice ids the "af_" prefix means American English, female. -->
        <option value="af_bella">Bella (American English, female)</option>
        <!-- More voices will be loaded dynamically -->
    </select>
</div>
121
+
122
+ <div class="form-group">
123
+ <label for="speed">Speech speed: <span id="speedValue">0.9</span></label>
124
+ <input type="range" id="speed" min="0.5" max="1.5" step="0.1" value="0.9">
125
+ </div>
126
+
127
+ <button id="convertBtn">Convert to Speech</button>
128
+
129
+ <div class="loading">
130
+ <div class="loading-spinner"></div>
131
+ <p>Generating audio...</p>
132
+ </div>
133
+
134
+ <div id="error" class="error"></div>
135
+
136
+ <div id="audioContainer" class="audio-container">
137
+ <p>Your generated audio:</p>
138
+ <audio id="audioPlayer" controls></audio>
139
+ <p class="status">You can play or download this audio file.</p>
140
+ </div>
141
+ </div>
142
+
143
+ <script>
144
document.addEventListener('DOMContentLoaded', function() {
    // Elements
    const textArea = document.getElementById('text');
    const voiceSelect = document.getElementById('voice');
    const speedSlider = document.getElementById('speed');
    const speedValue = document.getElementById('speedValue');
    const convertBtn = document.getElementById('convertBtn');
    const audioContainer = document.getElementById('audioContainer');
    const audioPlayer = document.getElementById('audioPlayer');
    const errorElement = document.getElementById('error');
    const loading = document.querySelector('.loading');

    // Object URL of the clip currently loaded in the player; it is revoked
    // before being replaced so old audio blobs can be garbage-collected.
    let currentAudioUrl = null;

    // Turn a failed response into an Error with a readable message.
    // FastAPI sends `detail` as a string for HTTPException but as an array
    // of objects for validation errors; the body may also not be JSON.
    function toError(response) {
        return response.json()
            .catch(() => ({}))
            .then(body => {
                const detail = body && body.detail;
                if (typeof detail === 'string' && detail) {
                    return new Error(detail);
                }
                if (Array.isArray(detail) && detail.length > 0) {
                    return new Error(detail.map(d => d.msg || JSON.stringify(d)).join('; '));
                }
                return new Error('Failed to generate speech');
            });
    }

    // Update the speed value display when slider changes
    speedSlider.addEventListener('input', function() {
        speedValue.textContent = this.value;
    });

    // Fetch available voices from the /voices/ endpoint
    fetch('/voices/')
        .then(response => {
            if (!response.ok) throw new Error('Failed to fetch voices');
            return response.json();
        })
        .then(data => {
            // Validate before touching the dropdown so a bad payload
            // leaves the built-in default option in place.
            if (!data || !Array.isArray(data.voices) || data.voices.length === 0) {
                throw new Error('Voice list is empty or malformed');
            }

            // Clear default option
            voiceSelect.innerHTML = '';

            // Add voices to select dropdown
            data.voices.forEach(voice => {
                const option = document.createElement('option');
                option.value = voice;
                option.textContent = voice;
                // Set default voice
                if (voice === data.default) {
                    option.selected = true;
                }
                voiceSelect.appendChild(option);
            });
        })
        .catch(error => {
            console.error('Error fetching voices:', error);
            // If we can't fetch voices, at least keep the default one
        });

    // Handle the text-to-speech conversion
    convertBtn.addEventListener('click', function() {
        const text = textArea.value.trim();

        // Validate input
        if (!text) {
            errorElement.textContent = 'Please enter some text to convert';
            return;
        }

        errorElement.textContent = '';
        audioContainer.style.display = 'none';
        loading.style.display = 'block';
        convertBtn.disabled = true;

        // Prepare the request data
        const requestData = {
            text: text,
            voice: voiceSelect.value,
            speed: parseFloat(speedSlider.value)
        };

        // Make API request to the TTS endpoint
        fetch('/tts/', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(requestData)
        })
        .then(response => {
            if (!response.ok) {
                return toError(response).then(err => { throw err; });
            }
            return response.blob();
        })
        .then(blob => {
            // Release the previous clip before creating a new object URL
            if (currentAudioUrl) {
                URL.revokeObjectURL(currentAudioUrl);
            }
            currentAudioUrl = URL.createObjectURL(blob);
            audioPlayer.src = currentAudioUrl;

            // Display the audio player
            audioContainer.style.display = 'block';

            // Auto play (may be blocked by browsers)
            audioPlayer.play().catch(e => console.log('Auto-play prevented'));
        })
        .catch(error => {
            console.error('Error:', error);
            errorElement.textContent = error.message || 'An error occurred while generating speech';
        })
        .finally(() => {
            loading.style.display = 'none';
            convertBtn.disabled = false;
        });
    });
});
247
+ </script>
248
+ </body>
249
+ </html>