Spaces:

DP27
/

tts

No application file

App Files Files Community

DP27 committed on Feb 27, 2025

Commit

c402391

verified ·

1 Parent(s): a3e3fb3

upload fullcode

Browse files

Files changed (4) hide show

app.py +133 -0
dockerfile +24 -0
requirements.txt +6 -0
static/index.html +249 -0

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+from fastapi import FastAPI, HTTPException, Query
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import os
+import uuid
+import tempfile
+from typing import Optional
+import soundfile as sf
+# Import your TTS dependencies
+from kokoro import KPipeline
+# Initialize the TTS pipeline
+pipeline = KPipeline(lang_code='a')  # Make sure lang_code matches voice
+# Initialize FastAPI app
+app = FastAPI(title="Kokoro TTS API Service")
+# Add CORS middleware to allow frontend requests
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, replace with your domains
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount static files directory
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# Create temp directory to store audio files
+TEMP_DIR = tempfile.gettempdir()
+os.makedirs(TEMP_DIR, exist_ok=True)
+def tts(text, file_name, voice='af_bella', speed=0.9):
+    """
+    Generate speech from text using Kokoro TTS
+    Args:
+        text (str): Text to convert to speech
+        file_name (str): Path to save the output .wav file
+        voice (str): Voice to use for TTS
+        speed (float): Speed of speech
+    Returns:
+        str: Path to the generated audio file
+    """
+    try:
+        generator = pipeline(
+            text, voice=voice,
+            speed=speed, split_pattern=None
+        )
+        for i, (gs, ps, audio) in enumerate(generator):
+            sf.write(file_name, audio, 24000)  # save audio file
+        return file_name
+    except Exception as e:
+        raise Exception(f"TTS generation failed: {str(e)}")
+# JSON request body accepted by the POST /tts/ endpoint.
+class TTSRequest(BaseModel):
+    # Text to convert to speech (required).
+    text: str
+    # Voice identifier passed through to the TTS pipeline.
+    voice: str = "af_bella"
+    # Speech speed passed through to the TTS pipeline.
+    speed: float = 0.9
+@app.post("/tts/")
+async def text_to_speech(request: TTSRequest):
+    """
+    Convert text to speech and return a .wav file
+    """
+    try:
+        # Generate a unique filename
+        filename = f"{uuid.uuid4()}.wav"
+        output_path = os.path.join(TEMP_DIR, filename)
+        # Generate speech using your TTS function
+        tts(request.text, output_path, request.voice, request.speed)
+        # Return the audio file
+        return FileResponse(
+            path=output_path,
+            filename=filename,
+            media_type="audio/wav"
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
+@app.get("/tts-get/")
+async def text_to_speech_get(
+    text: str = Query(..., description="Text to convert to speech"),
+    voice: str = Query("af_bella", description="Voice to use for TTS"),
+    speed: float = Query(0.9, description="Speed of speech (0.5-1.5)")
+):
+    """
+    GET endpoint for text-to-speech conversion
+    """
+    try:
+        # Generate a unique filename
+        filename = f"{uuid.uuid4()}.wav"
+        output_path = os.path.join(TEMP_DIR, filename)
+        # Generate speech using your TTS function
+        tts(text, output_path, voice, speed)
+        # Return the audio file
+        return FileResponse(
+            path=output_path,
+            filename=filename,
+            media_type="audio/wav"
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
+@app.get("/voices/")
+async def available_voices():
+    """
+    Return a list of available voices
+    """
+    # This is a placeholder - you should replace with actual available voices
+    # from your kokoro library
+    return {
+        "voices": ["af_bella"],  # Add other available voices here
+        "default": "af_bella"
+    }
+@app.get("/")
+async def root():
+    """
+    Serve the frontend HTML
+    """
+    return FileResponse('static/index.html')

dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+FROM python:3.9-slim
+WORKDIR /app
+# Copy the dependency list first so the install layer is cached across code changes
+COPY requirements.txt .
+# Install system dependencies for soundfile (libsndfile1 stays in the image);
+# the compiler toolchain is only needed while pip builds packages and is
+# removed again in the same layer to keep the image small.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    gcc \
+    libc6-dev \
+    libsndfile1 \
+    && pip install --no-cache-dir -r requirements.txt \
+    && apt-get remove -y gcc libc6-dev \
+    && apt-get autoremove -y \
+    && rm -rf /var/lib/apt/lists/*
+COPY . .
+# Make sure to expose the port your app uses
+EXPOSE 8000
+# Command to run the app. The FastAPI instance `app` lives in app.py, so the
+# import string is "app:app" (there is no main.py module in this commit).
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastapi==0.104.1
+uvicorn==0.23.2
+pydantic==2.4.2
+python-multipart==0.0.6
+kokoro
+soundfile==0.12.1

static/index.html ADDED Viewed

	@@ -0,0 +1,249 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Text-to-Speech Service</title>
+    <style>
+        body {
+            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 20px;
+            background-color: #f8f9fa;
+            color: #333;
+        }
+        .container {
+            background-color: white;
+            border-radius: 8px;
+            padding: 30px;
+            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+        }
+        h1 {
+            text-align: center;
+            color: #2c3e50;
+            margin-bottom: 30px;
+        }
+        .form-group {
+            margin-bottom: 20px;
+        }
+        label {
+            display: block;
+            margin-bottom: 8px;
+            font-weight: 600;
+        }
+        textarea, select, input {
+            width: 100%;
+            padding: 10px;
+            border: 1px solid #ddd;
+            border-radius: 4px;
+            font-size: 16px;
+            box-sizing: border-box;
+        }
+        textarea {
+            height: 120px;
+            resize: vertical;
+        }
+        button {
+            background-color: #4CAF50;
+            color: white;
+            border: none;
+            padding: 12px 20px;
+            border-radius: 4px;
+            cursor: pointer;
+            font-size: 16px;
+            display: block;
+            width: 100%;
+            transition: background-color 0.3s;
+        }
+        button:hover {
+            background-color: #45a049;
+        }
+        button:disabled {
+            background-color: #cccccc;
+            cursor: not-allowed;
+        }
+        .audio-container {
+            margin-top: 30px;
+            text-align: center;
+            display: none;
+        }
+        .audio-container audio {
+            width: 100%;
+            margin-top: 10px;
+        }
+        .status {
+            text-align: center;
+            margin-top: 20px;
+            font-style: italic;
+            color: #666;
+        }
+        .error {
+            color: #e74c3c;
+            text-align: center;
+            margin-top: 20px;
+        }
+        .loading {
+            display: none;
+            text-align: center;
+            margin: 20px 0;
+        }
+        .loading-spinner {
+            display: inline-block;
+            width: 30px;
+            height: 30px;
+            border: 3px solid rgba(0, 0, 0, 0.1);
+            border-radius: 50%;
+            border-top-color: #4CAF50;
+            animation: spin 1s linear infinite;
+        }
+        @keyframes spin {
+            to { transform: rotate(360deg); }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Text-to-Speech Service</h1>
+        <div class="form-group">
+            <label for="text">Enter text to convert to speech:</label>
+            <textarea id="text" placeholder="Type your text here..."></textarea>
+        </div>
+        <div class="form-group">
+            <label for="voice">Select voice:</label>
+            <select id="voice">
+                <option value="af_bella">Bella (American English)</option>
+                <!-- More voices will be loaded dynamically -->
+            </select>
+        </div>
+        <div class="form-group">
+            <label for="speed">Speech speed: <span id="speedValue">0.9</span></label>
+            <input type="range" id="speed" min="0.5" max="1.5" step="0.1" value="0.9">
+        </div>
+        <button id="convertBtn">Convert to Speech</button>
+        <div class="loading">
+            <div class="loading-spinner"></div>
+            <p>Generating audio...</p>
+        </div>
+        <div id="error" class="error"></div>
+        <div id="audioContainer" class="audio-container">
+            <p>Your generated audio:</p>
+            <audio id="audioPlayer" controls></audio>
+            <p class="status">You can play or download this audio file.</p>
+        </div>
+    </div>
+    <script>
+        document.addEventListener('DOMContentLoaded', function() {
+            // Elements
+            const textArea = document.getElementById('text');
+            const voiceSelect = document.getElementById('voice');
+            const speedSlider = document.getElementById('speed');
+            const speedValue = document.getElementById('speedValue');
+            const convertBtn = document.getElementById('convertBtn');
+            const audioContainer = document.getElementById('audioContainer');
+            const audioPlayer = document.getElementById('audioPlayer');
+            const errorElement = document.getElementById('error');
+            const loading = document.querySelector('.loading');
+            // Update the speed value display when slider changes
+            speedSlider.addEventListener('input', function() {
+                speedValue.textContent = this.value;
+            });
+            // Fetch available voices (this would connect to your /voices/ endpoint)
+            fetch('/voices/')
+                .then(response => {
+                    if (!response.ok) throw new Error('Failed to fetch voices');
+                    return response.json();
+                })
+                .then(data => {
+                    // Clear default option
+                    voiceSelect.innerHTML = '';
+                    // Add voices to select dropdown
+                    data.voices.forEach(voice => {
+                        const option = document.createElement('option');
+                        option.value = voice;
+                        option.textContent = voice;
+                        // Set default voice
+                        if (voice === data.default) {
+                            option.selected = true;
+                        }
+                        voiceSelect.appendChild(option);
+                    });
+                })
+                .catch(error => {
+                    console.error('Error fetching voices:', error);
+                    // If we can't fetch voices, at least keep the default one
+                });
+            // Handle the text-to-speech conversion
+            convertBtn.addEventListener('click', function() {
+                const text = textArea.value.trim();
+                // Validate input
+                if (!text) {
+                    errorElement.textContent = 'Please enter some text to convert';
+                    return;
+                }
+                errorElement.textContent = '';
+                audioContainer.style.display = 'none';
+                loading.style.display = 'block';
+                convertBtn.disabled = true;
+                // Prepare the request data
+                const requestData = {
+                    text: text,
+                    voice: voiceSelect.value,
+                    speed: parseFloat(speedSlider.value)
+                };
+                // Make API request to the TTS endpoint
+                fetch('/tts/', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json'
+                    },
+                    body: JSON.stringify(requestData)
+                })
+                .then(response => {
+                    if (!response.ok) {
+                        return response.json().then(err => {
+                            throw new Error(err.detail || 'Failed to generate speech');
+                        });
+                    }
+                    return response.blob();
+                })
+                .then(blob => {
+                    // Create URL for the audio blob
+                    const audioUrl = URL.createObjectURL(blob);
+                    audioPlayer.src = audioUrl;
+                    // Display the audio player
+                    audioContainer.style.display = 'block';
+                    // Auto play (may be blocked by browsers)
+                    audioPlayer.play().catch(e => console.log('Auto-play prevented'));
+                })
+                .catch(error => {
+                    console.error('Error:', error);
+                    errorElement.textContent = error.message || 'An error occurred while generating speech';
+                })
+                .finally(() => {
+                    loading.style.display = 'none';
+                    convertBtn.disabled = false;
+                });
+            });
+        });
+    </script>
+</body>
+</html>