# Source: Hugging Face Space "CORVO-AI/TTS" (Space status when captured: Sleeping)
# File size: 6,510 bytes

import os
import requests
from flask import Flask, request, jsonify, send_file
import uuid

# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)

def get_firebase_token(timeout=30):
    """Get a Firebase ID token using a refresh token.

    Posts a ``refresh_token`` grant to the Google secure-token endpoint and
    returns the ``id_token`` field of the JSON response.

    The API key and refresh token are read from the ``FIREBASE_API_KEY`` and
    ``FIREBASE_REFRESH_TOKEN`` environment variables when set. The embedded
    literals are kept only as backward-compatible fallbacks.
    NOTE(review): credentials committed to source should be rotated and
    supplied exclusively through the environment.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The Firebase ID token string.

    Raises:
        ValueError: The response was 200 but contained no ``id_token``.
        Exception: The endpoint answered with a non-200 status code.
        requests.RequestException: The request itself failed (connection
            error, timeout, ...).
    """
    api_key = os.environ.get(
        "FIREBASE_API_KEY",
        "AIzaSyDbAIg5AN6Cb5kITejfovleb5VDWw0Kv7s",
    )
    refresh_token = os.environ.get(
        "FIREBASE_REFRESH_TOKEN",
        "AMf-vBzwgbfU1K3obVOps6iNiXSW7-_i8XvcCLNyRuBhn0wP4PiMdnL74SUooQqW1fNQS3m-echJJDYpeTDv2kgdTMYQ6TMbvFv2PvYiSjDudoa_QdYuHwe44hP9RhPwt65N0yH-KdVJA0tziY7ihmYCG-14Inhas88TcQOmRZJdbr3vrIoaE7XI0vA-dFNYzfqcOyGxeDZWHnOj8fq3WR5K6LBHacavxOSyfBPDIHAHSaxhirhejFtkx1GPpSZ5L3zMKPlCohJ8NnvYquabhp_Uw7qPytHkAQjPLAlcyeJ4MR2BK0KLfnpLq1sR_mVPJ-Gm8NE89UIwM2XvdcZ6JWgHGEIxTR-hrfgxC0Ku0ZAGA4oDdBlndPIfc13tc-bcw7SEKpgOI6Gmb-4GH4_Tl7jgfJX8wI0O1x08nmb86BYyugf2eGCIKinv091o9cPPSZEvnweE8rX0",
    )

    url = "https://securetoken.googleapis.com/v1/token"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "x-client-data": "CIq2yQEIprbJAQipncoBCOLwygEIlaHLAQiKo8sBCJz+zAEIhaDNAQj2ps0BCJDfzgE=",
        "x-client-version": "Chrome/JsCore/10.14.1/FirebaseCore-web",
        "x-firebase-gmpid": "1:593863204797:web:0b6f963bf7dfea5a28bcd4",
    }

    data = {
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
    }

    # The key travels as the ``key`` query parameter, exactly as before;
    # ``params`` lets requests handle the URL encoding.
    response = requests.post(
        url,
        params={"key": api_key},
        headers=headers,
        data=data,
        timeout=timeout,
    )

    if response.status_code != 200:
        raise Exception(f"❌ Failed to retrieve Firebase token. Status code: {response.status_code}, Response: {response.text}")

    id_token = response.json().get("id_token")
    if not id_token:
        raise ValueError("Firebase ID token not found in the response")

    print("✅ Firebase token retrieved successfully")
    return id_token

def get_speechify_auth_token(firebase_token, timeout=30):
    """Exchange a Firebase token for a Speechify authentication token.

    Sends the Firebase ID token to the Speechify ``/v1/auth/sign`` endpoint,
    both as the ``id_token`` query parameter and as a Bearer ``Authorization``
    header, and returns the ``token`` field of the JSON response.

    Args:
        firebase_token: Firebase ID token to exchange.
        timeout: Seconds to wait for the endpoint before giving up. Without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The Speechify token string.

    Raises:
        ValueError: The response was 200 but contained no ``token``.
        Exception: The endpoint answered with a non-200 status code.
        requests.RequestException: The request itself failed (connection
            error, timeout, ...).
    """
    url = "https://audio.api.speechify.com/v1/auth/sign"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "Authorization": f"Bearer {firebase_token}",
        "Content-Type": "application/json",
    }

    # Passing the token through ``params`` produces the same
    # ``?id_token=...`` query string while guaranteeing correct URL encoding.
    response = requests.get(
        url,
        params={"id_token": firebase_token},
        headers=headers,
        timeout=timeout,
    )

    if response.status_code != 200:
        raise Exception(f"❌ Failed to retrieve Speechify token. Status code: {response.status_code}, Response: {response.text}")

    speechify_token = response.json().get('token')
    if not speechify_token:
        raise ValueError("Speechify token not found in the response")

    print("✅ Speechify token retrieved successfully")
    return speechify_token

def synthesize_speech(token, text, voice_id, output_file, timeout=60):
    """Generate speech audio using the Speechify API and save it as MP3.

    Wraps ``text`` in a ``<speak>`` SSML element, posts it to the Speechify
    synthesis endpoint and writes the response body to ``output_file``.

    Args:
        token: Speechify bearer token.
        text: Plain text to speak. It is XML-escaped before being embedded in
            the SSML document, so ``&``, ``<`` and ``>`` are spoken as text
            instead of corrupting (or injecting into) the markup.
        voice_id: Identifier of the voice to use.
        output_file: Path the MP3 bytes are written to.
        timeout: Seconds to wait for the synthesis endpoint before giving up.

    Returns:
        True if the API answered 200 and the file was written; False on a
        non-200 response or on any exception raised while requesting or
        writing.
    """
    from xml.sax.saxutils import escape

    url = "https://audio.api.speechify.com/v3/synthesis/get"

    headers = {
        "authorization": f"Bearer {token}",
        "x-speechify-client": "WebApp",
        "x-speechify-client-version": "2.3.0",
        "Content-Type": "application/json",
    }

    payload = {
        # str() keeps the original tolerance for non-string input (the
        # f-string used to stringify it implicitly).
        "ssml": f"<speak>{escape(str(text))}</speak>",
        "voice": voice_id,
        "forcedAudioFormat": "mp3",
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"❌ Failed to generate audio. Status code: {response.status_code}")
            print(response.text)
            return False

        with open(output_file, "wb") as f:
            f.write(response.content)
        print(f"✅ MP3 file saved as {output_file}")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on a boolean result
        # rather than an exception.
        print(f"❌ An error occurred during speech synthesis: {str(e)}")
        return False

# Create the "output" directory (relative to the current working directory) at
# import time if it doesn't exist; the /synthesize endpoint writes its MP3
# files there.
os.makedirs("output", exist_ok=True)

@app.route('/synthesize', methods=['POST'])
def api_synthesize_speech():
    """API endpoint to synthesize speech from text.

    Expects a JSON object with a required ``text`` field and an optional
    ``voice_id`` field. On success the generated MP3 is returned as an
    attachment; the temporary file is deleted once the response has been
    closed so the output directory does not grow without bound.

    Returns:
        The MP3 file response, or a JSON ``{"error": ...}`` body with status
        400 (missing/invalid input) or 500 (token retrieval or synthesis
        failure).
    """
    # silent=True returns None for a missing, non-JSON or malformed body
    # instead of raising, so client mistakes are reported as 400 rather than
    # being turned into a 500 by the catch-all below.
    data = request.get_json(silent=True)

    # Also reject valid JSON that is not an object (e.g. a list), which has
    # no .get().
    if not data or not isinstance(data, dict):
        return jsonify({"error": "No JSON data provided"}), 400

    text = data.get('text')
    voice_id = data.get('voice_id', 'PVL:4f4a27ef-2b17-424f-904c-30bd1ed60fb8')  # Default voice if not provided

    if not text:
        return jsonify({"error": "No text provided"}), 400

    # Generate a unique filename. It is made absolute because the file is
    # written relative to the working directory, whereas Flask resolves
    # relative send_file paths against the application root.
    filename = os.path.abspath(os.path.join("output", f"{uuid.uuid4()}.mp3"))

    def _remove_output():
        # Best-effort cleanup: the file may never have been created.
        try:
            os.remove(filename)
        except OSError:
            pass

    try:
        # Get tokens and synthesize speech
        firebase_token = get_firebase_token()
        speechify_token = get_speechify_auth_token(firebase_token)

        if not synthesize_speech(speechify_token, text, voice_id, filename):
            _remove_output()
            return jsonify({"error": "Failed to synthesize speech"}), 500

        # Return the audio file and delete it after the response is closed.
        response = send_file(filename, mimetype='audio/mpeg', as_attachment=True)
        response.call_on_close(_remove_output)
        return response

    except Exception as e:
        _remove_output()
        return jsonify({"error": str(e)}), 500

@app.route('/', methods=['GET'])
def home():
    """Serve the static landing page describing how to call the API.

    Returns:
        An HTML string documenting the ``POST /synthesize`` endpoint, its
        request format, a cURL example and the response type.
    """
    usage_page = """
    <html>
        <head>
            <title>Speechify API</title>
            <style>
                body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
                code { background-color: #f4f4f4; padding: 2px 5px; border-radius: 3px; }
                pre { background-color: #f4f4f4; padding: 10px; border-radius: 5px; overflow-x: auto; }
            </style>
        </head>
        <body>
            <h1>Speechify Text-to-Speech API</h1>
            <p>Use this API to convert text to speech using Speechify.</p>

            <h2>Endpoint</h2>
            <code>POST /synthesize</code>

            <h2>Request Format</h2>
            <pre>
{
    "text": "Your text to be converted to speech",
    "voice_id": "PVL:4f4a27ef-2b17-424f-904c-30bd1ed60fb8" (optional)
}
            </pre>

            <h2>Example Usage with cURL</h2>
            <pre>
curl -X POST http://localhost:7860/synthesize \\
     -H "Content-Type: application/json" \\
     -d '{"text": "Hello, this is a test", "voice_id": "PVL:4f4a27ef-2b17-424f-904c-30bd1ed60fb8"}'
            </pre>

            <h2>Response</h2>
            <p>The API returns an MP3 audio file if successful, or a JSON error message if there's a problem.</p>
        </body>
    </html>
    """
    return usage_page

if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, so it must not be on by default while listening on
    # all interfaces. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)