BACK-END

Sleeping

File size: 10,942 Bytes

dff2bf0
20a3b47
dff2bf0
e1d36ec
3747525
dff2bf0
 
 
 
 
305d870
5492f08
dff2bf0
 
9e12e65
dff2bf0
 
 
 
 
 
 
 
 
 
 
dddce15
78ac480
4726baa
dff2bf0
 
78ac480
dff2bf0
 
2c94d8c
dff2bf0
 
78ac480
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c94d8c
 
 
1ed60f2
2c94d8c
 
dff2bf0
2c94d8c
 
dff2bf0
 
2c94d8c
dff2bf0
485895c
dff2bf0
 
909eff0
1f75665
dff2bf0
 
 
909eff0
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909eff0
dff2bf0
1f75665
dff2bf0
 
 
 
 
 
909eff0
1f75665
e1d36ec
dff2bf0
 
e1d36ec
dff2bf0
 
e1d36ec
dff2bf0
e1d36ec
 
dff2bf0
909eff0
 
dff2bf0
 
 
e1d36ec
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
3747525
dff2bf0
 
 
3747525
dff2bf0
 
 
909eff0
dff2bf0
 
 
 
e1d36ec
1f75665
dff2bf0
 
 
 
 
1f75665
dff2bf0
 
 
 
1f75665
dff2bf0
 
 
bdc56d4
dff2bf0
3747525
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3747525
dff2bf0
3747525
 
 
dff2bf0
 
 
 
3747525
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4cb20f

from flask import Flask, request, jsonify, session
import requests
import json
import time
import os
import uuid
import threading
import base64
from flask_cors import CORS
from flask_session import Session

# Flask application; CORS is enabled with flask_cors' default settings.
app = Flask(__name__)
CORS(app)


# Server-side session storage (Flask-Session, filesystem backend).
# NOTE(review): SECRET_KEY is regenerated from os.urandom on every process
# start, so it is not shared between processes or restarts.
app.config.update(
    SECRET_KEY=os.urandom(24),
    SESSION_TYPE="filesystem",
    SESSION_PERMANENT=True,
)
Session(app)

# Directory where uploaded audio is written before it is transcribed.
UPLOAD_FOLDER = 'temp_audio'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# API endpoints and headers
#
# Each upstream service is called with a "spaces-jwt" auth cookie. The cookie
# can be supplied through an environment variable (TTS_AUTH_COOKIE,
# STT_AUTH_COOKIE, AI_AUTH_COOKIE) so it can be rotated without editing this
# file; the literals below are used only when the variable is unset or empty.
# NOTE(review): credentials committed to source control should be revoked and
# the fallbacks removed once the environment variables are provisioned.
TTS_API_URL = "https://corvo-ai-tts.hf.space/synthesize"
TTS_HEADERS = {
    "Content-Type": "application/json",
    "cookie": os.environ.get("TTS_AUTH_COOKIE")
    or "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4NzgyLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1RUUyIsImV4cCI6MTc0NzkzNTE4MiwiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.c5FrznT6KdBkVFUI7Oi0cTMo_w2IVcpw926D9dZ4nsa2N_pJtSYNXfSWU4bmBVKaol8-IFsdZ9rlvWwUtpYfCg",
}
TTS_VOICE_ID = "PVL:09cc01e9-e3b3-40cd-9cc3-b6670285fc99"  # Ana de Armas voice

STT_API_BASE_URL = "https://corvo-ai-transcript.hf.space"
STT_AUTH_COOKIE = (
    os.environ.get("STT_AUTH_COOKIE")
    or "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODMwLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL3RyYW5zY3JpcHQiLCJleHAiOjE3NDc5MzUyMzAsImlzcyI6Imh0dHBzOi8vaHVnZ2luZ2ZhY2UuY28ifQ.1Lj_JBeVsOgIz5mQ8EVcfaNz2JAXqzUYD1IWUZQ7zuVBEXETD99bz3okgAMPAtyDDKm30FoUqoaLp3u_vtbMBA"
)
STT_HEADERS = {"Cookie": STT_AUTH_COOKIE}

AI_API_URL = "https://corvo-ai-xx-xx.hf.space/chat"
AI_HEADERS = {
    "Content-Type": "application/json",
    "cookie": os.environ.get("AI_AUTH_COOKIE")
    or "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODYzLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1hYLVhYIiwiZXhwIjoxNzQ3OTM1MjYzLCJpc3MiOiJodHRwczovL2h1Z2dpbmdmYWNlLmNvIn0.SsDn_gx7s7DKQ9Qb4En8Ij25XqP_IgnZVI6AEhySI1O1YjBQ4POWCVZHd-i92GlYwlm_zETzCtNNQT7qBE7jDQ",
}

# Helper function for thinking animation (not used in API but kept for reference)
def thinking_animation():
    """Print three dots, half a second apart, then emit a carriage return."""
    dots_left = 3
    while dots_left > 0:
        # flush=True so each dot is written immediately rather than buffered.
        print(".", end="", flush=True)
        time.sleep(0.5)
        dots_left -= 1
    print("\r", end="", flush=True)

# AI text generation function
# Default system prompt. Its wording is runtime input to the model, so it is
# kept verbatim.
_SYSTEM_PROMPT = "you ar rinet AI model you are Libyan AI okay ?\n you develped by : MR OMAR NUAWRA\n\n so i want you to text with user not like chat but in call mode becasue you output will go to TTS model so the roles is \n\n roles : don't put codes math points...etc something not understood in TTS , \n use uhh umm ahh....etc to be so realastic \n you are a female voice \n don't make your response so tall we need small perfect and realastic"


def _build_chat_messages(user_input, chat_history, username):
    """Return the role/content message list for one request to the chat API."""
    history = chat_history or []

    # The conversation must always start with exactly one system prompt.
    # Previously, a history that already began with a system message had that
    # message skipped without being re-added, so the prompt was lost on such
    # turns. Reuse the stored prompt when present, else fall back to the default.
    if history and history[0].get('role') == 'system':
        system_content = history[0]['content']
    else:
        system_content = _SYSTEM_PROMPT

    messages = [{"role": "system", "content": system_content}]
    messages.extend(
        {"role": entry['role'], "content": entry['content']}
        for entry in history
        if entry.get('role') != 'system'
    )
    messages.append({"role": "user", "content": f"{username}: {user_input}"})
    return messages


def gpt4o_ai(user_input, chat_history, username):
    """Send one user turn to the AI chat API and return its reply.

    Args:
        user_input: Text of the current user turn.
        chat_history: Earlier messages as dicts with 'role' and 'content'
            keys; may be empty or None.
        username: Name prefixed to the user turn as "<username>: <text>".

    Returns:
        A tuple ``(assistant_response, messages)``. ``messages`` is the
        message list that was sent, with the assistant reply appended on
        success. If every attempt fails, the first element is a fixed error
        string and ``messages`` ends with the unanswered user turn.
    """
    formatted_chat_history = _build_chat_messages(user_input, chat_history, username)

    payload = {
        "chat_history": formatted_chat_history
    }

    max_retries = 5
    retry_delay = 10
    timeout = 600

    for attempt in range(max_retries):
        try:
            print("AI THINKING...")
            response = requests.post(AI_API_URL, headers=AI_HEADERS, data=json.dumps(payload), timeout=timeout)
            response.raise_for_status()
            assistant_response = response.json().get("assistant_response", "No response received.")

            # Append the assistant's response to the chat history
            formatted_chat_history.append({"role": "assistant", "content": assistant_response})
            return assistant_response, formatted_chat_history
        except requests.exceptions.Timeout:
            print(f"Timeout on attempt {attempt + 1}, retrying...")
        except Exception as e:
            # Best-effort: any other failure (HTTP error, bad JSON, ...) is
            # logged and retried rather than propagated to the route.
            print(f"Error on attempt {attempt + 1}: {e}, retrying...")

        # Back off only when another attempt will follow; sleeping after the
        # final failure would just delay the error response.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)

    return "Error processing request. Please try again.", formatted_chat_history

# Text-to-Speech function
def text_to_speech(text):
    """Convert text to speech using the TTS API.

    Args:
        text: The text to synthesize.

    Returns:
        The response body encoded as a base64 string on HTTP 200, or None if
        the API returns another status, times out, or the call raises.
    """
    payload = {
        "text": text,
        "voice_id": TTS_VOICE_ID
    }

    try:
        # requests never times out by default; bound the call so a stalled
        # TTS service cannot hang the request handler indefinitely.
        response = requests.post(TTS_API_URL, headers=TTS_HEADERS, json=payload, timeout=120)

        if response.status_code == 200:
            # Return the audio content as base64
            return base64.b64encode(response.content).decode('utf-8')
        else:
            print(f"Error getting TTS audio: {response.status_code}")
            return None
    except Exception as e:
        # Best-effort: callers treat None as "no audio available".
        print(f"Error in TTS API call: {str(e)}")
        return None

# Speech-to-Text function
def speech_to_text(audio_file_path):
    """Convert speech to text using the STT API.

    The file is first uploaded to ``/upload``; the ``file_url`` from that
    response is then posted to ``/transcribe``.

    Args:
        audio_file_path: Path of the audio file to upload (sent as audio/mpeg).

    Returns:
        The value of the ``transcription`` field of the transcribe response,
        or None if either step returns a non-200 status, the upload response
        has no ``file_url``, a request times out, or any call raises.
    """
    # requests never times out by default; bound both calls so a stalled STT
    # service cannot hang the request handler indefinitely.
    request_timeout = 300

    try:
        # Step 1: Upload the MP3 file
        with open(audio_file_path, 'rb') as file:
            files = {'audio': (os.path.basename(audio_file_path), file, 'audio/mpeg')}
            upload_response = requests.post(
                f"{STT_API_BASE_URL}/upload",
                files=files,
                headers=STT_HEADERS,
                timeout=request_timeout
            )

        # Check if upload was successful
        if upload_response.status_code != 200:
            print(f"Upload failed with status code {upload_response.status_code}")
            return None

        # Get the file URL from the response
        upload_data = upload_response.json()
        file_url = upload_data.get('file_url')

        if not file_url:
            print("No file URL in response")
            return None

        # Step 2: Send the file URL for transcription
        transcribe_payload = {
            "file_url": file_url,
            "prompt": "get all text with his lang and extract (DON'T translate)."
        }

        transcribe_response = requests.post(
            f"{STT_API_BASE_URL}/transcribe",
            json=transcribe_payload,
            headers=STT_HEADERS,
            timeout=request_timeout
        )

        # Check if transcription was successful
        if transcribe_response.status_code != 200:
            print(f"Transcription failed with status code {transcribe_response.status_code}")
            return None

        # Get the transcription from the response
        transcribe_data = transcribe_response.json()
        transcription = transcribe_data.get('transcription')

        return transcription
    except Exception as e:
        # Best-effort: callers treat None as "transcription failed".
        print(f"Error in STT API call: {str(e)}")
        return None

# Routes
@app.route('/api/start-session', methods=['POST'])
def start_session():
    """Initialize a new session for a user and return the AI's opening reply.

    Reads an optional ``username`` from the JSON body (default ``'User'``),
    resets the session's chat history, sends a fixed opening prompt to the AI
    and stores the resulting history in the session.

    Returns:
        JSON with ``success``, ``message``, ``username``, ``ai_response`` and
        ``audio`` (the value returned by ``text_to_speech``).
    """
    # request.json fails on a missing/invalid JSON body, and a non-object body
    # has no .get(); fall back to an empty dict so the default username applies.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    username = data.get('username', 'User')

    # Initialize chat history for this user
    session['username'] = username
    session['chat_history'] = []

    # Generate initial AI greeting
    initial_prompt = "Hello! I'm your AI assistant. How can I help you today?"
    ai_response, chat_history = gpt4o_ai(initial_prompt, [], username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'message': 'Session started',
        'username': username,
        'ai_response': ai_response,
        'audio': audio_base64
    })

@app.route('/api/send-text', methods=['POST'])
def send_text():
    """Process text input from user and get AI response.

    Reads ``text`` from the JSON body, sends it to the AI together with the
    session's stored chat history, and saves the updated history back to the
    session.

    Returns:
        JSON with ``success``, ``ai_response`` and ``audio`` (the value
        returned by ``text_to_speech``); or ``success: False`` with an
        ``error`` message when no text was supplied.
    """
    # request.json fails on a missing/invalid JSON body, and a non-object body
    # has no .get(); treat both as "no text".
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    user_input = data.get('text', '')

    # Don't spend an AI round-trip (and pollute the history) on an empty turn.
    if not user_input:
        return jsonify({'success': False, 'error': 'No text provided'})

    # Get session data
    username = session.get('username', 'User')
    chat_history = session.get('chat_history', [])

    # Get AI response
    ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'ai_response': ai_response,
        'audio': audio_base64
    })

@app.route('/api/send-audio', methods=['POST'])
def send_audio():
    """Process audio input from user and get AI response.

    Expects a multipart upload with an ``audio`` file. The file is written to
    a uniquely named temporary path under ``UPLOAD_FOLDER``, transcribed,
    and the transcription is sent to the AI with the session's chat history.

    Returns:
        JSON with ``success``, ``transcription``, ``ai_response`` and
        ``audio`` (the value returned by ``text_to_speech``); or
        ``success: False`` with an ``error`` message when no file was
        uploaded or transcription failed.
    """
    if 'audio' not in request.files:
        return jsonify({'success': False, 'error': 'No audio file provided'})

    audio_file = request.files['audio']

    # Random name so concurrent uploads never collide.
    filename = f"{uuid.uuid4()}.mp3"
    file_path = os.path.join(UPLOAD_FOLDER, filename)

    try:
        # Save inside the try block so a partially written file is still
        # removed by the finally clause if the save itself fails.
        audio_file.save(file_path)

        # Convert speech to text
        user_input = speech_to_text(file_path)

        if not user_input:
            return jsonify({'success': False, 'error': 'Failed to transcribe audio'})

        # Get session data
        username = session.get('username', 'User')
        chat_history = session.get('chat_history', [])

        # Get AI response
        ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
        session['chat_history'] = chat_history

        # Convert AI response to speech
        audio_base64 = text_to_speech(ai_response)

        return jsonify({
            'success': True,
            'transcription': user_input,
            'ai_response': ai_response,
            'audio': audio_base64
        })
    finally:
        # Clean up the temporary file; it may not exist if the save failed
        # before creating it.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass

@app.route('/api/interrupt', methods=['POST'])
def interrupt():
    """Acknowledge that the user interrupted the AI's speech.

    Currently a placeholder: it performs no server-side work and always
    answers with a success payload. Real-time interruption would likely need
    a push channel such as WebSockets.
    """
    acknowledgement = dict(success=True, message='AI speech interrupted')
    return jsonify(acknowledgement)

@app.route('/api/end-session', methods=['POST'])
def end_session():
    """Clear all data stored in the current session and confirm to the client."""
    session.clear()

    confirmation = dict(success=True, message='Session ended')
    return jsonify(confirmation)

if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port execute
    # code. Enable it explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)