File size: 10,942 Bytes
dff2bf0
20a3b47
dff2bf0
e1d36ec
3747525
dff2bf0
 
 
 
 
305d870
5492f08
dff2bf0
 
9e12e65
dff2bf0
 
 
 
 
 
 
 
 
 
 
dddce15
78ac480
4726baa
dff2bf0
 
78ac480
dff2bf0
 
2c94d8c
dff2bf0
 
78ac480
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c94d8c
 
 
1ed60f2
2c94d8c
 
dff2bf0
2c94d8c
 
dff2bf0
 
2c94d8c
dff2bf0
485895c
dff2bf0
 
909eff0
1f75665
dff2bf0
 
 
909eff0
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909eff0
dff2bf0
1f75665
dff2bf0
 
 
 
 
 
909eff0
1f75665
e1d36ec
dff2bf0
 
e1d36ec
dff2bf0
 
e1d36ec
dff2bf0
e1d36ec
 
dff2bf0
909eff0
 
dff2bf0
 
 
e1d36ec
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
3747525
dff2bf0
 
 
3747525
dff2bf0
 
 
909eff0
dff2bf0
 
 
 
e1d36ec
1f75665
dff2bf0
 
 
 
 
1f75665
dff2bf0
 
 
 
1f75665
dff2bf0
 
 
bdc56d4
dff2bf0
3747525
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3747525
dff2bf0
3747525
 
 
dff2bf0
 
 
 
3747525
dff2bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4cb20f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
from flask import Flask, request, jsonify, session
import requests
import json
import time
import os
import uuid
import threading
import base64
from flask_cors import CORS
from flask_session import Session

app = Flask(__name__)
CORS(app)


# Configure server-side session (session data is stored on the filesystem).
# A key generated with os.urandom() differs on every start and in every
# process, so anything signed with it cannot be verified after a restart or
# by another worker. Prefer a stable key supplied through the SECRET_KEY
# environment variable; fall back to a random key when it is not set.
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY") or os.urandom(24)
app.config["SESSION_TYPE"] = "filesystem"
app.config["SESSION_PERMANENT"] = True
Session(app)

# Global variables
# Directory (relative to the working directory) where uploaded audio is
# staged before transcription; created at import time if it is missing.
UPLOAD_FOLDER = 'temp_audio'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# API endpoints and headers
#
# Each upstream service is authenticated with a "spaces-jwt" cookie. The
# cookies can be overridden through environment variables (TTS_AUTH_COOKIE,
# STT_AUTH_COOKIE, AI_AUTH_COOKIE) so credentials can be rotated without a
# code change; the embedded literals are kept only as fallbacks.
# TODO: remove the embedded tokens once deployments supply the variables --
# credentials should not live in source control.
TTS_API_URL = "https://corvo-ai-tts.hf.space/synthesize"
TTS_HEADERS = {
    "Content-Type": "application/json",
    "cookie": os.environ.get(
        "TTS_AUTH_COOKIE",
        "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4NzgyLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1RUUyIsImV4cCI6MTc0NzkzNTE4MiwiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.c5FrznT6KdBkVFUI7Oi0cTMo_w2IVcpw926D9dZ4nsa2N_pJtSYNXfSWU4bmBVKaol8-IFsdZ9rlvWwUtpYfCg",
    ),
}
TTS_VOICE_ID = "PVL:09cc01e9-e3b3-40cd-9cc3-b6670285fc99"  # Ana de Armas voice

STT_API_BASE_URL = "https://corvo-ai-transcript.hf.space"
STT_AUTH_COOKIE = os.environ.get(
    "STT_AUTH_COOKIE",
    "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODMwLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL3RyYW5zY3JpcHQiLCJleHAiOjE3NDc5MzUyMzAsImlzcyI6Imh0dHBzOi8vaHVnZ2luZ2ZhY2UuY28ifQ.1Lj_JBeVsOgIz5mQ8EVcfaNz2JAXqzUYD1IWUZQ7zuVBEXETD99bz3okgAMPAtyDDKm30FoUqoaLp3u_vtbMBA",
)
STT_HEADERS = {"Cookie": STT_AUTH_COOKIE}

AI_API_URL = "https://corvo-ai-xx-xx.hf.space/chat"
AI_HEADERS = {
    "Content-Type": "application/json",
    "cookie": os.environ.get(
        "AI_AUTH_COOKIE",
        "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODYzLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1hYLVhYIiwiZXhwIjoxNzQ3OTM1MjYzLCJpc3MiOiJodHRwczovL2h1Z2dpbmdmYWNlLmNvIn0.SsDn_gx7s7DKQ9Qb4En8Ij25XqP_IgnZVI6AEhySI1O1YjBQ4POWCVZHd-i92GlYwlm_zETzCtNNQT7qBE7jDQ",
    ),
}

# Helper function for thinking animation (not used in API but kept for reference)
def thinking_animation():
    """Print three dots, half a second apart, then emit a carriage return.

    Each write is flushed immediately so the dots appear one by one.
    """
    dots_left = 3
    while dots_left > 0:
        print(".", end="", flush=True)
        time.sleep(0.5)
        dots_left -= 1
    # Move the cursor back to the start of the line without a newline.
    print("\r", end="", flush=True)

# AI text generation function
def gpt4o_ai(user_input, chat_history, username, *, max_retries=5, retry_delay=10, timeout=600):
    """Send the conversation to the AI chat API and return its reply.

    The outgoing message list always starts with the system prompt, followed
    by the non-system entries of ``chat_history`` and finally the new user
    message prefixed with ``username``.

    Args:
        user_input: Text of the user's new message.
        chat_history: Earlier messages as dicts with ``role`` and ``content``
            keys. The list is not modified.
        username: Name prepended to the user message as ``"<username>: "``.
        max_retries: Maximum number of POST attempts.
        retry_delay: Seconds to wait between two attempts.
        timeout: Per-request timeout in seconds.

    Returns:
        A ``(assistant_response, updated_history)`` tuple. On success the
        history ends with the assistant's reply. If every attempt fails, the
        reply is a fixed error message and the history ends with the user
        message.
    """
    system_prompt = "you ar rinet AI model you are Libyan AI okay ?\n you develped by : MR OMAR NUAWRA\n\n so i want you to text with user not like chat but in call mode becasue you output will go to TTS model so the roles is \n\n roles : don't put codes math points...etc something not understood in TTS , \n use uhh umm ahh....etc to be so realastic \n you are a female voice \n don't make your response so tall we need small perfect and realastic"

    # Always lead with the system prompt. Stored system entries are skipped
    # below, so prepending it only when the history lacked one would drop the
    # prompt whenever the stored history already started with it.
    formatted_chat_history = [{"role": "system", "content": system_prompt}]
    formatted_chat_history.extend(
        {"role": entry['role'], "content": entry['content']}
        for entry in chat_history
        if entry.get('role') != 'system'
    )

    # Append current user input
    formatted_chat_history.append({"role": "user", "content": f"{username}: {user_input}"})

    payload = {
        "chat_history": formatted_chat_history
    }

    for attempt in range(max_retries):
        try:
            print("AI THINKING...")
            response = requests.post(AI_API_URL, headers=AI_HEADERS, data=json.dumps(payload), timeout=timeout)
            response.raise_for_status()
            assistant_response = response.json().get("assistant_response", "No response received.")

            # Append the assistant's response to the chat history
            formatted_chat_history.append({"role": "assistant", "content": assistant_response})
            return assistant_response, formatted_chat_history
        except requests.exceptions.Timeout:
            print(f"Timeout on attempt {attempt + 1}, retrying...")
        except Exception as e:
            # Deliberately broad: any failure (HTTP error, bad JSON, network
            # problem) is logged and retried rather than surfaced to callers.
            print(f"Error on attempt {attempt + 1}: {e}, retrying...")

        # Wait before the next attempt, but not after the final one.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)

    return "Error processing request. Please try again.", formatted_chat_history

# Text-to-Speech function
def text_to_speech(text, timeout=120):
    """Convert text to speech using the TTS API.

    Args:
        text: Text to synthesize.
        timeout: Seconds to wait for the TTS service before giving up, so a
            stalled upstream cannot block the request indefinitely.

    Returns:
        The audio bytes returned by the service, base64-encoded as a UTF-8
        string, or ``None`` when ``text`` is empty, the service does not
        answer with HTTP 200, or the call raises.
    """
    # Nothing to synthesize; skip the network round trip.
    if not text:
        return None

    payload = {
        "text": text,
        "voice_id": TTS_VOICE_ID
    }

    try:
        response = requests.post(TTS_API_URL, headers=TTS_HEADERS, json=payload, timeout=timeout)

        if response.status_code == 200:
            # Return the audio content as base64
            return base64.b64encode(response.content).decode('utf-8')
        else:
            print(f"Error getting TTS audio: {response.status_code}")
            return None
    except Exception as e:
        # Best-effort boundary: callers treat None as "no audio available".
        print(f"Error in TTS API call: {str(e)}")
        return None

# Speech-to-Text function
def speech_to_text(audio_file_path, timeout=120):
    """Convert speech to text using the STT API.

    The file is uploaded to ``/upload``; the ``file_url`` from that response
    is then posted to ``/transcribe``.

    Args:
        audio_file_path: Path of the audio file to upload. It is sent with the
            ``audio/mpeg`` content type.
        timeout: Seconds to wait for each of the two HTTP calls, so a stalled
            upstream cannot block the request indefinitely.

    Returns:
        The ``transcription`` value from the service response, or ``None``
        when the file cannot be read, either call does not return HTTP 200,
        the upload response has no ``file_url``, or any step raises.
    """
    try:
        # Step 1: Upload the MP3 file
        with open(audio_file_path, 'rb') as file:
            files = {'audio': (os.path.basename(audio_file_path), file, 'audio/mpeg')}
            upload_response = requests.post(
                f"{STT_API_BASE_URL}/upload",
                files=files,
                headers=STT_HEADERS,
                timeout=timeout
            )

        # Check if upload was successful
        if upload_response.status_code != 200:
            print(f"Upload failed with status code {upload_response.status_code}")
            return None

        # Get the file URL from the response
        upload_data = upload_response.json()
        file_url = upload_data.get('file_url')

        if not file_url:
            print("No file URL in response")
            return None

        # Step 2: Send the file URL for transcription
        transcribe_payload = {
            "file_url": file_url,
            "prompt": "get all text with his lang and extract (DON'T translate)."
        }

        transcribe_response = requests.post(
            f"{STT_API_BASE_URL}/transcribe",
            json=transcribe_payload,
            headers=STT_HEADERS,
            timeout=timeout
        )

        # Check if transcription was successful
        if transcribe_response.status_code != 200:
            print(f"Transcription failed with status code {transcribe_response.status_code}")
            return None

        # Get the transcription from the response
        transcribe_data = transcribe_response.json()
        return transcribe_data.get('transcription')
    except Exception as e:
        # Best-effort boundary: file, network and JSON errors all map to None.
        print(f"Error in STT API call: {str(e)}")
        return None

# Routes
@app.route('/api/start-session', methods=['POST'])
def start_session():
    """Initialize a new session for a user.

    Reads an optional ``username`` from the JSON body (default ``'User'``),
    resets the stored chat history, requests an opening reply from the AI and
    converts it to speech.

    Returns:
        JSON with ``success``, ``message``, ``username``, ``ai_response`` and
        ``audio`` (base64 audio, or null when synthesis failed).
    """
    # Tolerate a missing, malformed or non-object JSON body instead of
    # failing on ``data.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    username = data.get('username', 'User')

    # Initialize chat history for this user
    session['username'] = username
    session['chat_history'] = []

    # Generate initial AI greeting
    initial_prompt = "Hello! I'm your AI assistant. How can I help you today?"
    ai_response, chat_history = gpt4o_ai(initial_prompt, [], username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'message': 'Session started',
        'username': username,
        'ai_response': ai_response,
        'audio': audio_base64
    })

@app.route('/api/send-text', methods=['POST'])
def send_text():
    """Process text input from user and get AI response"""
    data = request.json
    user_input = data.get('text', '')

    # Get session data
    username = session.get('username', 'User')
    chat_history = session.get('chat_history', [])

    # Get AI response
    ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'ai_response': ai_response,
        'audio': audio_base64
    })

@app.route('/api/send-audio', methods=['POST'])
def send_audio():
    """Process audio input from user and get AI response"""
    if 'audio' not in request.files:
        return jsonify({'success': False, 'error': 'No audio file provided'})

    audio_file = request.files['audio']

    # Save the audio file temporarily
    filename = f"{uuid.uuid4()}.mp3"
    file_path = os.path.join(UPLOAD_FOLDER, filename)
    audio_file.save(file_path)

    try:
        # Convert speech to text
        user_input = speech_to_text(file_path)

        if not user_input:
            return jsonify({'success': False, 'error': 'Failed to transcribe audio'})

        # Get session data
        username = session.get('username', 'User')
        chat_history = session.get('chat_history', [])

        # Get AI response
        ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
        session['chat_history'] = chat_history

        # Convert AI response to speech
        audio_base64 = text_to_speech(ai_response)

        return jsonify({
            'success': True,
            'transcription': user_input,
            'ai_response': ai_response,
            'audio': audio_base64
        })
    finally:
        # Clean up the temporary file
        if os.path.exists(file_path):
            os.remove(file_path)

@app.route('/api/interrupt', methods=['POST'])
def interrupt():
    """Handle user interruption during AI speech"""
    # This endpoint would be called when the user starts speaking while the AI is talking
    # In a real implementation, you might need WebSockets for this kind of real-time interaction
    return jsonify({
        'success': True,
        'message': 'AI speech interrupted'
    })

@app.route('/api/end-session', methods=['POST'])
def end_session():
    """End the current session"""
    # Clear session data
    session.clear()

    return jsonify({
        'success': True,
        'message': 'Session ended'
    })

if __name__ == '__main__':
    # The server listens on all interfaces, and the interactive debugger lets
    # a client execute arbitrary code, so debug mode is opt-in via FLASK_DEBUG
    # rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)