from flask import Flask, request, jsonify, session
import requests
import json
import time
import os
import uuid
import threading
import base64
from flask_cors import CORS
from flask_session import Session

app = Flask(__name__)
CORS(app)

# Configure server-side session
app.config["SECRET_KEY"] = os.urandom(24)
app.config["SESSION_TYPE"] = "filesystem"
app.config["SESSION_PERMANENT"] = True
Session(app)

# Global variables
UPLOAD_FOLDER = 'temp_audio'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Upper bound, in seconds, applied to every outbound HTTP call so that a
# stalled upstream service cannot block a request handler forever.
REQUEST_TIMEOUT = 600

# API endpoints and headers
# NOTE(review): the spaces-jwt cookies below are credentials committed to
# source. They should be moved to environment variables / a secret store.
TTS_API_URL = "https://corvo-ai-tts.hf.space/synthesize"
TTS_HEADERS = {
    "Content-Type": "application/json",
    "cookie": "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4NzgyLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1RUUyIsImV4cCI6MTc0NzkzNTE4MiwiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.c5FrznT6KdBkVFUI7Oi0cTMo_w2IVcpw926D9dZ4nsa2N_pJtSYNXfSWU4bmBVKaol8-IFsdZ9rlvWwUtpYfCg",
}
TTS_VOICE_ID = "PVL:09cc01e9-e3b3-40cd-9cc3-b6670285fc99"  # Ana de Armas voice

STT_API_BASE_URL = "https://corvo-ai-transcript.hf.space"
STT_AUTH_COOKIE = "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODMwLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL3RyYW5zY3JpcHQiLCJleHAiOjE3NDc5MzUyMzAsImlzcyI6Imh0dHBzOi8vaHVnZ2luZ2ZhY2UuY28ifQ.1Lj_JBeVsOgIz5mQ8EVcfaNz2JAXqzUYD1IWUZQ7zuVBEXETD99bz3okgAMPAtyDDKm30FoUqoaLp3u_vtbMBA"
STT_HEADERS = {"Cookie": STT_AUTH_COOKIE}

AI_API_URL = "https://corvo-ai-xx-xx.hf.space/chat"
AI_HEADERS = {
    "Content-Type": "application/json",
    "cookie": "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODYzLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1hYLVhYIiwiZXhwIjoxNzQ3OTM1MjYzLCJpc3MiOiJodHRwczovL2h1Z2dpbmdmYWNlLmNvIn0.SsDn_gx7s7DKQ9Qb4En8Ij25XqP_IgnZVI6AEhySI1O1YjBQ4POWCVZHd-i92GlYwlm_zETzCtNNQT7qBE7jDQ",
}

# System prompt sent as the first message of every AI request. The text is
# runtime data consumed by the model and is kept exactly as authored.
SYSTEM_PROMPT = (
    "you ar rinet AI model you are Libyan AI okay ?\n"
    " you develped by : MR OMAR NUAWRA\n\n"
    " so i want you to text with user not like chat but in call mode becasue you output will go to TTS model so the roles is \n\n"
    " roles : don't put codes math points...etc something not understood in TTS , \n"
    " use uhh umm ahh....etc to be so realastic \n"
    " you are a female voice \n"
    " don't make your response so tall we need small perfect and realastic"
)


# Helper function for thinking animation (not used in API but kept for reference)
def thinking_animation():
    """Print three dots half a second apart, then return the cursor to column 0."""
    for _ in range(3):
        print(".", end="", flush=True)
        time.sleep(0.5)
    print("\r", end="", flush=True)


# AI text generation function
def gpt4o_ai(user_input, chat_history, username):
    """Send the conversation to the AI chat API and return its reply.

    Args:
        user_input: Text of the current user turn.
        chat_history: Previous messages as a list of dicts with ``role`` and
            ``content`` keys (may be empty or ``None``).
        username: Name prefixed to the user turn as ``"<username>: <text>"``.

    Returns:
        A ``(assistant_response, history)`` tuple. ``history`` is a new list
        that starts with the system prompt, followed by the non-system
        entries of ``chat_history``, the current user turn and, on success,
        the assistant reply. When every attempt fails, the first element is a
        fixed error message and no assistant entry is appended.
    """
    # Always lead with the system prompt. Stored system entries are filtered
    # out below, so adding it only when the history did not already start
    # with one would drop it from every request whose history did.
    formatted_chat_history = [{"role": "system", "content": SYSTEM_PROMPT}]
    formatted_chat_history.extend(
        {"role": entry['role'], "content": entry['content']}
        for entry in (chat_history or [])
        if entry.get('role') != 'system'
    )

    # Append current user input
    formatted_chat_history.append({"role": "user", "content": f"{username}: {user_input}"})

    payload = {"chat_history": formatted_chat_history}

    max_retries = 5
    retry_delay = 10

    for attempt in range(1, max_retries + 1):
        try:
            print("AI THINKING...")
            response = requests.post(
                AI_API_URL,
                headers=AI_HEADERS,
                data=json.dumps(payload),
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            assistant_response = response.json().get("assistant_response", "No response received.")
        except requests.exceptions.Timeout:
            print(f"Timeout on attempt {attempt}, retrying...")
        except Exception as e:
            # Deliberately broad: any failure (HTTP error, bad JSON, ...) is
            # treated as retryable so the caller always gets a reply string.
            print(f"Error on attempt {attempt}: {e}, retrying...")
        else:
            # Append the assistant's response to the chat history
            formatted_chat_history.append({"role": "assistant", "content": assistant_response})
            return assistant_response, formatted_chat_history

        # Only wait when another attempt will actually follow.
        if attempt < max_retries:
            time.sleep(retry_delay)

    return "Error processing request. Please try again.", formatted_chat_history


# Text-to-Speech function
def text_to_speech(text):
    """Convert text to speech using the TTS API.

    Args:
        text: Text to synthesize with the voice ``TTS_VOICE_ID``.

    Returns:
        The response body encoded as a base64 string, or ``None`` when the
        request fails or the API answers with a status other than 200.
    """
    payload = {"text": text, "voice_id": TTS_VOICE_ID}

    try:
        response = requests.post(
            TTS_API_URL,
            headers=TTS_HEADERS,
            json=payload,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Error getting TTS audio: {response.status_code}")
            return None
        # Return the audio content as base64
        return base64.b64encode(response.content).decode('utf-8')
    except Exception as e:
        # Best effort: callers treat a missing audio track as non-fatal.
        print(f"Error in TTS API call: {str(e)}")
        return None


# Speech-to-Text function
def speech_to_text(audio_file_path):
    """Convert speech to text using the STT API.

    The file is first uploaded to ``/upload``; the ``file_url`` from that
    response is then posted to ``/transcribe``.

    Args:
        audio_file_path: Path of the audio file to upload (sent as
            ``audio/mpeg``).

    Returns:
        The ``transcription`` field of the transcribe response, or ``None``
        when either step fails or the upload response has no ``file_url``.
    """
    try:
        # Step 1: Upload the MP3 file
        with open(audio_file_path, 'rb') as file:
            files = {'audio': (os.path.basename(audio_file_path), file, 'audio/mpeg')}
            upload_response = requests.post(
                f"{STT_API_BASE_URL}/upload",
                files=files,
                headers=STT_HEADERS,
                timeout=REQUEST_TIMEOUT,
            )

        # Check if upload was successful
        if upload_response.status_code != 200:
            print(f"Upload failed with status code {upload_response.status_code}")
            return None

        # Get the file URL from the response
        file_url = upload_response.json().get('file_url')
        if not file_url:
            print("No file URL in response")
            return None

        # Step 2: Send the file URL for transcription
        transcribe_payload = {
            "file_url": file_url,
            "prompt": "get all text with his lang and extract (DON'T translate).",
        }
        transcribe_response = requests.post(
            f"{STT_API_BASE_URL}/transcribe",
            json=transcribe_payload,
            headers=STT_HEADERS,
            timeout=REQUEST_TIMEOUT,
        )

        # Check if transcription was successful
        if transcribe_response.status_code != 200:
            print(f"Transcription failed with status code {transcribe_response.status_code}")
            return None

        # Get the transcription from the response
        return transcribe_response.json().get('transcription')
    except Exception as e:
        # Best effort: the route reports a transcription failure to the client.
        print(f"Error in STT API call: {str(e)}")
        return None


def _json_body():
    """Return the request's JSON object, or ``{}`` if absent or not an object."""
    data = request.get_json(silent=True)
    return data if isinstance(data, dict) else {}


# Routes
@app.route('/api/start-session', methods=['POST'])
def start_session():
    """Initialize a new session for a user.

    Reads an optional ``username`` from the JSON body (default ``'User'``),
    resets the session's chat history, requests an opening reply from the AI
    and returns it together with its base64 audio (``None`` if TTS failed).
    """
    username = _json_body().get('username', 'User')

    # Initialize chat history for this user
    session['username'] = username
    session['chat_history'] = []

    # Generate initial AI greeting
    initial_prompt = "Hello! I'm your AI assistant. How can I help you today?"
    ai_response, chat_history = gpt4o_ai(initial_prompt, [], username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'message': 'Session started',
        'username': username,
        'ai_response': ai_response,
        'audio': audio_base64
    })


@app.route('/api/send-text', methods=['POST'])
def send_text():
    """Process text input from user and get AI response.

    Reads ``text`` from the JSON body (default ``''``), continues the
    conversation stored in the session and returns the AI reply with its
    base64 audio (``None`` if TTS failed).
    """
    user_input = _json_body().get('text', '')

    # Get session data
    username = session.get('username', 'User')
    chat_history = session.get('chat_history', [])

    # Get AI response
    ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'ai_response': ai_response,
        'audio': audio_base64
    })


@app.route('/api/send-audio', methods=['POST'])
def send_audio():
    """Process audio input from user and get AI response.

    Expects a multipart upload under the ``audio`` field. The file is stored
    temporarily, transcribed, answered by the AI and the reply is returned
    with its transcription and base64 audio. The temporary file is always
    removed afterwards.
    """
    if 'audio' not in request.files:
        return jsonify({'success': False, 'error': 'No audio file provided'})

    audio_file = request.files['audio']

    # Random name so concurrent uploads never collide.
    filename = f"{uuid.uuid4()}.mp3"
    file_path = os.path.join(UPLOAD_FOLDER, filename)

    try:
        # Saving happens inside the try so a partially written file is also
        # cleaned up if the save itself fails.
        audio_file.save(file_path)

        # Convert speech to text
        user_input = speech_to_text(file_path)
        if not user_input:
            return jsonify({'success': False, 'error': 'Failed to transcribe audio'})

        # Get session data
        username = session.get('username', 'User')
        chat_history = session.get('chat_history', [])

        # Get AI response
        ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
        session['chat_history'] = chat_history

        # Convert AI response to speech
        audio_base64 = text_to_speech(ai_response)

        return jsonify({
            'success': True,
            'transcription': user_input,
            'ai_response': ai_response,
            'audio': audio_base64
        })
    finally:
        # Clean up the temporary file
        if os.path.exists(file_path):
            os.remove(file_path)


@app.route('/api/interrupt', methods=['POST'])
def interrupt():
    """Handle user interruption during AI speech.

    Currently a stub: it performs no work and only acknowledges the request.
    """
    # This endpoint would be called when the user starts speaking while the AI is talking
    # In a real implementation, you might need WebSockets for this kind of real-time interaction
    return jsonify({
        'success': True,
        'message': 'AI speech interrupted'
    })


@app.route('/api/end-session', methods=['POST'])
def end_session():
    """End the current session by clearing all data stored in it."""
    # Clear session data
    session.clear()

    return jsonify({
        'success': True,
        'message': 'Session ended'
    })


if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port run code.
    # Set FLASK_DEBUG=1 to enable it for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)