# BACK-END / app.py
# CORVO-AI's picture
# Update app.py
# 78ac480 verified
from flask import Flask, request, jsonify, session
import requests
import json
import time
import os
import uuid
import threading
import base64
from flask_cors import CORS
from flask_session import Session
app = Flask(__name__)
# NOTE(review): CORS(app) uses the flask-cors defaults. The routes below rely
# on a session cookie, so a cross-origin front end presumably needs
# credentialed CORS to keep its session -- confirm against the client.
CORS(app)

# Configure server-side session.
# Prefer a stable secret from the environment so it survives restarts and is
# shared between worker processes; fall back to a random per-process key
# (the previous behaviour) when SECRET_KEY is not set.
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY") or os.urandom(24)
app.config["SESSION_TYPE"] = "filesystem"
app.config["SESSION_PERMANENT"] = True
Session(app)

# Global variables
# Directory where uploaded audio is stored while it is being transcribed.
UPLOAD_FOLDER = 'temp_audio'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# API endpoints and headers
#
# NOTE(review): the fallback cookie values below are bearer-style
# "spaces-jwt" tokens committed to source control. Each token carries an
# expiry claim, so a fallback stops working once that time has passed.
# Supply fresh values through the TTS_AUTH_COOKIE / STT_AUTH_COOKIE /
# AI_AUTH_COOKIE environment variables, and rotate and remove the embedded
# literals as soon as deployments no longer depend on them.

# Text-to-speech service
TTS_API_URL = "https://corvo-ai-tts.hf.space/synthesize"
TTS_AUTH_COOKIE = os.environ.get(
    "TTS_AUTH_COOKIE",
    "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4NzgyLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1RUUyIsImV4cCI6MTc0NzkzNTE4MiwiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.c5FrznT6KdBkVFUI7Oi0cTMo_w2IVcpw926D9dZ4nsa2N_pJtSYNXfSWU4bmBVKaol8-IFsdZ9rlvWwUtpYfCg",
)
TTS_HEADERS = {"Content-Type": "application/json", "cookie": TTS_AUTH_COOKIE}
TTS_VOICE_ID = "PVL:09cc01e9-e3b3-40cd-9cc3-b6670285fc99"  # Ana de Armas voice

# Speech-to-text (transcription) service
STT_API_BASE_URL = "https://corvo-ai-transcript.hf.space"
STT_AUTH_COOKIE = os.environ.get(
    "STT_AUTH_COOKIE",
    "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODMwLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL3RyYW5zY3JpcHQiLCJleHAiOjE3NDc5MzUyMzAsImlzcyI6Imh0dHBzOi8vaHVnZ2luZ2ZhY2UuY28ifQ.1Lj_JBeVsOgIz5mQ8EVcfaNz2JAXqzUYD1IWUZQ7zuVBEXETD99bz3okgAMPAtyDDKm30FoUqoaLp3u_vtbMBA",
)
STT_HEADERS = {"Cookie": STT_AUTH_COOKIE}

# Chat-completion service
AI_API_URL = "https://corvo-ai-xx-xx.hf.space/chat"
AI_AUTH_COOKIE = os.environ.get(
    "AI_AUTH_COOKIE",
    "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODYzLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1hYLVhYIiwiZXhwIjoxNzQ3OTM1MjYzLCJpc3MiOiJodHRwczovL2h1Z2dpbmdmYWNlLmNvIn0.SsDn_gx7s7DKQ9Qb4En8Ij25XqP_IgnZVI6AEhySI1O1YjBQ4POWCVZHd-i92GlYwlm_zETzCtNNQT7qBE7jDQ",
)
AI_HEADERS = {
    "Content-Type": "application/json",
    "cookie": AI_AUTH_COOKIE,
}
# Helper function for thinking animation (not used in API but kept for reference)
def thinking_animation(dots=3, delay=0.5):
    """Print a short "thinking" indicator to stdout.

    Writes ``dots`` periods one at a time, pausing ``delay`` seconds after
    each, then emits a carriage return so the next output starts at the
    beginning of the same line.

    Args:
        dots: Number of periods to print (default 3).
        delay: Seconds to wait after each period (default 0.5).
    """
    for _ in range(dots):
        print(".", end="", flush=True)
        time.sleep(delay)
    # Move the cursor back to column 0 without starting a new line.
    print("\r", end="", flush=True)
# AI text generation function
def gpt4o_ai(user_input, chat_history, username):
    """Send the conversation to the AI chat API and return its reply.

    The outgoing message list always starts with the system prompt, followed
    by the non-system entries of ``chat_history`` and finally the new user
    message prefixed with ``username``.

    Args:
        user_input: The latest user message.
        chat_history: Earlier messages as dicts with ``'role'`` and
            ``'content'`` keys. May be empty or ``None``.
        username: Name prepended to the user message sent to the model.

    Returns:
        A ``(assistant_response, chat_history)`` tuple. On success the
        returned history ends with the assistant reply. If every attempt
        fails, the first element is a fixed error message and the history
        ends with the user message.
    """
    system_prompt = {"role": "system", "content": "you ar rinet AI model you are Libyan AI okay ?\n you develped by : MR OMAR NUAWRA\n\n so i want you to text with user not like chat but in call mode becasue you output will go to TTS model so the roles is \n\n roles : don't put codes math points...etc something not understood in TTS , \n use uhh umm ahh....etc to be so realastic \n you are a female voice \n don't make your response so tall we need small perfect and realastic"}

    # Always lead with the system prompt. Stored system entries are skipped
    # below, so adding it only when the history did not already start with
    # one dropped the prompt on every call made with a stored history.
    formatted_chat_history = [system_prompt]
    for entry in chat_history or []:
        if entry.get('role') == 'system':
            continue  # Replaced by the canonical system prompt above
        formatted_chat_history.append({"role": entry['role'], "content": entry['content']})

    # Append current user input
    formatted_chat_history.append({"role": "user", "content": f"{username}: {user_input}"})

    payload = {
        "chat_history": formatted_chat_history
    }
    max_retries = 5
    retry_delay = 10
    timeout = 600

    for attempt in range(max_retries):
        try:
            print("AI THINKING...")
            response = requests.post(AI_API_URL, headers=AI_HEADERS, data=json.dumps(payload), timeout=timeout)
            response.raise_for_status()
            assistant_response = response.json().get("assistant_response", "No response received.")
        except requests.exceptions.Timeout:
            print(f"Timeout on attempt {attempt + 1}, retrying...")
        except Exception as e:
            # Best-effort boundary: any failure (HTTP error, bad JSON, ...)
            # is logged and retried rather than propagated to the route.
            print(f"Error on attempt {attempt + 1}: {e}, retrying...")
        else:
            # Append the assistant's response to the chat history
            formatted_chat_history.append({"role": "assistant", "content": assistant_response})
            return assistant_response, formatted_chat_history

        # Wait between attempts only; sleeping after the last one just
        # delays the error response.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)

    return "Error processing request. Please try again.", formatted_chat_history
# Text-to-Speech function
def text_to_speech(text, timeout=60):
    """Convert text to speech using the TTS API.

    Args:
        text: The text to synthesize.
        timeout: Seconds to wait for the TTS service before giving up
            (default 60). Without a timeout a stalled service would block
            the request indefinitely.

    Returns:
        The audio bytes returned by the service, base64-encoded as a
        ``str``, or ``None`` if the request fails or the service responds
        with a status other than 200.
    """
    payload = {
        "text": text,
        "voice_id": TTS_VOICE_ID
    }
    try:
        response = requests.post(TTS_API_URL, headers=TTS_HEADERS, json=payload, timeout=timeout)
        if response.status_code == 200:
            # Return the audio content as base64
            return base64.b64encode(response.content).decode('utf-8')
        print(f"Error getting TTS audio: {response.status_code}")
        return None
    except Exception as e:
        # Best effort: callers treat None as "no audio available".
        print(f"Error in TTS API call: {str(e)}")
        return None
# Speech-to-Text function
def speech_to_text(audio_file_path, timeout=120):
    """Convert speech to text using the STT API.

    The audio file is uploaded first; the ``file_url`` from the upload
    response is then sent to the transcription endpoint.

    Args:
        audio_file_path: Path of the audio file to transcribe. It is
            uploaded with the ``audio/mpeg`` content type.
        timeout: Seconds to wait for each HTTP request (default 120).
            Without a timeout a stalled service would block the request
            indefinitely.

    Returns:
        The ``'transcription'`` value from the service response, or ``None``
        if any step fails.
    """
    try:
        # Step 1: Upload the MP3 file (the file must stay open while the
        # request body is being sent).
        with open(audio_file_path, 'rb') as file:
            files = {'audio': (os.path.basename(audio_file_path), file, 'audio/mpeg')}
            upload_response = requests.post(
                f"{STT_API_BASE_URL}/upload",
                files=files,
                headers=STT_HEADERS,
                timeout=timeout,
            )

        # Check if upload was successful
        if upload_response.status_code != 200:
            print(f"Upload failed with status code {upload_response.status_code}")
            return None

        # Get the file URL from the response
        file_url = upload_response.json().get('file_url')
        if not file_url:
            print("No file URL in response")
            return None

        # Step 2: Send the file URL for transcription
        transcribe_payload = {
            "file_url": file_url,
            "prompt": "get all text with his lang and extract (DON'T translate)."
        }
        transcribe_response = requests.post(
            f"{STT_API_BASE_URL}/transcribe",
            json=transcribe_payload,
            headers=STT_HEADERS,
            timeout=timeout,
        )

        # Check if transcription was successful
        if transcribe_response.status_code != 200:
            print(f"Transcription failed with status code {transcribe_response.status_code}")
            return None

        # Get the transcription from the response
        return transcribe_response.json().get('transcription')
    except Exception as e:
        # Best effort: callers treat None as "transcription failed".
        print(f"Error in STT API call: {str(e)}")
        return None
# Routes
@app.route('/api/start-session', methods=['POST'])
def start_session():
    """Initialize a new session for a user and return the AI greeting.

    Reads an optional ``username`` from the JSON body (default ``'User'``),
    resets the stored chat history, asks the AI for an opening reply and
    converts it to speech.

    Returns:
        JSON with ``success``, ``message``, ``username``, ``ai_response``
        and ``audio`` (base64 string, or null when speech synthesis failed).
    """
    # Tolerate a missing, malformed or non-object body instead of failing
    # on ``data.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    username = data.get('username', 'User')

    # Initialize chat history for this user
    session['username'] = username
    session['chat_history'] = []

    # Generate initial AI greeting
    initial_prompt = "Hello! I'm your AI assistant. How can I help you today?"
    ai_response, chat_history = gpt4o_ai(initial_prompt, [], username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'message': 'Session started',
        'username': username,
        'ai_response': ai_response,
        'audio': audio_base64
    })
@app.route('/api/send-text', methods=['POST'])
def send_text():
    """Process text input from the user and return the AI response.

    Reads ``text`` from the JSON body, sends it to the AI together with the
    chat history stored in the session, saves the updated history and
    converts the reply to speech.

    Returns:
        JSON with ``success``, ``ai_response`` and ``audio`` (base64 string,
        or null when speech synthesis failed). When no text is supplied the
        response is ``{'success': False, 'error': ...}``.
    """
    # Tolerate a missing, malformed or non-object body instead of failing
    # on ``data.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    user_input = data.get('text', '')
    if not user_input:
        # Same error shape as the audio route; avoids sending an empty
        # message to the model.
        return jsonify({'success': False, 'error': 'No text provided'})

    # Get session data
    username = session.get('username', 'User')
    chat_history = session.get('chat_history', [])

    # Get AI response
    ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
    session['chat_history'] = chat_history

    # Convert AI response to speech
    audio_base64 = text_to_speech(ai_response)

    return jsonify({
        'success': True,
        'ai_response': ai_response,
        'audio': audio_base64
    })
@app.route('/api/send-audio', methods=['POST'])
def send_audio():
    """Process audio input from the user and return the AI response.

    Expects a multipart upload with an ``audio`` file. The file is stored
    under ``UPLOAD_FOLDER`` with a random name, transcribed, and the
    transcription is sent to the AI together with the chat history stored in
    the session. The temporary file is removed before returning.

    Returns:
        JSON with ``success``, ``transcription``, ``ai_response`` and
        ``audio`` (base64 string, or null when speech synthesis failed), or
        ``{'success': False, 'error': ...}`` when no file was sent or the
        transcription failed.
    """
    if 'audio' not in request.files:
        return jsonify({'success': False, 'error': 'No audio file provided'})
    audio_file = request.files['audio']

    # A random server-side name is used; the client-supplied filename never
    # reaches the filesystem.
    filename = f"{uuid.uuid4()}.mp3"
    file_path = os.path.join(UPLOAD_FOLDER, filename)

    try:
        # Saving inside the try ensures a partially written file is cleaned
        # up as well.
        audio_file.save(file_path)

        # Convert speech to text
        user_input = speech_to_text(file_path)
        if not user_input:
            return jsonify({'success': False, 'error': 'Failed to transcribe audio'})

        # Get session data
        username = session.get('username', 'User')
        chat_history = session.get('chat_history', [])

        # Get AI response
        ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
        session['chat_history'] = chat_history

        # Convert AI response to speech
        audio_base64 = text_to_speech(ai_response)

        return jsonify({
            'success': True,
            'transcription': user_input,
            'ai_response': ai_response,
            'audio': audio_base64
        })
    finally:
        # Clean up the temporary file; it may not exist if the save failed.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
@app.route('/api/interrupt', methods=['POST'])
def interrupt():
    """Acknowledge a user interruption during AI speech.

    This endpoint only returns a confirmation; it does not touch the session
    or stop any work in progress.

    Returns:
        JSON with ``success`` and ``message``.
    """
    # This endpoint would be called when the user starts speaking while the AI is talking
    # In a real implementation, you might need WebSockets for this kind of real-time interaction
    return jsonify({
        'success': True,
        'message': 'AI speech interrupted'
    })
@app.route('/api/end-session', methods=['POST'])
def end_session():
    """End the current session by clearing all session data.

    Returns:
        JSON with ``success`` and ``message``.
    """
    # Clear session data (username and chat history)
    session.clear()
    return jsonify({
        'success': True,
        'message': 'Session ended'
    })
if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be enabled unconditionally: it allows arbitrary code execution for
    # anyone who can reach the port. Opt in explicitly with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)