BACK-END

Sleeping

App Files Files Community

BACK-END / app.py

CORVO-AI

Update app.py

78ac480 verified 9 months ago

raw

history blame contribute delete

10.9 kB

	from flask import Flask, request, jsonify, session
	import requests
	import json
	import time
	import os
	import uuid
	import threading
	import base64
	from flask_cors import CORS
	from flask_session import Session

	# Application object; flask_cors' CORS() is applied to the whole app.
	app = Flask(__name__)
	CORS(app)

	# Server-side session configuration (Flask-Session, "filesystem" backend).
	# NOTE(review): SECRET_KEY is a fresh random value every time this module is
	# imported, so it differs between restarts and between worker processes --
	# confirm that is intended.
	app.config.update(
	    SECRET_KEY=os.urandom(24),
	    SESSION_TYPE="filesystem",
	    SESSION_PERMANENT=True,
	)
	Session(app)

	# Scratch directory for uploaded audio; created at import time if missing.
	UPLOAD_FOLDER = 'temp_audio'
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)

	# API endpoints and headers
	#
	# Each upstream Space is called with a "spaces-jwt" cookie. The values
	# embedded below are short-lived tokens (their JWT payloads appear to carry
	# iat/exp claims about a day apart), so each cookie can be overridden through
	# an environment variable of the same name as its constant; the embedded
	# value is only the fallback used when that variable is unset or empty.
	# SECURITY: credentials committed to source control should be treated as
	# leaked -- rotate them and supply fresh values via the environment.
	TTS_API_URL = "https://corvo-ai-tts.hf.space/synthesize"
	TTS_AUTH_COOKIE = os.environ.get("TTS_AUTH_COOKIE") or "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4NzgyLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1RUUyIsImV4cCI6MTc0NzkzNTE4MiwiaXNzIjoiaHR0cHM6Ly9odWdnaW5nZmFjZS5jbyJ9.c5FrznT6KdBkVFUI7Oi0cTMo_w2IVcpw926D9dZ4nsa2N_pJtSYNXfSWU4bmBVKaol8-IFsdZ9rlvWwUtpYfCg"
	TTS_HEADERS = {"Content-Type": "application/json", "cookie": TTS_AUTH_COOKIE}
	TTS_VOICE_ID = "PVL:09cc01e9-e3b3-40cd-9cc3-b6670285fc99"  # Ana de Armas voice

	STT_API_BASE_URL = "https://corvo-ai-transcript.hf.space"
	STT_AUTH_COOKIE = os.environ.get("STT_AUTH_COOKIE") or "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODMwLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL3RyYW5zY3JpcHQiLCJleHAiOjE3NDc5MzUyMzAsImlzcyI6Imh0dHBzOi8vaHVnZ2luZ2ZhY2UuY28ifQ.1Lj_JBeVsOgIz5mQ8EVcfaNz2JAXqzUYD1IWUZQ7zuVBEXETD99bz3okgAMPAtyDDKm30FoUqoaLp3u_vtbMBA"
	STT_HEADERS = {"Cookie": STT_AUTH_COOKIE}

	AI_API_URL = "https://corvo-ai-xx-xx.hf.space/chat"
	AI_AUTH_COOKIE = os.environ.get("AI_AUTH_COOKIE") or "spaces-jwt=eyJhbGciOiJFZERTQSJ9.eyJyZWFkIjp0cnVlLCJwZXJtaXNzaW9ucyI6eyJyZXBvLmNvbnRlbnQucmVhZCI6dHJ1ZX0sIm9uQmVoYWxmT2YiOnsia2luZCI6InVzZXIiLCJfaWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMTAiLCJ1c2VyIjoiQ09SVk8tQUkiLCJzZXNzaW9uSWQiOiI2NzQ2ZTcwYzQ5MGM3M2EwOTdiMzBiMWQifSwiaWF0IjoxNzQ3ODQ4ODYzLCJzdWIiOiIvc3BhY2VzL0NPUlZPLUFJL1hYLVhYIiwiZXhwIjoxNzQ3OTM1MjYzLCJpc3MiOiJodHRwczovL2h1Z2dpbmdmYWNlLmNvIn0.SsDn_gx7s7DKQ9Qb4En8Ij25XqP_IgnZVI6AEhySI1O1YjBQ4POWCVZHd-i92GlYwlm_zETzCtNNQT7qBE7jDQ"
	AI_HEADERS = {
	    "Content-Type": "application/json",
	    "cookie": AI_AUTH_COOKIE,
	}

	# Helper function for thinking animation (not used in API but kept for reference)
	# Writes "." to stdout (flushed, no newline) with a 0.5 s pause after each dot,
	# for up to three dots, and emits a carriage return "\r" to move the cursor
	# back to the start of the line.
	# NOTE(review): indentation was lost in this copy of the file, so it cannot be
	# determined here whether the "\r" print belongs inside the loop (after every
	# dot) or runs once after the loop -- confirm against the original source.
	def thinking_animation():
	for _ in range(3):
	print(".", end="", flush=True)
	time.sleep(0.5)
	print("\r", end="", flush=True)

	# AI text generation function

	# Default system prompt sent as the first message of every request.
	# The text is runtime data for the model and is reproduced verbatim.
	_DEFAULT_SYSTEM_PROMPT = "you ar rinet AI model you are Libyan AI okay ?\n you develped by : MR OMAR NUAWRA\n\n so i want you to text with user not like chat but in call mode becasue you output will go to TTS model so the roles is \n\n roles : don't put codes math points...etc something not understood in TTS , \n use uhh umm ahh....etc to be so realastic \n you are a female voice \n don't make your response so tall we need small perfect and realastic"


	def _build_chat_messages(user_input, chat_history, username):
	    """Return the role-tagged message list for one chat request.

	    The list always starts with exactly one system message: the one already
	    at the head of ``chat_history`` if present, otherwise the default prompt.
	    Any other system entries in the history are dropped, the remaining
	    entries are copied as ``{"role", "content"}`` dicts, and the new user
	    turn is appended as ``"<username>: <user_input>"``.
	    """
	    history = chat_history or []

	    system_content = None
	    if history and history[0].get('role') == 'system':
	        system_content = history[0].get('content')
	    messages = [{"role": "system", "content": system_content or _DEFAULT_SYSTEM_PROMPT}]

	    messages.extend(
	        {"role": entry['role'], "content": entry['content']}
	        for entry in history
	        if entry.get('role') != 'system'
	    )
	    messages.append({"role": "user", "content": f"{username}: {user_input}"})
	    return messages


	def gpt4o_ai(user_input, chat_history, username):
	    """Function to interact with the AI API in a chat format.

	    Args:
	        user_input: Text of the current user turn.
	        chat_history: List of ``{"role", "content"}`` dicts from earlier
	            turns (may be empty or None).
	        username: Name prefixed to the user turn.

	    Returns:
	        A ``(assistant_response, formatted_chat_history)`` tuple. On success
	        the history ends with the assistant's reply. If every attempt fails,
	        the first element is a fixed error message and the history ends with
	        the user turn.

	    The request is attempted up to 5 times; any exception (timeout, HTTP
	    error status, invalid JSON, ...) triggers a retry after a 10 s pause.
	    """
	    # Previously, a history that already began with a system message caused
	    # the prompt to be skipped *and* filtered out, so the request carried no
	    # system prompt at all. The helper guarantees exactly one.
	    formatted_chat_history = _build_chat_messages(user_input, chat_history, username)

	    payload = {
	        "chat_history": formatted_chat_history
	    }

	    max_retries = 5
	    retry_delay = 10
	    timeout = 600

	    for attempt in range(1, max_retries + 1):
	        try:
	            print("AI THINKING...")
	            response = requests.post(AI_API_URL, headers=AI_HEADERS, data=json.dumps(payload), timeout=timeout)
	            response.raise_for_status()
	            assistant_response = response.json().get("assistant_response", "No response received.")
	        except requests.exceptions.Timeout:
	            print(f"Timeout on attempt {attempt}, retrying...")
	        except Exception as e:
	            # Deliberately broad: any failure of this attempt is logged and retried.
	            print(f"Error on attempt {attempt}: {e}, retrying...")
	        else:
	            # Append the assistant's response to the chat history
	            formatted_chat_history.append({"role": "assistant", "content": assistant_response})
	            return assistant_response, formatted_chat_history

	        # Only pause when another attempt will follow; sleeping after the
	        # last failure would just delay the error response.
	        if attempt < max_retries:
	            time.sleep(retry_delay)

	    return "Error processing request. Please try again.", formatted_chat_history

	# Text-to-Speech function
	def text_to_speech(text, timeout=120):
	    """Convert text to speech using the TTS API.

	    Args:
	        text: Text to synthesize.
	        timeout: Seconds to wait for the TTS service before giving up.
	            Without a timeout ``requests`` waits indefinitely, which can
	            block the request handler forever if the service stalls.

	    Returns:
	        The response body (audio bytes) encoded as a base64 string, or
	        ``None`` when ``text`` is empty, the service answers with a
	        non-200 status, or the call raises.
	    """
	    if not text:
	        # Nothing to synthesize; skip the network round trip.
	        return None

	    payload = {
	        "text": text,
	        "voice_id": TTS_VOICE_ID
	    }

	    try:
	        response = requests.post(TTS_API_URL, headers=TTS_HEADERS, json=payload, timeout=timeout)
	    except Exception as e:
	        # Best effort: callers treat None as "no audio available".
	        print(f"Error in TTS API call: {str(e)}")
	        return None

	    if response.status_code != 200:
	        print(f"Error getting TTS audio: {response.status_code}")
	        return None

	    # Return the audio content as base64
	    return base64.b64encode(response.content).decode('utf-8')

	# Speech-to-Text function
	def speech_to_text(audio_file_path, timeout=120):
	    """Convert speech to text using the STT API.

	    Works in two steps: the file is uploaded to ``/upload`` (always declared
	    as ``audio/mpeg``), then the ``file_url`` from that response is posted
	    to ``/transcribe``.

	    Args:
	        audio_file_path: Path of the audio file to transcribe.
	        timeout: Seconds to wait for each of the two HTTP calls. Without a
	            timeout ``requests`` waits indefinitely on a stalled service.

	    Returns:
	        The ``transcription`` value from the service response, or ``None``
	        if the file cannot be read, either call returns a non-200 status,
	        the upload response has no ``file_url``, or any exception occurs.
	    """
	    try:
	        # Step 1: Upload the MP3 file
	        with open(audio_file_path, 'rb') as audio_fh:
	            files = {'audio': (os.path.basename(audio_file_path), audio_fh, 'audio/mpeg')}
	            upload_response = requests.post(
	                f"{STT_API_BASE_URL}/upload",
	                files=files,
	                headers=STT_HEADERS,
	                timeout=timeout,
	            )

	        # Check if upload was successful
	        if upload_response.status_code != 200:
	            print(f"Upload failed with status code {upload_response.status_code}")
	            return None

	        # Get the file URL from the response
	        file_url = upload_response.json().get('file_url')
	        if not file_url:
	            print("No file URL in response")
	            return None

	        # Step 2: Send the file URL for transcription
	        transcribe_payload = {
	            "file_url": file_url,
	            "prompt": "get all text with his lang and extract (DON'T translate)."
	        }
	        transcribe_response = requests.post(
	            f"{STT_API_BASE_URL}/transcribe",
	            json=transcribe_payload,
	            headers=STT_HEADERS,
	            timeout=timeout,
	        )

	        # Check if transcription was successful
	        if transcribe_response.status_code != 200:
	            print(f"Transcription failed with status code {transcribe_response.status_code}")
	            return None

	        # Get the transcription from the response
	        return transcribe_response.json().get('transcription')
	    except Exception as e:
	        # Best effort: callers treat None as "transcription failed".
	        print(f"Error in STT API call: {str(e)}")
	        return None

	# Routes
	@app.route('/api/start-session', methods=['POST'])
	def start_session():
	    """Initialize a new session for a user.

	    Reads an optional ``username`` from the JSON body (default ``'User'``),
	    resets the session's chat history, asks the AI for an opening reply and
	    returns that reply as text plus base64 audio (``audio`` is ``None`` when
	    speech synthesis fails).
	    """
	    # get_json(silent=True) yields None instead of raising when the body is
	    # missing or not JSON; fall back to an empty dict so the default
	    # username applies rather than crashing on ``None.get``.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        data = {}
	    username = data.get('username', 'User')

	    # Initialize chat history for this user
	    session['username'] = username
	    session['chat_history'] = []

	    # Generate initial AI greeting
	    initial_prompt = "Hello! I'm your AI assistant. How can I help you today?"
	    ai_response, chat_history = gpt4o_ai(initial_prompt, [], username)
	    session['chat_history'] = chat_history

	    # Convert AI response to speech
	    audio_base64 = text_to_speech(ai_response)

	    return jsonify({
	        'success': True,
	        'message': 'Session started',
	        'username': username,
	        'ai_response': ai_response,
	        'audio': audio_base64
	    })

	@app.route('/api/send-text', methods=['POST'])
	def send_text():
	    """Process text input from user and get AI response.

	    Expects a JSON body with a non-empty ``text`` string. Responds with the
	    AI reply as text plus base64 audio (``audio`` is ``None`` when speech
	    synthesis fails), or with ``success: False`` and HTTP 400 when no text
	    was supplied.
	    """
	    # get_json(silent=True) yields None instead of raising when the body is
	    # missing or not JSON.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        data = {}
	    user_input = data.get('text', '')

	    # Reject empty input up front instead of sending a blank turn to the AI.
	    if not isinstance(user_input, str) or not user_input.strip():
	        return jsonify({'success': False, 'error': 'No text provided'}), 400

	    # Get session data
	    username = session.get('username', 'User')
	    chat_history = session.get('chat_history', [])

	    # Get AI response
	    ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
	    session['chat_history'] = chat_history

	    # Convert AI response to speech
	    audio_base64 = text_to_speech(ai_response)

	    return jsonify({
	        'success': True,
	        'ai_response': ai_response,
	        'audio': audio_base64
	    })

	@app.route('/api/send-audio', methods=['POST'])
	def send_audio():
	    """Process audio input from user and get AI response.

	    Expects a multipart upload with an ``audio`` file part. The upload is
	    written to a uniquely named ``.mp3`` file under ``UPLOAD_FOLDER``,
	    transcribed, answered by the AI and converted back to speech. The
	    temporary file is removed whether or not processing succeeds.
	    """
	    if 'audio' not in request.files:
	        return jsonify({'success': False, 'error': 'No audio file provided'})

	    audio_file = request.files['audio']

	    # Save the audio file temporarily
	    filename = f"{uuid.uuid4()}.mp3"
	    file_path = os.path.join(UPLOAD_FOLDER, filename)

	    try:
	        # Saving happens inside the try so that a partially written file is
	        # still cleaned up if the save itself fails.
	        audio_file.save(file_path)

	        # Convert speech to text
	        user_input = speech_to_text(file_path)

	        if not user_input:
	            return jsonify({'success': False, 'error': 'Failed to transcribe audio'})

	        # Get session data
	        username = session.get('username', 'User')
	        chat_history = session.get('chat_history', [])

	        # Get AI response
	        ai_response, chat_history = gpt4o_ai(user_input, chat_history, username)
	        session['chat_history'] = chat_history

	        # Convert AI response to speech
	        audio_base64 = text_to_speech(ai_response)

	        return jsonify({
	            'success': True,
	            'transcription': user_input,
	            'ai_response': ai_response,
	            'audio': audio_base64
	        })
	    finally:
	        # Clean up the temporary file; it may not exist if the save failed
	        # before creating it.
	        try:
	            os.remove(file_path)
	        except FileNotFoundError:
	            pass

	@app.route('/api/interrupt', methods=['POST'])
	def interrupt():
	    """Acknowledge that the user interrupted the AI's speech.

	    Intended to be called when the user starts speaking while the AI is
	    talking. It performs no work beyond returning a fixed acknowledgement;
	    real-time interruption would likely need something like WebSockets.
	    """
	    acknowledgement = {
	        'success': True,
	        'message': 'AI speech interrupted',
	    }
	    return jsonify(acknowledgement)

	@app.route('/api/end-session', methods=['POST'])
	def end_session():
	    """Discard everything stored in the current session and confirm."""
	    session.clear()  # drops username and chat history along with any other keys

	    confirmation = {
	        'success': True,
	        'message': 'Session ended',
	    }
	    return jsonify(confirmation)

	if __name__ == '__main__':
	    # The server listens on all interfaces, so the interactive debugger must
	    # not be on by default: it allows arbitrary code execution from the
	    # browser. Opt in explicitly for local development with FLASK_DEBUG=1.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
	    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)