"""Flask front end exposing speech-to-text and chat endpoints.

Routes:
    GET  /                 -- renders ``index.html``.
    POST /speech-to-text   -- multipart upload with an ``audio`` file and an
                              optional ``language`` form field.
    POST /process-message  -- JSON body with ``userMessage`` and ``voice``.

The actual speech/text processing is delegated to helpers imported from the
project-local ``worker`` module.
"""

import base64
import json
import os

from flask import Flask, render_template, request
from flask_cors import CORS

from worker import speech_to_text, text_to_speech, process_message

app = Flask(__name__)
cors = CORS(app, resources={r"/*": {"origins": "*"}})

# Maximum number of messages retained in ``conversation_history``.
MAX_HISTORY_MESSAGES = 5

# Module-level chat history, shared by every request this process serves.
# NOTE(review): this is not per-user/per-session state -- confirm that a
# single shared conversation is intended.
conversation_history = []


def _json_response(payload, status=200):
    """Build a response whose body is ``payload`` serialized as JSON."""
    return app.response_class(
        response=json.dumps(payload),
        status=status,
        mimetype='application/json',
    )


def _trim_history():
    """Drop the oldest entries so at most MAX_HISTORY_MESSAGES remain."""
    excess = len(conversation_history) - MAX_HISTORY_MESSAGES
    if excess > 0:
        # In-place deletion keeps the same list object for other references.
        del conversation_history[:excess]


@app.route('/')
def index():
    """Render the landing page template."""
    return render_template('index.html')


@app.route('/speech-to-text', methods=['POST'])
def speech_to_text_route():
    """Transcribe an uploaded audio file.

    Reads the ``audio`` file part and the optional ``language`` form field
    (default ``'en-US'``) and passes both to ``speech_to_text``.

    Returns:
        200 with ``{"text": ...}`` on success, or 400 with ``{"error": ...}``
        when the request carries no ``audio`` file.
    """
    if 'audio' not in request.files:
        return _json_response({'error': 'No audio file found'}, 400)

    audio_file = request.files['audio']
    language = request.form.get('language', 'en-US')
    audio_binary = audio_file.read()

    # Announce the work before it starts rather than after it has finished.
    print("processing speech-to-text...")
    text = speech_to_text(audio_binary, language)

    # Return the response back to the user in JSON format
    response = _json_response({'text': text})
    print(response)
    print(response.data)
    return response


@app.route('/process-message', methods=['POST'])
def process_message_route():
    """Answer a chat message and return the reply as text plus audio.

    Expects a JSON object with ``userMessage`` and ``voice``. The message is
    appended to the shared history, the history is passed to
    ``process_message``, and the reply is passed to ``text_to_speech``.

    Returns:
        200 with ``{"openaiResponseText": ..., "openaiResponseSpeech": ...}``
        where the speech value is base64 text (empty string when
        ``text_to_speech`` returns a falsy value), or 400 with
        ``{"error": ...}`` when the body is not usable.
    """
    # silent=True yields None for a missing/incorrect Content-Type or a body
    # that fails to parse, so the custom 400 below is actually reachable.
    payload = request.get_json(silent=True)
    if payload is None:
        return _json_response(
            {'error': 'Invalid JSON or missing Content-Type header'}, 400
        )

    # Valid JSON that is not an object (e.g. a list) has no fields to read.
    if not isinstance(payload, dict):
        return _json_response(
            {'error': 'Missing userMessage or voice in request body'}, 400
        )

    user_message = payload.get('userMessage')  # Get user's message from their request
    voice = payload.get('voice')  # Get user's preferred voice from their request

    if user_message is None or voice is None:
        return _json_response(
            {'error': 'Missing userMessage or voice in request body'}, 400
        )

    conversation_history.append({"role": "user", "content": user_message})
    # Keep the history to the last MAX_HISTORY_MESSAGES messages
    _trim_history()

    print('user_message', user_message)
    print('voice', voice)

    # Hand the recent history to the worker and get the reply text back
    response_text = process_message(conversation_history)
    conversation_history.append({"role": "assistant", "content": response_text})
    # Trim again: without this the list gains one net entry per request.
    _trim_history()

    response_speech = text_to_speech(response_text, voice)
    response_speech_b64 = ""
    if response_speech:
        response_speech_b64 = base64.b64encode(response_speech).decode('utf-8')

    response = _json_response({
        "openaiResponseText": response_text,
        "openaiResponseSpeech": response_speech_b64,
    })
    print(response)
    return response


if __name__ == "__main__":
    app.run(port=8000, host='0.0.0.0')