File size: 2,973 Bytes
b1ff431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import base64
import json
from flask import Flask, render_template, request
from worker import speech_to_text, text_to_speech, process_message
from flask_cors import CORS
import os

# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
# CORS configuration: the "/*" resource pattern with every origin ("*") allowed.
cors = CORS(
    app,
    resources={r"/*": {"origins": "*"}},
)

# Module-level chat history: a list of {"role": ..., "content": ...} dicts
# that the /process-message handler appends to.
conversation_history = []

@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page

@app.route('/speech-to-text', methods=['POST'])
def speech_to_text_route():
    """Transcribe an uploaded audio file and return the text as JSON.

    Reads the ``audio`` entry from ``request.files`` and the optional
    ``language`` form field (default ``'en-US'``), then passes the raw audio
    bytes and the language to ``speech_to_text``.

    Returns:
        A JSON response ``{"text": ...}`` with status 200, or
        ``{"error": "No audio file found"}`` with status 400 when the request
        carries no ``audio`` file.
    """
    def _json_reply(payload, status):
        # Serialize ``payload`` and wrap it in a JSON response object.
        return app.response_class(
            response=json.dumps(payload),
            status=status,
            mimetype='application/json',
        )

    # Guard clause: nothing to transcribe without an uploaded file.
    if 'audio' not in request.files:
        return _json_reply({'error': 'No audio file found'}, 400)

    upload = request.files['audio']
    lang_code = request.form.get('language', 'en-US')

    transcript = speech_to_text(upload.read(), lang_code)
    print("processing speech-to-text...")

    # Send the transcription back to the caller as JSON.
    reply = _json_reply({'text': transcript}, 200)
    print(reply)
    print(reply.data)
    return reply


# Maximum number of messages kept in ``conversation_history``.
MAX_HISTORY_MESSAGES = 5


@app.route('/process-message', methods=['POST'])
def process_message_route():
    """Answer a user's chat message and return the reply as text and speech.

    Expects a JSON object body with the keys ``userMessage`` and ``voice``.
    The user message is appended to the module-level ``conversation_history``,
    the history is passed to ``process_message``, and the resulting text is
    appended to the history and passed to ``text_to_speech`` together with
    the requested voice.

    Returns:
        A JSON response with status 200 containing ``openaiResponseText``
        (the reply text) and ``openaiResponseSpeech`` (the synthesized audio,
        base64-encoded, or ``""`` when ``text_to_speech`` returns nothing).
        A JSON ``{"error": ...}`` response with status 400 is returned when
        the body is not parseable JSON, is not a JSON object, or lacks
        ``userMessage`` / ``voice``.
    """
    def _json_reply(payload, status):
        # Serialize ``payload`` and wrap it in a JSON response object.
        return app.response_class(
            response=json.dumps(payload),
            status=status,
            mimetype='application/json',
        )

    def _trim_history():
        # Drop the oldest entries in place so that at most
        # MAX_HISTORY_MESSAGES remain (no-op when the list is short enough).
        del conversation_history[:-MAX_HISTORY_MESSAGES]

    # silent=True makes get_json() return None for a non-JSON Content-Type or
    # a malformed body instead of aborting the request, so the JSON error
    # below is what the client actually receives.
    payload = request.get_json(silent=True)
    if payload is None:
        return _json_reply(
            {'error': 'Invalid JSON or missing Content-Type header'}, 400
        )
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (e.g. a list) has no .get().
        return _json_reply({'error': 'Request body must be a JSON object'}, 400)

    user_message = payload.get('userMessage')  # The user's message
    voice = payload.get('voice')  # The user's preferred voice

    if user_message is None or voice is None:
        return _json_reply(
            {'error': 'Missing userMessage or voice in request body'}, 400
        )

    conversation_history.append({"role": "user", "content": user_message})
    # Trim before calling the model so it never sees more than the limit.
    _trim_history()

    print('user_message', user_message)
    print('voice', voice)
    # Process the conversation so far and get the assistant's reply text.
    response_text = process_message(conversation_history)

    conversation_history.append({"role": "assistant", "content": response_text})
    # Trim again: each request appends two messages, so a single trim per
    # request would let the history grow without bound.
    _trim_history()

    response_speech = text_to_speech(response_text, voice)

    # Base64-encode the audio so it can be embedded in the JSON body.
    response_speech_b64 = ""
    if response_speech:
        response_speech_b64 = base64.b64encode(response_speech).decode('utf-8')

    response = _json_reply(
        {
            "openaiResponseText": response_text,
            "openaiResponseSpeech": response_speech_b64,
        },
        200,
    )
    print(response)
    return response


if __name__ == "__main__":
    # Only start the server when this file is executed as a script:
    # host 0.0.0.0, port 8000.
    app.run(host='0.0.0.0', port=8000)