File size: 4,041 Bytes
1975a0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

import base64
import os
import tempfile

import gradio as gr
import openai

# SECURITY NOTE(review): a live-looking API token is hard-coded and committed to
# source. Rotate this credential and supply it ONLY via the LEPTON_API_TOKEN
# environment variable; the literal below should be removed once rotated.
LEPTON_API_TOKEN = "o36eVFfCrItZ9hcImxjzeCxk0jPfc7fg"

# OpenAI-compatible client pointed at a Lepton-hosted Llama 3.2 3B endpoint.
# The environment variable takes precedence; the module constant is a fallback.
client = openai.OpenAI(
    base_url="https://llama3-2-3b.lepton.run/api/v1/",
    api_key=os.environ.get('LEPTON_API_TOKEN', LEPTON_API_TOKEN)
)

# System prompt prepended to every conversation to steer the assistant toward
# collections/account-resolution dialogue.
SYSTEM_PROMPT = """
You are an AI assistant designed to conduct conversations related to collecting outstanding balances or resolving account issues. Your goal is to professionally address the situation while being empathetic and solution-oriented. Maintain a friendly yet professional tone throughout the interaction.
"""

def transcribe_audio(audio):
    """Send a recorded/uploaded audio file to the model and return its text.

    Parameters
    ----------
    audio : str | None
        Filesystem path produced by the Gradio audio widget, or None when
        no recording was captured.

    Returns
    -------
    str
        The transcribed text, or a human-readable error message. This
        function never raises, so the Gradio callback always completes.
    """
    if audio is None:
        return "No audio detected. Please try again."

    try:
        # Read and base64-encode inside the try so a missing/unreadable file
        # is reported like any other failure instead of crashing the callback
        # (originally the open() sat outside the try).
        with open(audio, "rb") as f:
            audio_data = base64.b64encode(f.read()).decode()

        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            extra_body={
                "tts_audio_format": "opus",
                "tts_audio_bitrate": 16,
                "require_audio": True,
                "tts_preset_id": "jessica",
            },
            messages=[
                {"role": "user", "content": [{"type": "audio", "data": audio_data}]}
            ],
            max_tokens=128,
            stream=True,
        )

        # Accumulate streamed text deltas; join once rather than quadratic +=.
        parts = []
        for chunk in completion:
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                parts.append(content)

        return "".join(parts)
    except Exception as e:
        return f"Error in transcribing audio: {str(e)}"

def ai_response(user_message, chat_history):
    """Generate the assistant's next turn: reply text plus synthesized speech.

    Parameters
    ----------
    user_message : str
        The transcribed user utterance.
    chat_history : list[tuple[str, str]]
        Prior (user, assistant) exchanges; mutated in place with the new turn.

    Returns
    -------
    tuple[list, str | None]
        The updated history, and the path to an opus file containing the
        spoken reply — or None when the stream carried no audio or an
        error occurred.
    """
    try:
        # The API is stateless, so rebuild the full conversation each turn.
        context = [{"role": "system", "content": SYSTEM_PROMPT}]
        for human, ai in chat_history:
            context.append({"role": "user", "content": human})
            context.append({"role": "assistant", "content": ai})
        context.append({"role": "user", "content": user_message})

        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            extra_body={
                "tts_audio_format": "opus",
                "tts_audio_bitrate": 16,
                "require_audio": True,
                "tts_preset_id": "jessica",
            },
            messages=context,
            max_tokens=256,
            stream=True,
        )

        text_parts = []
        audio_parts = []
        for chunk in completion:
            if not chunk.choices:
                continue
            choice = chunk.choices[0]
            if choice.delta.content:
                text_parts.append(choice.delta.content)
            # 'audio' is a Lepton extension field; absent on plain chunks.
            audio = getattr(choice, 'audio', None)
            if audio:
                audio_parts.extend(audio)

        ai_message = "".join(text_parts)
        chat_history.append((user_message, ai_message))

        if not audio_parts:
            # No speech came back: don't hand Gradio an empty opus file
            # (the original wrote and returned one unconditionally).
            return chat_history, None

        audio_output = b''.join(base64.b64decode(a) for a in audio_parts)
        # Unique temp file per request so concurrent sessions don't clobber
        # each other's 'ai_response.opus'.
        with tempfile.NamedTemporaryFile(suffix='.opus', delete=False) as f:
            f.write(audio_output)
            audio_path = f.name

        return chat_history, audio_path
    except Exception as e:
        error_message = f"Error in generating AI response: {str(e)}"
        chat_history.append((user_message, error_message))
        return chat_history, None

def process_audio(audio, chat_history):
    """Run one full conversational turn.

    Transcribes the user's audio, asks the assistant for a reply, and
    returns (chatbot display, reply audio path, persisted state).
    """
    transcript = transcribe_audio(audio)
    updated_history, reply_audio = ai_response(transcript, chat_history)
    return updated_history, reply_audio, updated_history

# Assemble the Gradio UI: microphone/uploaded audio in, chat transcript and
# a spoken reply out.  gr.State threads the conversation history across calls.
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
history_state = gr.State([])

chat_display = gr.Chatbot()
reply_audio = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=process_audio,
    inputs=[audio_input, history_state],
    outputs=[chat_display, reply_audio, gr.State()],
    title="AI Assistant for Collections",
    description="Have a conversation with an AI assistant to resolve account issues or outstanding balances. You can speak through your microphone or upload an audio file.",
    allow_flagging="never"
)

iface.launch()