""" /usr/local/bin/python3.13 -m pip install python-dotenv """ import asyncio from openai import AsyncOpenAI import os from dotenv import load_dotenv import gradio as gr from pathlib import Path api_key = os.getenv("OPENAI_API_KEY") client = AsyncOpenAI(api_key=api_key) system_prompt = """ You are a friendly South African AI who uses South African slang! You provide positive feedback, help your users, and ask them insightful questions. You always output your response in the user's language """ # global variable for memory transcript_summary = "No summary yet" # create translation function async def translate(text, target_language): prompt = f"Translate the following text to {target_language}: \n\n{text}" # send prompt to OpenAI response = await client.chat.completions.create( model="gpt-4o-mini", messages=[{"role": "user", "content": prompt}], temperature = 0, ) # return the text output from the first completion return response.choices[0].message.content async def summarize_memory(previous_summary, new_messages): summarize_prompt = f""" You are a transcript compressor. Compress the conversation into 3 bullet points. Include personal instructions about how to talk to this user. Keep old information and do not overwrite it. Output in English. Also use plain text, no markdown. """ summary = await client.chat.completions.create( model= "gpt-4o-mini", messages=[ { "role": "system", "content": summarize_prompt, }, { "role": "user", "content": f"Previous Summary: {previous_summary}\n\n New messages: {new_messages}", }, ], ) return summary.choices[0].message.content async def chat_respond(message, history): global transcript_summary # construct the messages for the AI(AI knows the conversation context and your memory summary) messages = [ { "role": "system", "content": system_prompt + f"\n\nSummary: {transcript_summary}", }, {"role": "user", "content":message}, ] # Send the request to the API with streaming stream= await client.chat.completions.create( model="gpt-4o-mini", messages=messages, stream=True, # returns chunks of text as the AI generates them temperature=0.7, # adds some creativity to the AI's output ) # collect the streaming response assistant_response = "" async for chunk in stream: content = chunk.choices[0].delta.content # gets the text generated in this chunk if content: assistant_response += content yield assistant_response # Update memory after response is complete transcript_summary = await summarize_memory( transcript_summary, f"User: {message}\nAI: {assistant_response}", ) # Get translation and memory async def get_translations_and_memory(message, history): global transcript_summary # Get the latest assistant response if history and len(history) > 0: latest_response = history[-1][-1] # Get the assistants latest response # Get translations in parallel translations = await asyncio.gather( translate(latest_response, "English"), translate(latest_response, "Afrikaans"), translate(latest_response, "Zulu"), translate(latest_response, "Xhosa") ) translations_text = f"""**🇿🇦 Translations:** **English:** {translations[0]} **Afrikaans** {translations[1]} **Zulu** {translations[2]} **Xhosa** {translations[3]}""" memory_text = f"""**🧠Internal Memory:** {transcript_summary}""" return translations_text, memory_text return "No translations available yet.", "No memory summary yet." # create gradio interface # Define interface def create_interface(): with gr.Blocks(title="Saffalingual AI Chatbot", theme=gr.themes.Soft()) as demo: # Header section gr.HTML("
            "<div style='text-align: center;'>"
            "<h1>Saffalingual AI Chatbot</h1>"
            "<p>Chat with AI and see translations in four of South Africa's official languages!</p>"
            "</div>"
") # Layout with rows and columns with gr.Row(): #left column features (Chatbot and input) with gr.Column(scale=2): chatbot = gr.Chatbot( height=500, show_label = False, #no heading container= True, #wrapped bubble_full_width=False ) with gr.Row(): msg = gr.Textbox( placeholder="Type your message here...", show_label=False, scale=4, container=False, ) submit_btn = gr.Button("Send", variant="primary", scale=1) clear_btn = gr.Button("Clear Chat", variant="secondary") # Right column (Translations and memory) with gr.Column(scale=1): translations_box = gr.Markdown( value="Translations will appear here after you send a message.", label = "Translations", #heading ) memory_box = gr.Markdown( value="Memory summary will appear here.", label="Memory" ) #Event handlers async def respond_and_update(message, history): # get chat response last_response="" async for response in chat_respond(message, history): last_response=response # Update chatbot with streaming response new_history = history + [[message, last_response]] # update new_history in the chatbot component, clear input box, keep values the same for translations and memory yield new_history, "", translations_box.value, memory_box.value # After response is complete, get tranlsations and memory final_history = history + [[message, last_response]] translations, memory = await get_translations_and_memory( message, final_history ) yield final_history, "", translations, memory def clear_chat(): global transcript_summary transcript_summary = "No summary yet" return ( [], "", "Translations will appear here after you send a message.", "Memory summary will appear here.", ) # Connect events submit_btn.click( respond_and_update, inputs=[msg, chatbot], outputs=[chatbot, msg, translations_box, memory_box], ) msg.submit( respond_and_update, inputs=[msg, chatbot], outputs=[chatbot, msg, translations_box, memory_box], ) clear_btn.click( clear_chat, outputs=[chatbot, msg, translations_box, memory_box] ) return demo if __name__ == "__main__": demo = create_interface() demo.queue().launch()