# You can find this code for Chainlit python streaming here
# (https://docs.chainlit.io/concepts/streaming/python)
"""Chainlit chat app that streams OpenAI chat-completion replies.

On chat start the per-session generation settings and message history are
stored in the Chainlit user session. Every incoming user message is appended
to that history, sent to the OpenAI chat-completions endpoint with
``stream=True``, and the reply is forwarded to the UI token by token.
"""

import logging
import os

import chainlit as cl  # importing chainlit for our app
from dotenv import load_dotenv
from openai import AsyncOpenAI  # importing openai for API usage

load_dotenv()

logger = logging.getLogger(__name__)

# Changed keys from Hugging Face so we don't go broke again
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not found in .env file")

# One client for the whole process: building a new AsyncOpenAI per message
# allocates a fresh HTTP connection pool each time and never closes it.
client = AsyncOpenAI(api_key=api_key)

# gpt-3.5-turbo was picked for cost; gpt-4o-mini was also tried, but 3.5
# seemed to give better answers for this prompt.
MODEL_NAME = "gpt-3.5-turbo"

# Generation settings forwarded verbatim as keyword arguments to
# chat.completions.create().
DEFAULT_SETTINGS = {
    "temperature": 1,  # API default; lower it (towards 0) for more deterministic answers
    "max_tokens": 300,  # upper bound on the length of each reply
    "top_p": 1,  # 1 disables nucleus-sampling truncation
    "frequency_penalty": 0,  # no penalty for repeating the same tokens
    "presence_penalty": 0,  # no penalty for tokens that already appeared
}

# ChatOpenAI Templates
system_template = """You are a friendly AI assistant who:
1. Gives short, simple answers
2. Give analogies
3. Uses everyday language
4. Avoids long explanations
5. Gets straight to the point
6. Uses simple examples
7. Keeps responses under 4-5 points when possible
8. No technical terms
9. No "sure" or similar phrases at the start
10. No unnecessary words
11. End with a question or a call to action (if appropriate)

Keep it super simple and direct!
"""
# Simpler persona that was tried earlier:
#   "You are a helpful assistant who always speaks in a pleasant tone!"

# User template: a place to wrap the user's question with extra context
# (for example location or age) to steer the answer.
# NOTE: nothing in this file applies this template yet; main() sends the
# user's text unchanged.
user_template = """
Think through your response step by step.
Question: {user_input}
Additional Context (if any): {context}
"""


def _new_history():
    """Return a fresh conversation history holding only the system prompt."""
    return [{"role": "system", "content": system_template}]


@cl.on_chat_start  # marks a function that will be executed at the start of a user session
async def start_chat():
    """Initialise the session's settings and history, then greet the user."""
    # Copy the defaults so one session can never mutate another's settings.
    cl.user_session.set("settings", dict(DEFAULT_SETTINGS))
    cl.user_session.set("messages", _new_history())

    # Welcome message
    await cl.Message("Hello! I'm ready to help. Send me a message!").send()


@cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
async def main(message: cl.Message):
    """Answer one user message by streaming a chat completion into the UI.

    The stored history is replaced only after the reply has streamed
    completely, so a failed request leaves the conversation as it was.
    Any exception is logged with its traceback and reported to the user as
    a chat message instead of propagating.

    Args:
        message: The incoming Chainlit message; only ``content`` is read.
    """
    try:
        # Fall back to defaults if the chat-start hook did not populate the
        # session, instead of failing on None.
        settings = cl.user_session.get("settings") or dict(DEFAULT_SETTINGS)
        history = cl.user_session.get("messages") or _new_history()

        logger.debug("Received message: %s", message.content)

        # Build the request on a copy; it is committed to the session only
        # on success, so an API error cannot leave a dangling user turn.
        pending = [*history, {"role": "user", "content": message.content}]

        # Empty message that the streamed tokens are appended to
        msg = cl.Message(content="")
        await msg.send()

        # Call OpenAI
        stream = await client.chat.completions.create(
            model=MODEL_NAME,
            messages=pending,
            stream=True,
            **settings,
        )
        async for chunk in stream:
            # Some stream chunks carry no choices; indexing [0] would raise.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token:
                logger.debug("Received token: %s", token)
                await msg.stream_token(token)

        # Remember the exchange so the next turn has the full conversation
        pending.append({"role": "assistant", "content": msg.content})
        cl.user_session.set("messages", pending)
        await msg.update()

    except Exception as e:  # top-level handler boundary: report, don't crash
        error_msg = f"An error occurred: {str(e)}"
        logger.exception(error_msg)
        await cl.Message(content=error_msg).send()