Hugging Face Spaces status (extraction residue): Space state was "Sleeping".
# Chainlit + OpenAI streaming chat app.
# Chainlit Python streaming reference: https://docs.chainlit.io/concepts/streaming/python

import os

from openai import AsyncOpenAI  # async OpenAI client (supports streamed completions)
import chainlit as cl  # Chainlit UI framework
from dotenv import load_dotenv

# Load environment variables from a local .env file so the API key stays out of source.
load_dotenv()

# Changed keys from Hugging Face so we dont go broke again.
# Fail fast at import time if the key is missing, rather than erroring mid-chat.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not found in .env file")
# ChatOpenAI Templates
# System prompt: fixed the broken numbering (was 1, 3, 2, 3, 4, ...) so the
# model receives a coherent, sequential list of instructions.
system_template = """You are a friendly AI assistant who:
1. Gives short, simple answers
2. Give analogies
3. Uses everyday language
4. Avoids long explanations
5. Gets straight to the point
6. Uses simple examples
7. Keeps responses under 4-5 points when possible
8. No technical terms
9. No "sure" or similar phrases at the start
10. No unnecessary words
11. End with a question or a call to action (if appropriate)
Keep it super simple and direct!
"""
# system_template = """You are a helpful assistant who always speaks in a pleasant tone!
# """

# User template: wraps the raw question with a step-by-step instruction and an
# optional context slot ({context}) — e.g. location, age — to steer the answer.
user_template = """
Think through your response step by step.
Question: {user_input}
Additional Context (if any): {context}
"""
# The on_chat_start hook marks a function that Chainlit executes at the start of
# a user session. The decorator was missing (likely lost in a paste); without it
# this function is never called — restored per the comment's stated intent.
@cl.on_chat_start
async def start_chat():
    """Initialize per-session model settings and message history, then greet the user."""
    settings = {
        "temperature": 1,  # OpenAI's default sampling temperature
        "max_tokens": 300,  # cap the length of each reply
        "top_p": 1,  # nucleus sampling: 1 = consider the full token distribution
        "frequency_penalty": 0,  # no penalty for repeating tokens
        "presence_penalty": 0,  # no penalty for re-mentioning topics
    }
    # Per-session state: completion settings plus the running conversation
    # history, seeded with the system prompt.
    cl.user_session.set("settings", settings)
    cl.user_session.set("messages", [{"role": "system", "content": system_template}])
    # Welcome message shown when the session opens.
    await cl.Message("Hello! I'm ready to help. Send me a message!").send()
# The on_message hook marks a function that Chainlit runs each time the chatbot
# receives a message from a user. The decorator was missing (likely lost in a
# paste); restored per the comment's stated intent.
@cl.on_message
async def main(message: cl.Message):
    """Stream an OpenAI chat completion in response to the incoming message.

    Appends the user turn to the session history, streams the model's reply
    token-by-token into the UI, then stores the assistant turn back into the
    session so later messages keep full context. Any failure is reported in
    the chat instead of crashing the session.
    """
    try:
        settings = cl.user_session.get("settings")
        messages = cl.user_session.get("messages")
        client = AsyncOpenAI(api_key=api_key)
        print(f"Received message: {message.content}")  # Debug print

        # Add user message to history.
        messages.append({"role": "user", "content": message.content})

        # Empty placeholder message that tokens are streamed into.
        msg = cl.Message(content="")
        await msg.send()

        # Stream the completion; each chunk carries at most one content delta.
        async for chunk in await client.chat.completions.create(
            model="gpt-3.5-turbo",  # chosen for cost; gpt-4o-mini was also tried
            messages=messages,
            stream=True,
            **settings,
        ):
            delta = chunk.choices[0].delta.content
            if delta:
                print(f"Received token: {delta}")  # Debug print
                await msg.stream_token(delta)

        # Remember the assistant's reply so the next turn has full context.
        messages.append({"role": "assistant", "content": msg.content})
        cl.user_session.set("messages", messages)
        await msg.update()
    except Exception as e:
        # Surface the error in the chat UI rather than failing silently.
        error_msg = f"An error occurred: {str(e)}"
        print(error_msg)  # Debug print
        await cl.Message(content=error_msg).send()