"""Streamlit demo: translate an English question into a MongoDB aggregation pipeline.

The user supplies an example document and a question; the OpenAI chat
completion endpoint is asked for an aggregation pipeline, which is then shown
as a code block together with a feedback form.
"""

import os
import random
from datetime import datetime

import openai
import streamlit as st
from pymongo import MongoClient

# Schema Versions
# 1. First version, using text-davinci-003 model
# 2. Switched to gpt-3.5-turbo model
# 3. Logging the model as well

# The OpenAI API key is read from the Streamlit secrets store (key "API_KEY").
openai.api_key = st.secrets["API_KEY"]

# Default content of the "example document" text area.
MOVIES_EXAMPLE_DOC = """{
  _id: ObjectId("573a1390f29313caabcd4135"),
  genres: [ 'Short' ],
  runtime: 1,
  cast: [ 'Charles Kayser', 'John Ott' ],
  num_mflix_comments: 0,
  title: 'Blacksmith Scene',
  countries: [ 'USA' ],
  released: ISODate("1893-05-09T00:00:00.000Z"),
  directors: [ 'William K.L. Dickson' ],
  rated: 'UNRATED',
  awards: { wins: 1, nominations: 0, text: '1 win.' },
  lastupdated: '2015-08-26 00:03:50.133000000',
  year: 1893,
  imdb: { rating: 6.2, votes: 1189, id: 5 },
  type: 'movie',
  tomatoes: {
    viewer: { rating: 3, numReviews: 184, meter: 32 },
    lastUpdated: ISODate("2015-06-28T18:34:09.000Z")
  }
}"""

# One of these is picked at random as the default question for a new session.
MOVIES_EXAMPLE_QUESTIONS = [
    (
        "How many fantasy or horror movies from the USA with an imdb rating "
        "greater than 6.0 are there in this dataset?"
    ),
    (
        "Which movies were released on a Monday and have a higher tomato rating "
        "than IMDB rating? Keep in mind that IMDB goes from 1-10 and tomatoes "
        "only from 1-5, so you need to normalise the ratings to do a fair comparison."
    ),
    "What movies should I watch to learn more about Japanse culture?",
    (
        "How many movies were released in each decade? Write decade as a string, e.g. "
        "'1920-1929'. Sort ascending by decade."
    ),
    (
        "Find movies that are suitable to watch with my kids, both by genre and their "
        "parental guidance rating. Just recommend good movies."
    ),
]

# System message prepended to every chat request.
BASE_CHAT_MESSAGES = [
    {
        "role": "system",
        # Each fragment ends with a space so the implicit concatenation does
        # not glue two sentences together.
        "content": "You are an expert English to MongoDB aggregation pipeline translation system. "
        "You will accept an example document from a collection and an English question, and return an aggregation "
        "pipeline that can answer the question. Do not explain the query or add any additional comments, only "
        "return a single code block with the aggregation pipeline without the aggregate command.",
    }
]

MODEL_NAME = "gpt-3.5-turbo"


# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour of
# st.cache_data / st.cache_resource -- confirm the pinned Streamlit version
# before switching.
@st.cache
def ask_model(doc, question):
    """Send the example document and question to the OpenAI chat endpoint.

    Args:
        doc: Example document text; surrounding whitespace is stripped.
        question: English question text; surrounding whitespace is stripped.

    Returns:
        The response object returned by ``openai.ChatCompletion.create``.
    """
    messages = BASE_CHAT_MESSAGES + [
        {
            "role": "user",
            "content": f"Example document: {doc.strip()}\n\nQuestion: {question.strip()}\n\n",
        }
    ]

    return openai.ChatCompletion.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1.0,
    )


def extract_pipeline(response):
    """Return the pipeline text from a chat completion response.

    Surrounding newlines, spaces and backticks are removed. When the reply is a
    fenced code block whose opening fence carries a language tag (for example
    ```` ```javascript ````), that tag line is dropped as well so it does not
    show up as the first line of the pipeline.

    Args:
        response: Mapping shaped like a chat completion response, read as
            ``response["choices"][0]["message"]["content"]``.

    Returns:
        The cleaned pipeline string.
    """
    content = response["choices"][0]["message"]["content"].strip("\n ")

    if content.startswith("```"):
        fence_line, newline, body = content.partition("\n")
        # Only drop the first line when it is a fence followed by a plain
        # word; a bare fence or a one-line reply is handled by the final strip.
        if newline and fence_line.strip("` ").isalnum():
            content = body

    return content.strip("\n `")


st.set_page_config(layout="wide")

# initialise session state
if "response" not in st.session_state:
    st.session_state.response = None

if "_id" not in st.session_state:
    st.session_state._id = None

if "feedback" not in st.session_state:
    st.session_state.feedback = False

if "default_question" not in st.session_state:
    # Chosen once per session so the default does not change on every rerun.
    st.session_state.default_question = random.choice(MOVIES_EXAMPLE_QUESTIONS)

# DB access

# The model name is interpolated so the intro always matches the model that is
# actually called and logged.
st.markdown(
    f"""# English to MQL Demo

This demo app uses OpenAI's {MODEL_NAME} model to generate a MongoDB aggregation pipeline from an
English question and example document.

🚧 The app is experimental and may return incorrect results. Do not enter any sensitive information! 🚧
"""
)

# two-column layout
col_left, col_right = st.columns(2, gap="large")

with col_left:
    st.markdown("### Example Document and Question")

    # wrap textareas in form
    with st.form("text_inputs"):
        doc = st.text_area(
            "Enter example document from collection, e.g. db.collection.findOne()",
            value=MOVIES_EXAMPLE_DOC,
            height=300,
        )

        # question textarea
        question = st.text_area(
            label="Ask question in English",
            value=st.session_state.default_question,
        )

        # submit button
        submitted = st.form_submit_button("Translate", type="primary")

        if submitted:
            # A new translation starts a fresh record / feedback cycle.
            st.session_state._id = None
            st.session_state.feedback = False
            st.session_state.response = ask_model(doc, question)

with col_right:
    st.markdown("### Generated MQL")

    # show response
    response = st.session_state.response
    if response:
        pipeline = extract_pipeline(response)

        # print result as code block
        st.code(
            pipeline,
            language="javascript",
        )

        # feedback form
        with st.empty():
            if st.session_state.feedback:
                st.write("✅ Thank you for your feedback.")

            elif st.session_state._id:
                with st.form("feedback_inputs"):
                    radio = st.radio("Is the result correct?", ("Yes", "No"))
                    feedback = st.text_area(
                        "If not, please tell us what the issue is:",
                    )

                    # submit button
                    feedback_submit = st.form_submit_button(
                        "Submit Feedback", type="secondary"
                    )

                    if feedback_submit:
                        st.session_state.feedback = {
                            "correct": radio == "Yes",
                            "comment": feedback,
                        }

            else:
                # No record id yet: build the log record for this translation.
                # NOTE(review): `doc` is rebound here from the text-area string
                # to the record dict; the step that stores the record and sets
                # st.session_state._id is not visible in this part of the file
                # -- confirm it follows.
                doc = {
                    "ts": datetime.now(),
                    "doc": doc,
                    "question": question,
                    "generated_mql": pipeline,
                    "response": response,
                    "version": 3,
                    "model": MODEL_NAME,
                }