# eng-to-mql / app.py
# (Hugging Face Spaces header residue — author "mutukrish", commit 5cc52b4 "Update app.py")
import streamlit as st
import os
import openai
from pymongo import MongoClient
from datetime import datetime
import random
# Schema Versions (of the MongoDB log documents written by this app):
# 1. First version, using text-davinci-003 model
# 2. Switched to gpt-3.5-turbo model
# 3. Logging the model as well
# The OpenAI API key is read from Streamlit secrets under the key "API_KEY"
# (set it in .streamlit/secrets.toml or the Space's secrets, not as an
# environment variable).
openai.api_key = st.secrets["API_KEY"]
# Default content of the "example document" textarea: one document from the
# sample_mflix.movies collection, in mongosh output notation (unquoted keys,
# ObjectId()/ISODate() wrappers — intentionally not strict JSON, since users
# are expected to paste db.collection.findOne() output).
MOVIES_EXAMPLE_DOC = """{
_id: ObjectId("573a1390f29313caabcd4135"),
genres: [ 'Short' ],
runtime: 1,
cast: [ 'Charles Kayser', 'John Ott' ],
num_mflix_comments: 0,
title: 'Blacksmith Scene',
countries: [ 'USA' ],
released: ISODate("1893-05-09T00:00:00.000Z"),
directors: [ 'William K.L. Dickson' ],
rated: 'UNRATED',
awards: { wins: 1, nominations: 0, text: '1 win.' },
lastupdated: '2015-08-26 00:03:50.133000000',
year: 1893,
imdb: { rating: 6.2, votes: 1189, id: 5 },
type: 'movie',
tomatoes: {
viewer: { rating: 3, numReviews: 184, meter: 32 },
lastUpdated: ISODate("2015-06-28T18:34:09.000Z")
}
}"""
# Example questions for the movies collection; one is picked at random per
# session to pre-fill the question textarea (see default_question below).
MOVIES_EXAMPLE_QUESTIONS = [
    (
        "How many fantasy or horror movies from the USA with an imdb rating "
        "greater than 6.0 are there in this dataset?"
    ),
    (
        "Which movies were released on a Monday and have a higher tomato rating "
        "than IMDB rating? Keep in mind that IMDB goes from 1-10 and tomatoes "
        "only from 1-5, so you need to normalise the ratings to do a fair comparison."
    ),
    "What movies should I watch to learn more about Japanse culture?",
    (
        "How many movies were released in each decade? Write decade as a string, e.g. "
        "'1920-1929'. Sort ascending by decade."
    ),
    (
        "Find movies that are suitable to watch with my kids, both by genre and their "
        "parental guidance rating. Just recommend good movies."
    ),
]
# Prompt prefix for every translation request: a single system message that
# instructs the model to act as an English → MongoDB aggregation-pipeline
# translator and to reply with only a code block (no explanation, and without
# the surrounding `aggregate` command).
# Fix: the implicit string concatenation previously dropped the space between
# "...translation system." and "You will accept...", gluing the sentences
# together in the prompt.
BASE_CHAT_MESSAGES = [
    {
        "role": "system",
        "content": "You are an expert English to MongoDB aggregation pipeline translation system. "
        "You will accept an example document from a collection and an English question, and return an aggregation "
        "pipeline that can answer the question. Do not explain the query or add any additional comments, only "
        "return a single code block with the aggregation pipeline without the aggregate command.",
    }
]
# Chat model used for translation (schema v2 moved here from text-davinci-003;
# logged with each request since schema v3).
MODEL_NAME = "gpt-3.5-turbo"
@st.cache
def ask_model(doc, question):
    """Translate an English question into MQL via the OpenAI chat API.

    Builds a chat conversation from the system prompt prefix plus one user
    message containing the example document and the question, then calls the
    ChatCompletion endpoint and returns its raw response. Streamlit caches
    the result, keyed on the (doc, question) pair, so repeated identical
    submissions do not re-query the API.
    """
    user_prompt = f"Example document: {doc.strip()}\n\nQuestion: {question.strip()}\n\n"
    conversation = [*BASE_CHAT_MESSAGES, {"role": "user", "content": user_prompt}]
    # temperature=0 / top_p=1.0 keep the output as deterministic as possible.
    return openai.ChatCompletion.create(
        model=MODEL_NAME,
        messages=conversation,
        temperature=0,
        max_tokens=1000,
        top_p=1.0,
    )
def extract_pipeline(response):
    """Extract the aggregation pipeline text from a ChatCompletion response.

    Takes the first choice's message content and strips the Markdown code
    fence around it. Unlike a plain ``strip("\\n `")``, this also removes a
    language tag on the opening fence (e.g. ```` ```javascript ````), which
    gpt-3.5-turbo frequently emits despite the system prompt.

    :param response: dict-like ChatCompletion response.
    :return: the pipeline source as a plain string.
    """
    content = response["choices"][0]["message"]["content"].strip()
    if content.startswith("```"):
        content = content[3:]
        # Drop an optional language identifier on the opening fence line.
        first_line, _, rest = content.partition("\n")
        if first_line.strip().isalpha():
            content = rest
    if content.endswith("```"):
        content = content[:-3]
    # Final cleanup matches the original behavior for unfenced replies.
    return content.strip("\n `")
st.set_page_config(layout="wide")
# Initialise per-session state so values survive Streamlit reruns.
# (Idiom fix: `"x" not in d` instead of `not "x" in d`.)
# response: raw ChatCompletion response of the last translation, or None.
if "response" not in st.session_state:
    st.session_state.response = None
# _id: id of the logged interaction; None until the result has been stored.
if "_id" not in st.session_state:
    st.session_state._id = None
# feedback: False until the user submits the feedback form, then a dict.
if "feedback" not in st.session_state:
    st.session_state.feedback = False
# default_question: random example pre-filling the question textarea.
if "default_question" not in st.session_state:
    st.session_state.default_question = random.choice(MOVIES_EXAMPLE_QUESTIONS)
# Page intro. The model name is interpolated from MODEL_NAME so the copy
# stays in sync with the model actually used — the previous text claimed
# "GPT-4 (gpt-4)" while the app calls gpt-3.5-turbo.
st.markdown(
    f"""# English to MQL Demo
This demo app uses OpenAI's `{MODEL_NAME}` model to generate a MongoDB
aggregation pipeline from an English question and example document.
🚧 The app is experimental and may return incorrect results. Do not enter any sensitive information! 🚧
"""
)
# Two-column layout: inputs on the left, generated pipeline on the right.
col_left, col_right = st.columns(2, gap="large")
with col_left:
    st.markdown("### Example Document and Question")
    # Wrap both textareas in a form so the app only reruns on "Translate",
    # not on every keystroke.
    with st.form("text_inputs"):
        # Example document pasted by the user (pre-filled with the movies sample).
        doc = st.text_area(
            "Enter example document from collection, e.g. db.collection.findOne()",
            value=MOVIES_EXAMPLE_DOC,
            height=300,
        )
        # English question to translate (pre-filled with a random example).
        question = st.text_area(
            label="Ask question in English",
            value=st.session_state.default_question,
        )
        # submit button
        submitted = st.form_submit_button("Translate", type="primary")
    # Runs on the rerun triggered by pressing "Translate": reset the log id
    # and feedback flag for the new result, then query the model (cached).
    if submitted:
        st.session_state._id = None
        st.session_state.feedback = False
        st.session_state.response = ask_model(doc, question)
with col_right:
    st.markdown("### Generated MQL")
    # Show the last response, if any (persists across reruns via session state).
    response = st.session_state.response
    if response:
        pipeline = extract_pipeline(response)
        # Render the generated pipeline as a (JavaScript-highlighted) code block.
        st.code(
            pipeline,
            language="javascript",
        )
        # Feedback area — st.empty() makes the three branches below replace
        # each other in the same slot.
        with st.empty():
            if st.session_state.feedback:
                # NOTE(review): "βœ…" looks like a mojibake of a ✅ emoji —
                # verify the file's encoding before touching this literal.
                st.write("βœ… Thank you for your feedback.")
            elif st.session_state._id:
                # A log entry exists for this result: ask whether it is correct.
                with st.form("feedback_inputs"):
                    radio = st.radio("Is the result correct?", ("Yes", "No"))
                    feedback = st.text_area(
                        "If not, please tell us what the issue is:",
                    )
                    # submit button
                    feedback_submit = st.form_submit_button(
                        "Submit Feedback", type="secondary"
                    )
                    if feedback_submit:
                        st.session_state.feedback = {
                            "correct": radio == "Yes",
                            "comment": feedback,
                        }
            else:
                # First rerun after a fresh translation (_id is None): build
                # the schema-v3 log document.
                # NOTE(review): this rebinds `doc`, shadowing the textarea
                # value from the left column; presumably the document is
                # inserted into MongoDB just below this excerpt (MongoClient
                # is imported but unused here) and _id is set — confirm
                # against the full file.
                doc = {
                    "ts": datetime.now(),
                    "doc": doc,
                    "question": question,
                    "generated_mql": pipeline,
                    "response": response,
                    "version": 3,
                    "model": MODEL_NAME,
                }