File size: 4,614 Bytes
693a64e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09cff11
693a64e
09cff11
693a64e
 
09cff11
693a64e
09cff11
693a64e
09cff11
693a64e
 
 
 
 
 
09cff11
 
 
1a0cdd2
693a64e
 
 
 
 
 
 
e858154
693a64e
 
 
 
 
 
e858154
693a64e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02c9c2d
693a64e
 
 
 
 
09cff11
693a64e
09cff11
 
693a64e
 
 
 
 
 
 
 
 
 
09cff11
693a64e
 
 
09cff11
 
693a64e
 
 
 
 
 
 
 
09cff11
693a64e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd5c6b5
693a64e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import streamlit as st
import os
import openai
from pymongo import MongoClient
from datetime import datetime
import random

# Schema Versions (the "version" field written with each logged request)
# 1. First version, using text-davinci-003 model
# 2. Switched to gpt-3.5-turbo model
# 3. Logging the model as well


# The OpenAI API key is read from Streamlit secrets (".streamlit/secrets.toml"
# locally, or the deployment's secret store) — not from an environment variable.
openai.api_key = st.secrets["API_KEY"]

# Sample SQL statements; one is chosen at random per session as the
# default value of the query text area.
SQL_EXAMPLE_QUERIES = [
    "SELECT CustomerName, City FROM Customers;",
    "Select distinct Salary from Employee e1 where 2=Select count(distinct Salary) from Employee e2 where e1.salary<=e2.salary;",
    "Select Employee_name,Salary/12 as ‘Monthly Salary’ from employee;",
    "Select * from Employee where Rowid= select min(Rowid) from Employee;",
]

# System prompt prepended to every request. NOTE: the string pieces below are
# implicitly concatenated, so every piece except the last must end with a
# trailing space (the first piece previously lacked one, yielding
# "system.You will" in the actual prompt).
BASE_CHAT_MESSAGES = [
    {
        "role": "system",
        "content": "You are an expert SQL to MongoDB aggregation pipeline translation system. "
        "You will accept SQL query and return a MongoDB aggregation pipeline "
        "that can convert the SQL query. Do not explain the query or add any additional comments, only "
        "return a single code block with the aggregation pipeline without any headers.",
    }
]

# Chat model used for translation; also logged with each request (schema v3).
MODEL_NAME = "gpt-3.5-turbo"


@st.cache
def ask_model(query):
    """Ask the chat model to translate *query* (a SQL string) to MQL.

    Builds the chat messages from the shared system prompt plus the
    user's SQL query and returns the raw ChatCompletion response.
    Streamlit caches the result, so identical queries are only sent
    to the API once.
    """
    user_message = {
        "role": "user",
        "content": f"Example SQL Query: {query.strip()}\n\n",
    }

    # temperature=0 keeps the translation as deterministic as possible.
    return openai.ChatCompletion.create(
        model=MODEL_NAME,
        messages=[*BASE_CHAT_MESSAGES, user_message],
        temperature=0,
        max_tokens=1000,
        top_p=1.0,
    )


def extract_pipeline(response):
    """Return the generated pipeline text from a ChatCompletion response.

    Takes the first choice's message content and strips surrounding
    newlines, spaces and Markdown code-fence backticks.
    """
    message = response["choices"][0]["message"]
    return message["content"].strip("\n `")


st.set_page_config(layout="wide")

# Initialise session state with defaults so values survive Streamlit reruns.
# (Idiom fix: "x not in" instead of "not x in".)
if "response" not in st.session_state:
    # Last raw ChatCompletion response, or None before the first translation.
    st.session_state.response = None
if "_id" not in st.session_state:
    # Presumably the MongoDB _id of the logged request document — TODO confirm
    # against the insert code further down the file.
    st.session_state._id = None
if "feedback" not in st.session_state:
    # False until the user submits the feedback form; then a dict.
    st.session_state.feedback = False
if "default_question" not in st.session_state:
    # Pick one example query per session for the text area's initial value.
    st.session_state.default_question = random.choice(SQL_EXAMPLE_QUERIES)

# DB access
# NOTE(review): no MongoDB client is created here despite the pymongo import —
# presumably the connection is established further down the file; confirm.


st.markdown(
    """# SQL to MQL Demo

This demo app uses OpenAI's GPT-3.5 (gpt-3.5) model to generate a MongoDB
aggregation pipeline from a SQL query.

🚧 The app is experimental and may return incorrect results. Do not enter any sensitive information! 🚧
"""
)


# two-column layout: SQL input on the left, generated MQL on the right
col_left, col_right = st.columns(2, gap="large")

with col_left:
    st.markdown("### Example SQL query")
    # wrap textareas in form so the app only reruns on explicit submit
    with st.form("text_inputs"):
        # question textarea, pre-filled with the session's random example;
        # `query` is also read later by the right-column logging code
        query = st.text_area(
            label="SQL query",
            value=st.session_state.default_question,
        )

        # submit button
        submitted = st.form_submit_button("Translate", type="primary")
        if submitted:
            # reset logging/feedback state from any previous query
            # before requesting a fresh translation
            st.session_state._id = None
            st.session_state.feedback = False
            st.session_state.response = ask_model(query)


with col_right:
    st.markdown("### Generated MQL")

    # show response from the last translation, if any
    response = st.session_state.response
    if response:
        pipeline = extract_pipeline(response)
        # print result as code block (MQL is displayed as JavaScript)
        st.code(
            pipeline,
            language="javascript",
        )

        # feedback form — st.empty() provides a single slot, so exactly one
        # of the three branches below is rendered at a time
        with st.empty():
            if st.session_state.feedback:
                # feedback already submitted this session
                st.write("✅ Thank you for your feedback.")

            elif st.session_state._id:
                # request already logged (an _id exists) but no feedback yet:
                # show the feedback form
                with st.form("feedback_inputs"):
                    radio = st.radio("Is the result correct?", ("Yes", "No"))
                    feedback = st.text_area(
                        "If not, please tell us what the issue is:",
                    )

                    # submit button
                    feedback_submit = st.form_submit_button(
                        "Submit Feedback", type="secondary"
                    )
                    if feedback_submit:
                        st.session_state.feedback = {
                            "correct": radio == "Yes",
                            "comment": feedback,
                        }

            else:
                # first render after a new translation: build the document
                # recording this request (schema version 3)
                # NOTE(review): datetime.now() is a naive local timestamp —
                # consider a timezone-aware UTC timestamp; confirm what the
                # DB consumers expect. The insert itself happens past the
                # end of this section.
                doc = {
                    "ts": datetime.now(),
                    "question": query,
                    "generated_mql": pipeline,
                    "response": response,
                    "version": 3,
                    "model": MODEL_NAME,
                }