QueryHelper / app.py
expressanalytics's picture
Update app.py
609adf4
raw
history blame
11.4 kB
# !pip install gradio
# !pip install openai
# import openai
import gradio
import pandas as pd
import psycopg2
import pandas as pd
import openai
import sqlite3
import psycopg2
import time
import gradio as gr
import sqlparse
import os
#EA_key
openai.api_key = os.getenv("api_key")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
#database credential
db_name = os.getenv("db_name")
user_db = os.getenv("user_db")
pwd_db = os.getenv("pwd_db")
host_db = os.getenv("host_db")
port_db = os.getenv("port_db")
conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db)
# sql="select master_customer_id, c.gender,c.city_name,c.state_name, c.zip_code,product_name,department,class,category,d.date_value,s.city_name as store_city,s.state_name as store_state,s.zip_code as store_zip,s.store_name,s.opened_dt,s.closed_dt, f.transaction_amt,ch.type from oyster_demo.tbl_d_customer c,oyster_demo.tbl_d_product p,oyster_demo.tbl_f_sales f,oyster_demo.tbl_d_date d, oyster_demo.tbl_d_store s,oyster_demo.tbl_d_channel ch where p.product_id=f.product_id and c.customer_id=f.customer_id and d.date_id=f.date_id and s.store_id=f.store_id and ch.channel_id=f.channel_id"
sql2="""select * from lpdatamart.tbl_d_customer limit 10000"""
sql3="""select * from lpdatamart.tbl_d_product limit 1000"""
sql4="""select * from lpdatamart.tbl_f_sales limit 10000"""
# sql5="""select * from lpdatamart.tbl_d_time limit 10000"""
sql6="""select * from lpdatamart.tbl_d_store limit 10000"""
sql7="""select * from lpdatamart.tbl_d_channel limit 10000"""
sql8="""select * from lpdatamart.tbl_d_lineaction_code limit 10000"""
sql9 = """select * from lpdatamart.tbl_d_calendar limit 10000"""
df_customer = pd.read_sql_query(sql2, con=conn)
df_product = pd.read_sql_query(sql3, con=conn)
df_sales = pd.read_sql_query(sql4, con=conn)
# df_time = pd.read_sql_query(sql5, con=conn)
df_store = pd.read_sql_query(sql6, con=conn)
df_channel = pd.read_sql_query(sql7, con=conn)
df_lineaction = pd.read_sql_query(sql8, con=conn)
df_calendar = pd.read_sql_query(sql9, con=conn)
conn.close()
df_customer.head(2)
customer_col=['customer_id','customer_type', 'first_name', 'middle_name', 'household_name', 'last_name', 'personal_email', 'city', 'state', 'zip_code', 'address1', 'country', 'gender', 'phone_number', 'reward_number']
product_col=['product_id', 'product_name', 'product_price', 'department', 'class', 'discount', 'category', 'department_desc', 'department_type', 'product_type', 'manufacturer', 'color']
sales_col = ['store_id', 'customer_id', 'channel_id', 'product_id', 'time_id', 'date_id','order_id', 'line_action', 'discount_amount', 'shipping_amount','transaction_date', 'transaction_amount', 'transaction_type', 'qty_sold']
# time_col = ['time_id', 'hour', 'minute', 'second', 'am_pm']
store_col = ['store_id', 'store_number', 'store_name', 'store_designation', 'store_longitude', 'store_latitude', 'store_manager_name', 'zip_code', 'state_code', 'city', 'street_number', 'street_name', 'store_region', 'store_type', 'address1','sublocationcode', 'channel', 'company_flag', 'kiosk_physical_store', 'sublocation_code']
channel_col = ['channel_id', 'channel_name', 'channel_code']
lineaction_col = ['line_action_code', 'line_action_code_desc', 'load_date', 'catgory', 'sales_type']
calendar_col = ['date_id','calendar_date','calendar_month','day_of_week','calendar_week_number','calendar_month_number','calendar_quarter_number','day_of_month','day_of_quarter','day_of_the_year','us_holiday','lp_holiday','work_day','year','ad_week','ad_week_year','ad_month','lp_day','lp_week','lp_month','lp_year','lp_quarter','event_day']
df_customer=df_customer[customer_col]
df_product=df_product[product_col]
df_sales=df_sales[sales_col]
# df_time = df_time[time_col]
df_store = df_store[store_col]
df_channel = df_channel[channel_col]
df_lineaction = df_lineaction[lineaction_col]
df_calendar = df_calendar[calendar_col]
# df = pd.read_csv('/content/drive/MyDrive/tbl_m_querygen.csv')
import sqlite3
import openai
# Connect to SQLite database
conn1 = sqlite3.connect('chatgpt.db')
cursor1 = conn1.cursor()
# Connect to SQLite database
conn2 = sqlite3.connect('chatgpt.db')
cursor2 = conn2.cursor()
# Connect to SQLite database
conn3 = sqlite3.connect('chatgpt.db')
cursor3 = conn3.cursor()
# Connect to SQLite database
conn4 = sqlite3.connect('chatgpt.db')
cursor4 = conn4.cursor()
# Connect to SQLite database
conn5 = sqlite3.connect('chatgpt.db')
cursor5 = conn5.cursor()
# Connect to SQLite database
conn5 = sqlite3.connect('chatgpt.db')
cursor5 = conn5.cursor()
# Connect to SQLite database
conn6 = sqlite3.connect('chatgpt.db')
cursor6 = conn6.cursor()
# Connect to SQLite database
conn7 = sqlite3.connect('chatgpt.db')
cursor7 = conn7.cursor()
# Connect to SQLite database
conn8 = sqlite3.connect('chatgpt.db')
cursor8 = conn8.cursor()
# openai.api_key = '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'
# Insert DataFrame into SQLite database
df_customer.to_sql('tbl_d_customer', conn1, if_exists='replace', index=False)
df_product.to_sql('tbl_d_product', conn2, if_exists='replace', index=False)
df_sales.to_sql('tbl_f_sales', conn3, if_exists='replace', index=False)
# df_time.to_sql('tbl_d_time', conn4, if_exists='replace', index=False)
df_store.to_sql('tbl_d_store', conn5, if_exists='replace', index=False)
df_channel.to_sql('tbl_d_channel', conn6, if_exists='replace', index=False)
df_lineaction.to_sql('tbl_d_lineaction_code', conn7, if_exists='replace', index=False)
df_calendar.to_sql('tbl_d_calendar', conn8, if_exists ='replace',index=False)
# Function to get table columns from SQLite database
def get_table_columns(table_name1, table_name2):
cursor1.execute("PRAGMA table_info({})".format(table_name1))
columns1 = cursor1.fetchall()
# print(columns)
cursor2.execute("PRAGMA table_info({})".format(table_name2))
columns2 = cursor2.fetchall()
return [column[1] for column in columns1], [column[1] for column in columns2]
table_name1 = 'tbl_d_customer'
table_name2 = 'tbl_d_product'
table_name3 = 'tbl_f_sales'
# table_name4 = 'tbl_d_time'
table_name5 = 'tbl_d_store'
table_name6 = 'tbl_d_channel'
table_name7 = 'tbl_d_lineaction_code'
table_name8 = 'tbl_d_calendar'
columns1,columns2 = get_table_columns(table_name1,table_name2)
# Function to generate SQL query from input text using ChatGPT
def generate_sql_query(text):
# prompt = """You are a ChatGPT language model that can generate SQL queries. Please provide a natural language input text, and I will generate the corresponding SQL query and Answer the provided question if possible for you.The table name is {} and the following data:\n {} and corresponding columns are {}.\nInput: {}\nSQL Query:""".format(table_name,read_csv, columns,text)
messages.append({"role": "user", "content": text})
# print(prompt)
request = openai.ChatCompletion.create(
model="gpt-4",
messages=messages
)
print(request)
sql_query = request['choices'][0]['message']['content']
return sql_query
text = "for female customer who did a transaction of more than 100 dollars in year 2020 please write sql query ?"
schema_name = 'lpdatamart'
prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5])
messages = [{"role": "system", "content": prompt}]
sql_query=generate_sql_query(text)
print("Generated SQL query: ",sql_query)
# prompt = """Given an input text, and You will generate the corresponding SQL query. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}.\n""".format(table_name1,df2.loc[:5], table_name2, df3.loc[:5], table_name3, df4.loc[:5])
prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5])
messages = [{"role": "system", "content": prompt}]
import time
import gradio as gr
def CustomChatGPT(user_inp):
messages.append({"role": "user", "content": user_inp})
response = openai.ChatCompletion.create(
model = "gpt-4",
messages = messages
)
ChatGPT_reply = response["choices"][0]["message"]["content"]
messages.append({"role": "assistant", "content": ChatGPT_reply})
return ChatGPT_reply
def respond(message, chat_history):
bot_message = CustomChatGPT(message)
chat_history.append((message, bot_message))
time.sleep(2)
return "", chat_history
# to test the generated sql query
def test_Sql(sql):
sql=sql.replace(';', '')
sql = sql + ' ' + 'limit 5'
sql = str(sql)
sql = sqlparse.format(sql, reindent=True, keyword_case='upper')
conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db)
df = pd.read_sql_query(sql, con=conn)
conn.close()
return pd.DataFrame(df)
admin = os.getenv("admin")
paswd = os.getenv("paswd")
def same_auth(username, password):
if username == admin and password == paswd:
return 1
with gr.Blocks() as demo:
with gr.Tab("Query Helper"):
gr.Markdown("""<h1><center> Query Helper</center></h1>""")
chatbot = gr.Chatbot()
msg = gr.Textbox()
clear = gr.ClearButton([msg, chatbot])
msg.submit(respond, [msg, chatbot], [msg, chatbot])
with gr.Tab("Run Query"):
# gr.Markdown("""<h1><center> Run Query </center></h1>""")
text_input = gr.Textbox(label = 'Input SQL Query', placeholder="Write your SQL query here ...")
text_output = gr.Textbox(label = 'Result')
text_button = gr.Button("RUN QUERY")
clear = gr.ClearButton([text_input, text_output])
text_button.click(test_Sql, inputs=text_input, outputs=text_output)
demo.launch(share=True, same_auth)