Spaces:
Runtime error
Runtime error
File size: 11,304 Bytes
192e7a4 31d10ec 192e7a4 9152447 192e7a4 9152447 192e7a4 9152447 192e7a4 ea6f8b3 192e7a4 ea6f8b3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 | # !pip install gradio
# !pip install openai
# import openai
import gradio
import pandas as pd
import psycopg2
import pandas as pd
import openai
import sqlite3
import psycopg2
import time
import gradio as gr
import sqlparse
import os
#EA_key
openai.api_key = os.getenv("api_key")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
#database credential
db_name = os.getenv("db_name")
user_db = os.getenv("user_db")
pwd_db = os.getenv("pwd_db")
host_db = os.getenv("host_db")
port_db = os.getenv("port_db")
conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db)
# sql="select master_customer_id, c.gender,c.city_name,c.state_name, c.zip_code,product_name,department,class,category,d.date_value,s.city_name as store_city,s.state_name as store_state,s.zip_code as store_zip,s.store_name,s.opened_dt,s.closed_dt, f.transaction_amt,ch.type from oyster_demo.tbl_d_customer c,oyster_demo.tbl_d_product p,oyster_demo.tbl_f_sales f,oyster_demo.tbl_d_date d, oyster_demo.tbl_d_store s,oyster_demo.tbl_d_channel ch where p.product_id=f.product_id and c.customer_id=f.customer_id and d.date_id=f.date_id and s.store_id=f.store_id and ch.channel_id=f.channel_id"
sql2="""select * from lpdatamart.tbl_d_customer limit 10000"""
sql3="""select * from lpdatamart.tbl_d_product limit 1000"""
sql4="""select * from lpdatamart.tbl_f_sales limit 10000"""
# sql5="""select * from lpdatamart.tbl_d_time limit 10000"""
sql6="""select * from lpdatamart.tbl_d_store limit 10000"""
sql7="""select * from lpdatamart.tbl_d_channel limit 10000"""
sql8="""select * from lpdatamart.tbl_d_lineaction_code limit 10000"""
sql9 = """select * from lpdatamart.tbl_d_calendar limit 10000"""
df_customer = pd.read_sql_query(sql2, con=conn)
df_product = pd.read_sql_query(sql3, con=conn)
df_sales = pd.read_sql_query(sql4, con=conn)
# df_time = pd.read_sql_query(sql5, con=conn)
df_store = pd.read_sql_query(sql6, con=conn)
df_channel = pd.read_sql_query(sql7, con=conn)
df_lineaction = pd.read_sql_query(sql8, con=conn)
df_calendar = pd.read_sql_query(sql9, con=conn)
conn.close()
df_customer.head(2)
customer_col=['customer_id','customer_type', 'first_name', 'middle_name', 'household_name', 'last_name', 'personal_email', 'city', 'state', 'zip_code', 'address1', 'country', 'gender', 'phone_number', 'reward_number']
product_col=['product_id', 'product_name', 'product_price', 'department', 'class', 'discount', 'category', 'department_desc', 'department_type', 'product_type', 'manufacturer', 'color']
sales_col = ['store_id', 'customer_id', 'channel_id', 'product_id', 'time_id', 'date_id','order_id', 'line_action', 'discount_amount', 'shipping_amount','transaction_date', 'transaction_amount', 'transaction_type', 'qty_sold']
# time_col = ['time_id', 'hour', 'minute', 'second', 'am_pm']
store_col = ['store_id', 'store_number', 'store_name', 'store_designation', 'store_longitude', 'store_latitude', 'store_manager_name', 'zip_code', 'state_code', 'city', 'street_number', 'street_name', 'store_region', 'store_type', 'address1','sublocationcode', 'channel', 'company_flag', 'kiosk_physical_store', 'sublocation_code']
channel_col = ['channel_id', 'channel_name', 'channel_code']
lineaction_col = ['line_action_code', 'line_action_code_desc', 'load_date', 'catgory', 'sales_type']
calendar_col = ['date_id','calendar_date','calendar_month','day_of_week','calendar_week_number','calendar_month_number','calendar_quarter_number','day_of_month','day_of_quarter','day_of_the_year','us_holiday','lp_holiday','work_day','year','ad_week','ad_week_year','ad_month','lp_day','lp_week','lp_month','lp_year','lp_quarter','event_day']
df_customer=df_customer[customer_col]
df_product=df_product[product_col]
df_sales=df_sales[sales_col]
# df_time = df_time[time_col]
df_store = df_store[store_col]
df_channel = df_channel[channel_col]
df_lineaction = df_lineaction[lineaction_col]
df_calendar = df_calendar[calendar_col]
# df = pd.read_csv('/content/drive/MyDrive/tbl_m_querygen.csv')
import sqlite3
import openai
# Connect to SQLite database
conn1 = sqlite3.connect('chatgpt.db')
cursor1 = conn1.cursor()
# Connect to SQLite database
conn2 = sqlite3.connect('chatgpt.db')
cursor2 = conn2.cursor()
# Connect to SQLite database
conn3 = sqlite3.connect('chatgpt.db')
cursor3 = conn3.cursor()
# Connect to SQLite database
conn4 = sqlite3.connect('chatgpt.db')
cursor4 = conn4.cursor()
# Connect to SQLite database
conn5 = sqlite3.connect('chatgpt.db')
cursor5 = conn5.cursor()
# Connect to SQLite database
conn5 = sqlite3.connect('chatgpt.db')
cursor5 = conn5.cursor()
# Connect to SQLite database
conn6 = sqlite3.connect('chatgpt.db')
cursor6 = conn6.cursor()
# Connect to SQLite database
conn7 = sqlite3.connect('chatgpt.db')
cursor7 = conn7.cursor()
# Connect to SQLite database
conn8 = sqlite3.connect('chatgpt.db')
cursor8 = conn8.cursor()
# openai.api_key = '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'
# Insert DataFrame into SQLite database
df_customer.to_sql('tbl_d_customer', conn1, if_exists='replace', index=False)
df_product.to_sql('tbl_d_product', conn2, if_exists='replace', index=False)
df_sales.to_sql('tbl_f_sales', conn3, if_exists='replace', index=False)
# df_time.to_sql('tbl_d_time', conn4, if_exists='replace', index=False)
df_store.to_sql('tbl_d_store', conn5, if_exists='replace', index=False)
df_channel.to_sql('tbl_d_channel', conn6, if_exists='replace', index=False)
df_lineaction.to_sql('tbl_d_lineaction_code', conn7, if_exists='replace', index=False)
df_calendar.to_sql('tbl_d_calendar', conn8, if_exists ='replace',index=False)
# Function to get table columns from SQLite database
def get_table_columns(table_name1, table_name2):
cursor1.execute("PRAGMA table_info({})".format(table_name1))
columns1 = cursor1.fetchall()
# print(columns)
cursor2.execute("PRAGMA table_info({})".format(table_name2))
columns2 = cursor2.fetchall()
return [column[1] for column in columns1], [column[1] for column in columns2]
table_name1 = 'tbl_d_customer'
table_name2 = 'tbl_d_product'
table_name3 = 'tbl_f_sales'
# table_name4 = 'tbl_d_time'
table_name5 = 'tbl_d_store'
table_name6 = 'tbl_d_channel'
table_name7 = 'tbl_d_lineaction_code'
table_name8 = 'tbl_d_calendar'
columns1,columns2 = get_table_columns(table_name1,table_name2)
# Function to generate SQL query from input text using ChatGPT
def generate_sql_query(text):
# prompt = """You are a ChatGPT language model that can generate SQL queries. Please provide a natural language input text, and I will generate the corresponding SQL query and Answer the provided question if possible for you.The table name is {} and the following data:\n {} and corresponding columns are {}.\nInput: {}\nSQL Query:""".format(table_name,read_csv, columns,text)
messages.append({"role": "user", "content": text})
# print(prompt)
request = openai.ChatCompletion.create(
model="gpt-4",
messages=messages
)
print(request)
sql_query = request['choices'][0]['message']['content']
return sql_query
text = "for female customer who did a transaction of more than 100 dollars in year 2020 please write sql query ?"
schema_name = 'lpdatamart'
prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5])
messages = [{"role": "system", "content": prompt}]
sql_query=generate_sql_query(text)
print("Generated SQL query: ",sql_query)
# prompt = """Given an input text, and You will generate the corresponding SQL query. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}.\n""".format(table_name1,df2.loc[:5], table_name2, df3.loc[:5], table_name3, df4.loc[:5])
prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5])
messages = [{"role": "system", "content": prompt}]
import time
import gradio as gr
def CustomChatGPT(user_inp):
messages.append({"role": "user", "content": user_inp})
response = openai.ChatCompletion.create(
model = "gpt-4",
messages = messages
)
ChatGPT_reply = response["choices"][0]["message"]["content"]
messages.append({"role": "assistant", "content": ChatGPT_reply})
return ChatGPT_reply
def respond(message, chat_history):
bot_message = CustomChatGPT(message)
chat_history.append((message, bot_message))
time.sleep(2)
return "", chat_history
# to test the generated sql query
def test_Sql(sql):
sql=sql.replace(';', '')
sql = sql + ' ' + 'limit 5'
sql = str(sql)
sql = sqlparse.format(sql, reindent=True, keyword_case='upper')
conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db)
df = pd.read_sql_query(sql, con=conn)
conn.close()
return pd.DataFrame(df)
admin = os.getenv("admin")
paswd = os.getenv("paswd")
with gr.Blocks() as demo:
with gr.Tab("Query Helper"):
gr.Markdown("""<h1><center> Query Helper</center></h1>""")
chatbot = gr.Chatbot()
msg = gr.Textbox()
clear = gr.ClearButton([msg, chatbot])
msg.submit(respond, [msg, chatbot], [msg, chatbot])
with gr.Tab("Run Query"):
# gr.Markdown("""<h1><center> Run Query </center></h1>""")
text_input = gr.Textbox(label = 'Input SQL Query', placeholder="Write your SQL query here ...")
text_output = gr.Textbox(label = 'Result')
text_button = gr.Button("RUN QUERY")
clear = gr.ClearButton([text_input, text_output])
text_button.click(test_Sql, inputs=text_input, outputs=text_output)
demo.launch(share=True, auth=(admin, paswd)) |