File size: 11,304 Bytes
192e7a4
 
 
31d10ec
192e7a4
 
 
 
 
 
 
 
 
 
 
 
9152447
192e7a4
 
9152447
192e7a4
 
 
 
9152447
 
 
 
 
 
192e7a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea6f8b3
 
 
192e7a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea6f8b3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# !pip install gradio
# !pip install openai
# import openai

import gradio
import pandas as pd
import psycopg2

import pandas as pd
import openai

import sqlite3
import psycopg2
import time
import gradio as gr
import sqlparse
import os

#EA_key
openai.api_key = os.getenv("api_key")

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#database credential
db_name = os.getenv("db_name")
user_db = os.getenv("user_db")
pwd_db = os.getenv("pwd_db")
host_db = os.getenv("host_db")
port_db = os.getenv("port_db")

conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db)

# sql="select master_customer_id, c.gender,c.city_name,c.state_name, c.zip_code,product_name,department,class,category,d.date_value,s.city_name as store_city,s.state_name as store_state,s.zip_code as store_zip,s.store_name,s.opened_dt,s.closed_dt, f.transaction_amt,ch.type from oyster_demo.tbl_d_customer c,oyster_demo.tbl_d_product p,oyster_demo.tbl_f_sales f,oyster_demo.tbl_d_date d, oyster_demo.tbl_d_store s,oyster_demo.tbl_d_channel ch where p.product_id=f.product_id and c.customer_id=f.customer_id and d.date_id=f.date_id and s.store_id=f.store_id and ch.channel_id=f.channel_id"
sql2="""select * from lpdatamart.tbl_d_customer limit 10000"""
sql3="""select * from lpdatamart.tbl_d_product limit 1000"""
sql4="""select * from lpdatamart.tbl_f_sales limit 10000"""
# sql5="""select * from lpdatamart.tbl_d_time limit 10000"""
sql6="""select * from lpdatamart.tbl_d_store limit 10000"""
sql7="""select * from lpdatamart.tbl_d_channel limit 10000"""
sql8="""select * from lpdatamart.tbl_d_lineaction_code limit 10000"""
sql9 = """select * from lpdatamart.tbl_d_calendar limit 10000"""

df_customer = pd.read_sql_query(sql2, con=conn)
df_product = pd.read_sql_query(sql3, con=conn)
df_sales = pd.read_sql_query(sql4, con=conn)
# df_time = pd.read_sql_query(sql5, con=conn)
df_store = pd.read_sql_query(sql6, con=conn)
df_channel = pd.read_sql_query(sql7, con=conn)
df_lineaction = pd.read_sql_query(sql8, con=conn)
df_calendar = pd.read_sql_query(sql9, con=conn)


conn.close()
df_customer.head(2)

customer_col=['customer_id','customer_type', 'first_name', 'middle_name', 'household_name', 'last_name', 'personal_email', 'city', 'state', 'zip_code', 'address1', 'country', 'gender', 'phone_number', 'reward_number']
product_col=['product_id', 'product_name', 'product_price', 'department', 'class',  'discount', 'category', 'department_desc', 'department_type', 'product_type', 'manufacturer', 'color']
sales_col = ['store_id', 'customer_id', 'channel_id', 'product_id', 'time_id', 'date_id','order_id', 'line_action', 'discount_amount', 'shipping_amount','transaction_date', 'transaction_amount', 'transaction_type', 'qty_sold']
# time_col = ['time_id', 'hour', 'minute', 'second', 'am_pm']
store_col = ['store_id', 'store_number', 'store_name', 'store_designation', 'store_longitude', 'store_latitude', 'store_manager_name', 'zip_code', 'state_code', 'city', 'street_number', 'street_name', 'store_region', 'store_type', 'address1','sublocationcode', 'channel', 'company_flag', 'kiosk_physical_store', 'sublocation_code']
channel_col = ['channel_id', 'channel_name', 'channel_code']
lineaction_col = ['line_action_code', 'line_action_code_desc', 'load_date', 'catgory', 'sales_type']
calendar_col = ['date_id','calendar_date','calendar_month','day_of_week','calendar_week_number','calendar_month_number','calendar_quarter_number','day_of_month','day_of_quarter','day_of_the_year','us_holiday','lp_holiday','work_day','year','ad_week','ad_week_year','ad_month','lp_day','lp_week','lp_month','lp_year','lp_quarter','event_day']

df_customer=df_customer[customer_col]
df_product=df_product[product_col]
df_sales=df_sales[sales_col]
# df_time = df_time[time_col]
df_store = df_store[store_col]
df_channel = df_channel[channel_col]
df_lineaction = df_lineaction[lineaction_col]
df_calendar = df_calendar[calendar_col]

# df = pd.read_csv('/content/drive/MyDrive/tbl_m_querygen.csv')


import sqlite3
import openai

# Connect to SQLite database
conn1 = sqlite3.connect('chatgpt.db')
cursor1 = conn1.cursor()

# Connect to SQLite database
conn2 = sqlite3.connect('chatgpt.db')
cursor2 = conn2.cursor()

# Connect to SQLite database
conn3 = sqlite3.connect('chatgpt.db')
cursor3 = conn3.cursor()

# Connect to SQLite database
conn4 = sqlite3.connect('chatgpt.db')
cursor4 = conn4.cursor()

# Connect to SQLite database
conn5 = sqlite3.connect('chatgpt.db')
cursor5 = conn5.cursor()

# Connect to SQLite database
conn5 = sqlite3.connect('chatgpt.db')
cursor5 = conn5.cursor()

# Connect to SQLite database
conn6 = sqlite3.connect('chatgpt.db')
cursor6 = conn6.cursor()

# Connect to SQLite database
conn7 = sqlite3.connect('chatgpt.db')
cursor7 = conn7.cursor()

# Connect to SQLite database
conn8 = sqlite3.connect('chatgpt.db')
cursor8 = conn8.cursor()


# openai.api_key = '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'

# Insert DataFrame into SQLite database
df_customer.to_sql('tbl_d_customer', conn1, if_exists='replace', index=False)
df_product.to_sql('tbl_d_product', conn2, if_exists='replace', index=False)
df_sales.to_sql('tbl_f_sales', conn3, if_exists='replace', index=False)
# df_time.to_sql('tbl_d_time', conn4, if_exists='replace', index=False)
df_store.to_sql('tbl_d_store', conn5, if_exists='replace', index=False)
df_channel.to_sql('tbl_d_channel', conn6, if_exists='replace', index=False)
df_lineaction.to_sql('tbl_d_lineaction_code', conn7, if_exists='replace', index=False)
df_calendar.to_sql('tbl_d_calendar', conn8, if_exists ='replace',index=False)

# Function to get table columns from SQLite database
def get_table_columns(table_name1, table_name2):
    cursor1.execute("PRAGMA table_info({})".format(table_name1))
    columns1 = cursor1.fetchall()
    # print(columns)

    cursor2.execute("PRAGMA table_info({})".format(table_name2))
    columns2 = cursor2.fetchall()

    return [column[1] for column in columns1], [column[1] for column in columns2]

table_name1 = 'tbl_d_customer'
table_name2 = 'tbl_d_product'
table_name3 = 'tbl_f_sales'

# table_name4 = 'tbl_d_time'
table_name5 = 'tbl_d_store'
table_name6 = 'tbl_d_channel'
table_name7 = 'tbl_d_lineaction_code'
table_name8 = 'tbl_d_calendar'

columns1,columns2  = get_table_columns(table_name1,table_name2)



# Function to generate SQL query from input text using ChatGPT
def generate_sql_query(text):
    # prompt = """You are a ChatGPT language model that can generate SQL queries. Please provide a natural language input text, and I will generate the corresponding SQL query and Answer the provided question if possible for you.The table name is {} and the following data:\n {} and corresponding columns are {}.\nInput: {}\nSQL Query:""".format(table_name,read_csv, columns,text)

    messages.append({"role": "user", "content": text})
    # print(prompt)
    request = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )
    print(request)
    sql_query = request['choices'][0]['message']['content']
    return sql_query

text = "for female customer who did a transaction of more than 100 dollars in year 2020 please write sql query ?"

schema_name = 'lpdatamart'
prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5],  table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5])
messages = [{"role": "system", "content": prompt}]

sql_query=generate_sql_query(text)
print("Generated SQL query: ",sql_query)

# prompt = """Given an input text, and You will generate the corresponding SQL query. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}.\n""".format(table_name1,df2.loc[:5], table_name2, df3.loc[:5], table_name3, df4.loc[:5])
prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5],  table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5])
messages = [{"role": "system", "content": prompt}]

import time
import gradio as gr
def CustomChatGPT(user_inp):
    messages.append({"role": "user", "content": user_inp})
    response = openai.ChatCompletion.create(
        model = "gpt-4",
        messages = messages
    )
    ChatGPT_reply = response["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": ChatGPT_reply})
    return ChatGPT_reply

def respond(message, chat_history):
    bot_message = CustomChatGPT(message)
    chat_history.append((message, bot_message))
    time.sleep(2)
    return "", chat_history

# to test the generated sql query
def test_Sql(sql):
  sql=sql.replace(';', '')
  sql = sql + ' ' + 'limit 5'
  sql = str(sql)
  sql = sqlparse.format(sql, reindent=True, keyword_case='upper')

  conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db)
  df = pd.read_sql_query(sql, con=conn)
  conn.close()
  return pd.DataFrame(df)

admin = os.getenv("admin")
paswd = os.getenv("paswd")

with gr.Blocks() as demo:
    with gr.Tab("Query Helper"):
        gr.Markdown("""<h1><center> Query Helper</center></h1>""")
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        clear = gr.ClearButton([msg, chatbot])
        msg.submit(respond, [msg, chatbot], [msg, chatbot])

    with gr.Tab("Run Query"):
        # gr.Markdown("""<h1><center> Run Query </center></h1>""")
        text_input = gr.Textbox(label = 'Input SQL Query', placeholder="Write your SQL query here ...")
        text_output = gr.Textbox(label = 'Result')
        text_button = gr.Button("RUN QUERY")
        clear = gr.ClearButton([text_input, text_output])
        text_button.click(test_Sql, inputs=text_input, outputs=text_output)

demo.launch(share=True, auth=(admin, paswd))