# !pip install gradio # !pip install openai # import openai import gradio import pandas as pd import psycopg2 import pandas as pd import openai import sqlite3 import psycopg2 import time import gradio as gr import sqlparse import os #EA_key openai.api_key = os.getenv("api_key") pd.set_option('display.max_columns', None) pd.set_option('display.max_rows', None) #database credential db_name = os.getenv("db_name") user_db = os.getenv("user_db") pwd_db = os.getenv("pwd_db") host_db = os.getenv("host_db") port_db = os.getenv("port_db") conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db) # sql="select master_customer_id, c.gender,c.city_name,c.state_name, c.zip_code,product_name,department,class,category,d.date_value,s.city_name as store_city,s.state_name as store_state,s.zip_code as store_zip,s.store_name,s.opened_dt,s.closed_dt, f.transaction_amt,ch.type from oyster_demo.tbl_d_customer c,oyster_demo.tbl_d_product p,oyster_demo.tbl_f_sales f,oyster_demo.tbl_d_date d, oyster_demo.tbl_d_store s,oyster_demo.tbl_d_channel ch where p.product_id=f.product_id and c.customer_id=f.customer_id and d.date_id=f.date_id and s.store_id=f.store_id and ch.channel_id=f.channel_id" sql2="""select * from lpdatamart.tbl_d_customer limit 10000""" sql3="""select * from lpdatamart.tbl_d_product limit 1000""" sql4="""select * from lpdatamart.tbl_f_sales limit 10000""" # sql5="""select * from lpdatamart.tbl_d_time limit 10000""" sql6="""select * from lpdatamart.tbl_d_store limit 10000""" sql7="""select * from lpdatamart.tbl_d_channel limit 10000""" sql8="""select * from lpdatamart.tbl_d_lineaction_code limit 10000""" sql9 = """select * from lpdatamart.tbl_d_calendar limit 10000""" df_customer = pd.read_sql_query(sql2, con=conn) df_product = pd.read_sql_query(sql3, con=conn) df_sales = pd.read_sql_query(sql4, con=conn) # df_time = pd.read_sql_query(sql5, con=conn) df_store = pd.read_sql_query(sql6, con=conn) df_channel = pd.read_sql_query(sql7, con=conn) df_lineaction = pd.read_sql_query(sql8, con=conn) df_calendar = pd.read_sql_query(sql9, con=conn) conn.close() df_customer.head(2) customer_col=['customer_id','customer_type', 'first_name', 'middle_name', 'household_name', 'last_name', 'personal_email', 'city', 'state', 'zip_code', 'address1', 'country', 'gender', 'phone_number', 'reward_number'] product_col=['product_id', 'product_name', 'product_price', 'department', 'class', 'discount', 'category', 'department_desc', 'department_type', 'product_type', 'manufacturer', 'color'] sales_col = ['store_id', 'customer_id', 'channel_id', 'product_id', 'time_id', 'date_id','order_id', 'line_action', 'discount_amount', 'shipping_amount','transaction_date', 'transaction_amount', 'transaction_type', 'qty_sold'] # time_col = ['time_id', 'hour', 'minute', 'second', 'am_pm'] store_col = ['store_id', 'store_number', 'store_name', 'store_designation', 'store_longitude', 'store_latitude', 'store_manager_name', 'zip_code', 'state_code', 'city', 'street_number', 'street_name', 'store_region', 'store_type', 'address1','sublocationcode', 'channel', 'company_flag', 'kiosk_physical_store', 'sublocation_code'] channel_col = ['channel_id', 'channel_name', 'channel_code'] lineaction_col = ['line_action_code', 'line_action_code_desc', 'load_date', 'catgory', 'sales_type'] calendar_col = ['date_id','calendar_date','calendar_month','day_of_week','calendar_week_number','calendar_month_number','calendar_quarter_number','day_of_month','day_of_quarter','day_of_the_year','us_holiday','lp_holiday','work_day','year','ad_week','ad_week_year','ad_month','lp_day','lp_week','lp_month','lp_year','lp_quarter','event_day'] df_customer=df_customer[customer_col] df_product=df_product[product_col] df_sales=df_sales[sales_col] # df_time = df_time[time_col] df_store = df_store[store_col] df_channel = df_channel[channel_col] df_lineaction = df_lineaction[lineaction_col] df_calendar = df_calendar[calendar_col] # df = pd.read_csv('/content/drive/MyDrive/tbl_m_querygen.csv') import sqlite3 import openai # Connect to SQLite database conn1 = sqlite3.connect('chatgpt.db') cursor1 = conn1.cursor() # Connect to SQLite database conn2 = sqlite3.connect('chatgpt.db') cursor2 = conn2.cursor() # Connect to SQLite database conn3 = sqlite3.connect('chatgpt.db') cursor3 = conn3.cursor() # Connect to SQLite database conn4 = sqlite3.connect('chatgpt.db') cursor4 = conn4.cursor() # Connect to SQLite database conn5 = sqlite3.connect('chatgpt.db') cursor5 = conn5.cursor() # Connect to SQLite database conn5 = sqlite3.connect('chatgpt.db') cursor5 = conn5.cursor() # Connect to SQLite database conn6 = sqlite3.connect('chatgpt.db') cursor6 = conn6.cursor() # Connect to SQLite database conn7 = sqlite3.connect('chatgpt.db') cursor7 = conn7.cursor() # Connect to SQLite database conn8 = sqlite3.connect('chatgpt.db') cursor8 = conn8.cursor() # openai.api_key = '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' # Insert DataFrame into SQLite database df_customer.to_sql('tbl_d_customer', conn1, if_exists='replace', index=False) df_product.to_sql('tbl_d_product', conn2, if_exists='replace', index=False) df_sales.to_sql('tbl_f_sales', conn3, if_exists='replace', index=False) # df_time.to_sql('tbl_d_time', conn4, if_exists='replace', index=False) df_store.to_sql('tbl_d_store', conn5, if_exists='replace', index=False) df_channel.to_sql('tbl_d_channel', conn6, if_exists='replace', index=False) df_lineaction.to_sql('tbl_d_lineaction_code', conn7, if_exists='replace', index=False) df_calendar.to_sql('tbl_d_calendar', conn8, if_exists ='replace',index=False) # Function to get table columns from SQLite database def get_table_columns(table_name1, table_name2): cursor1.execute("PRAGMA table_info({})".format(table_name1)) columns1 = cursor1.fetchall() # print(columns) cursor2.execute("PRAGMA table_info({})".format(table_name2)) columns2 = cursor2.fetchall() return [column[1] for column in columns1], [column[1] for column in columns2] table_name1 = 'tbl_d_customer' table_name2 = 'tbl_d_product' table_name3 = 'tbl_f_sales' # table_name4 = 'tbl_d_time' table_name5 = 'tbl_d_store' table_name6 = 'tbl_d_channel' table_name7 = 'tbl_d_lineaction_code' table_name8 = 'tbl_d_calendar' columns1,columns2 = get_table_columns(table_name1,table_name2) # Function to generate SQL query from input text using ChatGPT def generate_sql_query(text): # prompt = """You are a ChatGPT language model that can generate SQL queries. Please provide a natural language input text, and I will generate the corresponding SQL query and Answer the provided question if possible for you.The table name is {} and the following data:\n {} and corresponding columns are {}.\nInput: {}\nSQL Query:""".format(table_name,read_csv, columns,text) messages.append({"role": "user", "content": text}) # print(prompt) request = openai.ChatCompletion.create( model="gpt-4", messages=messages ) print(request) sql_query = request['choices'][0]['message']['content'] return sql_query text = "for female customer who did a transaction of more than 100 dollars in year 2020 please write sql query ?" schema_name = 'lpdatamart' prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5]) messages = [{"role": "system", "content": prompt}] sql_query=generate_sql_query(text) print("Generated SQL query: ",sql_query) # prompt = """Given an input text, and You will generate the corresponding SQL query. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}.\n""".format(table_name1,df2.loc[:5], table_name2, df3.loc[:5], table_name3, df4.loc[:5]) prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5]) messages = [{"role": "system", "content": prompt}] import time import gradio as gr def CustomChatGPT(user_inp): messages.append({"role": "user", "content": user_inp}) response = openai.ChatCompletion.create( model = "gpt-4", messages = messages ) ChatGPT_reply = response["choices"][0]["message"]["content"] messages.append({"role": "assistant", "content": ChatGPT_reply}) return ChatGPT_reply def respond(message, chat_history): bot_message = CustomChatGPT(message) chat_history.append((message, bot_message)) time.sleep(2) return "", chat_history # to test the generated sql query def test_Sql(sql): sql=sql.replace(';', '') sql = sql + ' ' + 'limit 5' sql = str(sql) sql = sqlparse.format(sql, reindent=True, keyword_case='upper') conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db) df = pd.read_sql_query(sql, con=conn) conn.close() return pd.DataFrame(df) admin = os.getenv("admin") paswd = os.getenv("paswd") def same_auth(username, password): if username == admin and password == paswd: return 1 with gr.Blocks() as demo: with gr.Tab("Query Helper"): gr.Markdown("""