Spaces:
Runtime error
Runtime error
| # !pip install gradio | |
| # !pip install openai | |
| # import openai | |
| import gradio | |
| import pandas as pd | |
| import psycopg2 | |
| import pandas as pd | |
| import openai | |
| import sqlite3 | |
| import psycopg2 | |
| import time | |
| import gradio as gr | |
| import sqlparse | |
| import os | |
| #EA_key | |
| openai.api_key = os.getenv("api_key") | |
| pd.set_option('display.max_columns', None) | |
| pd.set_option('display.max_rows', None) | |
| #database credential | |
| db_name = os.getenv("db_name") | |
| user_db = os.getenv("user_db") | |
| pwd_db = os.getenv("pwd_db") | |
| host_db = os.getenv("host_db") | |
| port_db = os.getenv("port_db") | |
| conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db) | |
| # sql="select master_customer_id, c.gender,c.city_name,c.state_name, c.zip_code,product_name,department,class,category,d.date_value,s.city_name as store_city,s.state_name as store_state,s.zip_code as store_zip,s.store_name,s.opened_dt,s.closed_dt, f.transaction_amt,ch.type from oyster_demo.tbl_d_customer c,oyster_demo.tbl_d_product p,oyster_demo.tbl_f_sales f,oyster_demo.tbl_d_date d, oyster_demo.tbl_d_store s,oyster_demo.tbl_d_channel ch where p.product_id=f.product_id and c.customer_id=f.customer_id and d.date_id=f.date_id and s.store_id=f.store_id and ch.channel_id=f.channel_id" | |
| sql2="""select * from lpdatamart.tbl_d_customer limit 10000""" | |
| sql3="""select * from lpdatamart.tbl_d_product limit 1000""" | |
| sql4="""select * from lpdatamart.tbl_f_sales limit 10000""" | |
| # sql5="""select * from lpdatamart.tbl_d_time limit 10000""" | |
| sql6="""select * from lpdatamart.tbl_d_store limit 10000""" | |
| sql7="""select * from lpdatamart.tbl_d_channel limit 10000""" | |
| sql8="""select * from lpdatamart.tbl_d_lineaction_code limit 10000""" | |
| sql9 = """select * from lpdatamart.tbl_d_calendar limit 10000""" | |
| df_customer = pd.read_sql_query(sql2, con=conn) | |
| df_product = pd.read_sql_query(sql3, con=conn) | |
| df_sales = pd.read_sql_query(sql4, con=conn) | |
| # df_time = pd.read_sql_query(sql5, con=conn) | |
| df_store = pd.read_sql_query(sql6, con=conn) | |
| df_channel = pd.read_sql_query(sql7, con=conn) | |
| df_lineaction = pd.read_sql_query(sql8, con=conn) | |
| df_calendar = pd.read_sql_query(sql9, con=conn) | |
| conn.close() | |
| df_customer.head(2) | |
| customer_col=['customer_id','customer_type', 'first_name', 'middle_name', 'household_name', 'last_name', 'personal_email', 'city', 'state', 'zip_code', 'address1', 'country', 'gender', 'phone_number', 'reward_number'] | |
| product_col=['product_id', 'product_name', 'product_price', 'department', 'class', 'discount', 'category', 'department_desc', 'department_type', 'product_type', 'manufacturer', 'color'] | |
| sales_col = ['store_id', 'customer_id', 'channel_id', 'product_id', 'time_id', 'date_id','order_id', 'line_action', 'discount_amount', 'shipping_amount','transaction_date', 'transaction_amount', 'transaction_type', 'qty_sold'] | |
| # time_col = ['time_id', 'hour', 'minute', 'second', 'am_pm'] | |
| store_col = ['store_id', 'store_number', 'store_name', 'store_designation', 'store_longitude', 'store_latitude', 'store_manager_name', 'zip_code', 'state_code', 'city', 'street_number', 'street_name', 'store_region', 'store_type', 'address1','sublocationcode', 'channel', 'company_flag', 'kiosk_physical_store', 'sublocation_code'] | |
| channel_col = ['channel_id', 'channel_name', 'channel_code'] | |
| lineaction_col = ['line_action_code', 'line_action_code_desc', 'load_date', 'catgory', 'sales_type'] | |
| calendar_col = ['date_id','calendar_date','calendar_month','day_of_week','calendar_week_number','calendar_month_number','calendar_quarter_number','day_of_month','day_of_quarter','day_of_the_year','us_holiday','lp_holiday','work_day','year','ad_week','ad_week_year','ad_month','lp_day','lp_week','lp_month','lp_year','lp_quarter','event_day'] | |
| df_customer=df_customer[customer_col] | |
| df_product=df_product[product_col] | |
| df_sales=df_sales[sales_col] | |
| # df_time = df_time[time_col] | |
| df_store = df_store[store_col] | |
| df_channel = df_channel[channel_col] | |
| df_lineaction = df_lineaction[lineaction_col] | |
| df_calendar = df_calendar[calendar_col] | |
| # df = pd.read_csv('/content/drive/MyDrive/tbl_m_querygen.csv') | |
| import sqlite3 | |
| import openai | |
| # Connect to SQLite database | |
| conn1 = sqlite3.connect('chatgpt.db') | |
| cursor1 = conn1.cursor() | |
| # Connect to SQLite database | |
| conn2 = sqlite3.connect('chatgpt.db') | |
| cursor2 = conn2.cursor() | |
| # Connect to SQLite database | |
| conn3 = sqlite3.connect('chatgpt.db') | |
| cursor3 = conn3.cursor() | |
| # Connect to SQLite database | |
| conn4 = sqlite3.connect('chatgpt.db') | |
| cursor4 = conn4.cursor() | |
| # Connect to SQLite database | |
| conn5 = sqlite3.connect('chatgpt.db') | |
| cursor5 = conn5.cursor() | |
| # Connect to SQLite database | |
| conn5 = sqlite3.connect('chatgpt.db') | |
| cursor5 = conn5.cursor() | |
| # Connect to SQLite database | |
| conn6 = sqlite3.connect('chatgpt.db') | |
| cursor6 = conn6.cursor() | |
| # Connect to SQLite database | |
| conn7 = sqlite3.connect('chatgpt.db') | |
| cursor7 = conn7.cursor() | |
| # Connect to SQLite database | |
| conn8 = sqlite3.connect('chatgpt.db') | |
| cursor8 = conn8.cursor() | |
| # openai.api_key = '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' | |
| # Insert DataFrame into SQLite database | |
| df_customer.to_sql('tbl_d_customer', conn1, if_exists='replace', index=False) | |
| df_product.to_sql('tbl_d_product', conn2, if_exists='replace', index=False) | |
| df_sales.to_sql('tbl_f_sales', conn3, if_exists='replace', index=False) | |
| # df_time.to_sql('tbl_d_time', conn4, if_exists='replace', index=False) | |
| df_store.to_sql('tbl_d_store', conn5, if_exists='replace', index=False) | |
| df_channel.to_sql('tbl_d_channel', conn6, if_exists='replace', index=False) | |
| df_lineaction.to_sql('tbl_d_lineaction_code', conn7, if_exists='replace', index=False) | |
| df_calendar.to_sql('tbl_d_calendar', conn8, if_exists ='replace',index=False) | |
| # Function to get table columns from SQLite database | |
| def get_table_columns(table_name1, table_name2): | |
| cursor1.execute("PRAGMA table_info({})".format(table_name1)) | |
| columns1 = cursor1.fetchall() | |
| # print(columns) | |
| cursor2.execute("PRAGMA table_info({})".format(table_name2)) | |
| columns2 = cursor2.fetchall() | |
| return [column[1] for column in columns1], [column[1] for column in columns2] | |
| table_name1 = 'tbl_d_customer' | |
| table_name2 = 'tbl_d_product' | |
| table_name3 = 'tbl_f_sales' | |
| # table_name4 = 'tbl_d_time' | |
| table_name5 = 'tbl_d_store' | |
| table_name6 = 'tbl_d_channel' | |
| table_name7 = 'tbl_d_lineaction_code' | |
| table_name8 = 'tbl_d_calendar' | |
| columns1,columns2 = get_table_columns(table_name1,table_name2) | |
| # Function to generate SQL query from input text using ChatGPT | |
| def generate_sql_query(text): | |
| # prompt = """You are a ChatGPT language model that can generate SQL queries. Please provide a natural language input text, and I will generate the corresponding SQL query and Answer the provided question if possible for you.The table name is {} and the following data:\n {} and corresponding columns are {}.\nInput: {}\nSQL Query:""".format(table_name,read_csv, columns,text) | |
| messages.append({"role": "user", "content": text}) | |
| # print(prompt) | |
| request = openai.ChatCompletion.create( | |
| model="gpt-4", | |
| messages=messages | |
| ) | |
| print(request) | |
| sql_query = request['choices'][0]['message']['content'] | |
| return sql_query | |
| text = "for female customer who did a transaction of more than 100 dollars in year 2020 please write sql query ?" | |
| schema_name = 'lpdatamart' | |
| prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5]) | |
| messages = [{"role": "system", "content": prompt}] | |
| sql_query=generate_sql_query(text) | |
| print("Generated SQL query: ",sql_query) | |
| # prompt = """Given an input text, and You will generate the corresponding SQL query. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}.\n""".format(table_name1,df2.loc[:5], table_name2, df3.loc[:5], table_name3, df4.loc[:5]) | |
| prompt = """Given an input text, and You will generate the corresponding SQL query. The schema name is {}. The first table name is {} and the following data:\n {}. The second table name is {} and the following data for second table:\n {}. The third table name is {} and the following data for third table:\n {}. The fourth table name is {} and the following data for fourth table:\n {}. The fifth table name is {} and the following data for fifth table:\n {}. The sixth table name is {} and the following data for sixth table:\n {}. The seventh table name is {} and the following data for seventh table:\n {} \n""".format(schema_name,table_name1,df_customer.loc[:5], table_name2, df_product.loc[:5], table_name3, df_sales.loc[:5], table_name5, df_store.loc[:5], table_name6, df_channel.loc[:5],table_name7, df_lineaction.loc[:5], table_name8, df_calendar.loc[:5]) | |
| messages = [{"role": "system", "content": prompt}] | |
| import time | |
| import gradio as gr | |
| def CustomChatGPT(user_inp): | |
| messages.append({"role": "user", "content": user_inp}) | |
| response = openai.ChatCompletion.create( | |
| model = "gpt-4", | |
| messages = messages | |
| ) | |
| ChatGPT_reply = response["choices"][0]["message"]["content"] | |
| messages.append({"role": "assistant", "content": ChatGPT_reply}) | |
| return ChatGPT_reply | |
| def respond(message, chat_history): | |
| bot_message = CustomChatGPT(message) | |
| chat_history.append((message, bot_message)) | |
| time.sleep(2) | |
| return "", chat_history | |
| # to test the generated sql query | |
| def test_Sql(sql): | |
| sql=sql.replace(';', '') | |
| sql = sql + ' ' + 'limit 5' | |
| sql = str(sql) | |
| sql = sqlparse.format(sql, reindent=True, keyword_case='upper') | |
| conn = psycopg2.connect(database=db_name, user = user_db, password = pwd_db, host = host_db, port = port_db) | |
| df = pd.read_sql_query(sql, con=conn) | |
| conn.close() | |
| return pd.DataFrame(df) | |
| admin = os.getenv("admin") | |
| paswd = os.getenv("paswd") | |
| def same_auth(username, password): | |
| if username == admin and password == paswd: | |
| return 1 | |
| with gr.Blocks() as demo: | |
| with gr.Tab("Query Helper"): | |
| gr.Markdown("""<h1><center> Query Helper</center></h1>""") | |
| chatbot = gr.Chatbot() | |
| msg = gr.Textbox() | |
| clear = gr.ClearButton([msg, chatbot]) | |
| msg.submit(respond, [msg, chatbot], [msg, chatbot]) | |
| with gr.Tab("Run Query"): | |
| # gr.Markdown("""<h1><center> Run Query </center></h1>""") | |
| text_input = gr.Textbox(label = 'Input SQL Query', placeholder="Write your SQL query here ...") | |
| text_output = gr.Textbox(label = 'Result') | |
| text_button = gr.Button("RUN QUERY") | |
| clear = gr.ClearButton([text_input, text_output]) | |
| text_button.click(test_Sql, inputs=text_input, outputs=text_output) | |
| demo.launch(share=True, auth=same_auth) |