vindruid committed on
Commit
f37c91c
·
verified ·
1 Parent(s): 46cc960

add gradio application

Browse files
Files changed (1) hide show
  1. app.py +195 -0
app.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Third-party imports: Gemini SDK, pandas for the dataset, gradio for the UI.
import google.generativeai as genai
import pandas as pd
import sqlite3
import os
import gradio as gr
from google.api_core import retry


# Gemini API key must be supplied via the environment; raises KeyError if absent.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

genai.configure(api_key=GOOGLE_API_KEY)

# Tableau's public "Superstore" sample dataset, fetched over HTTP at startup.
df = pd.read_excel(
    'https://public.tableau.com/app/sample-data/sample_-_superstore.xls'
)
16
+
17
# Function to convert column names to snake case
def to_snake_case(name):
    """Return *name* lowercased with spaces and hyphens turned into underscores."""
    # One C-level pass replaces both separator characters.
    separators = str.maketrans({' ': '_', '-': '_'})
    return name.lower().translate(separators)
21
+
22
# Apply the function to rename columns
df = df.rename(columns=to_snake_case)

# Add date abstractions derived from the order date.
# Convert once up front: the original accessed .dt on the raw column, which
# only works if read_excel already parsed it as datetime — converting
# explicitly makes this robust if the column arrives as strings.
order_dates = pd.to_datetime(df['order_date'])
# Start (Monday) of each order's week, stored as a string.
df['order_week'] = (order_dates - pd.to_timedelta(order_dates.dt.dayofweek, unit='d')).astype(str)
df['order_year'] = order_dates.dt.year
df['order_month'] = order_dates.dt.month

# Connect to the SQLite database (or create it if it doesn't exist).
# check_same_thread=False lets Gradio worker threads share this connection.
db_conn = sqlite3.connect('mydatabase.db', check_same_thread=False)

# Save the DataFrame to an SQLite table named 'orders'
df.to_sql('orders', db_conn, if_exists='replace', index=False)
35
+
36
# --- Tools exposed to the model via automatic function calling ---
def list_tables() -> list[str]:
    """Retrieve the names of all tables in the database."""
    # Connection.execute is a shortcut that creates a cursor internally.
    rows = db_conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table';"
    ).fetchall()
    # sqlite returns one-tuples; unpack down to bare names.
    return [name for (name,) in rows]
46
+
47
def describe_table(table_name: str) -> list[tuple[str, str]]:
    """Look up the table schema.

    Returns:
        List of columns, where each entry is a tuple of (column, type).
    """
    # NOTE(review): table_name is interpolated directly — PRAGMAs cannot be
    # parameterized, but this value can originate from the model, so beware.
    rows = db_conn.execute(f"PRAGMA table_info({table_name});").fetchall()
    # PRAGMA rows are (cid, name, type, notnull, dflt_value, pk); keep name/type.
    return [(row[1], row[2]) for row in rows]
61
+
62
+
63
def execute_query(sql: str) -> list[tuple]:
    """Execute a SQL statement and return every fetched row.

    The model sometimes escapes quotes with backslashes, which SQLite does
    not understand, so backslashes are stripped before execution.

    Args:
        sql: The statement to run (expected to be a SELECT).

    Returns:
        All result rows, each as a tuple of column values.
        (Annotation corrected: fetchall() yields tuples, not list[list[str]].)
    """
    sql = sql.replace("\\", "")

    cursor = db_conn.cursor()

    cursor.execute(sql)
    return cursor.fetchall()
72
+
73
+
74
def relative_uplift(anchor_value: float, new_value: float) -> float:
    """Return the relative change from anchor_value to new_value.

    Dividing by abs(anchor_value) keeps the sign of the uplift meaningful
    when the anchor is negative. Raises ZeroDivisionError when the anchor
    is exactly 0.
    """
    delta = new_value - anchor_value
    return delta / abs(anchor_value)
79
+
80
# Tools the Gemini model may invoke through automatic function calling.
tools = [list_tables, describe_table, execute_query]

# System instruction sent verbatim to the model — defines the SQL-agent role.
instruction = """You are a helpful chatbot that can interact with an SQL database for a
store. You will take the users questions and turn them into SQL queries using the tools
available. Once you have the information you need, you will answer the user's question using
the data returned. You are allowed to do multiple query if the question cannot be done in one go.
Use list_tables to see what tables are present, describe_table to understand
the schema, and execute_query to issue an SQL SELECT query.

You can process one by one, not need to do it all at once.

No need to return output like "Here is the result" or "lets find out" Just return the data itself.
No need to return the question in the output. Just return the answer.
"""
94
+
95
def chat_history_result(chat):
    """
    Convert a chat history into a tabular log plus the model's combined text.

    Parameters:
    - chat: A chat session object whose .history holds events with roles,
      text parts, function calls, and function responses.

    Returns:
    - (df_log, text_result): a pandas DataFrame with columns
      'Chat ID', 'Role', 'Type', 'Content', and the concatenation of the
      model's text replies (newline-terminated).
    """
    rows = []
    model_texts = []

    # Each history event gets its own chat ID, starting from 1.
    for turn_id, event in enumerate(chat.history, start=1):
        speaker = event.role.capitalize()

        for part in event.parts:
            if text := part.text:
                rows.append({"Chat ID": turn_id, "Type": "Text", "Role": speaker, "Content": text})
                # Only the model's own replies go into the displayed answer.
                if speaker == 'Model':
                    model_texts.append(text + "\n")
            elif call := part.function_call:
                arg_str = ", ".join(f"{key}={val}" for key, val in call.args.items())
                rows.append({"Chat ID": turn_id, "Type": "Function Call", "Role": speaker, "Content": f"Function call: {call.name}({arg_str})"})
            elif reply := part.function_response:
                rows.append({"Chat ID": turn_id, "Type": "Function Response", "Role": speaker, "Content": str(reply)})

    # Build the log table with a fixed column order.
    df_log = pd.DataFrame(rows, columns=["Chat ID", "Role", "Type", "Content"])

    return df_log, "".join(model_texts)
130
+
131
+
132
def generate_response(prompt):
    """Send the user's question(s) to Gemini and return (answer text, log DataFrame)."""
    model = genai.GenerativeModel(
        "models/gemini-1.5-flash-latest", tools=tools, system_instruction=instruction
    )

    # Retry transient API failures automatically.
    retry_policy = {"retry": retry.Retry(predicate=retry.if_transient_error)}

    # Start a chat with automatic function calling enabled.
    session = model.start_chat(enable_automatic_function_calling=True)

    # Questions are separated by blank lines; send them one at a time.
    for question in prompt.split("\n\n"):
        _ = session.send_message(question, request_options=retry_policy)

    # Flatten the full exchange into a log table and the model's answer text.
    df_chat, text_result = chat_history_result(session)

    return text_result, df_chat
154
+
155
+
156
# Default questions shown in the input box; questions are separated by blank lines.
initial_prompt = """Question1:
Who bought the most product by quantity and the total of profit from the person?
Then Show top 5 item with most profit from that person.

Question2:
Compare profit between 2016 May and 2016 June, show the relative uplift of June profit compared to May profit.

Question3:
What is the city that generate the most profit then show the profit and quantity
"""

block = gr.Blocks()

with block:
    gr.Markdown(
        """
        # Automated Report
        ## Explored by Hervind & Nikolas Ermando
        Opensource Data by Tableau regarding superstore orders
        * can be accessed in https://www.kaggle.com/datasets/vivek468/superstore-dataset-final
        """)

    prompt_box = gr.Textbox(label="Questions, (separate question by double enter / new lines)", lines=2, value=initial_prompt)

    btn = gr.Button(value="Submit")
    text_output = gr.Textbox(label="Results")
    df_output = gr.DataFrame(label="Process Log")

    # Generate on click, and also run once automatically when the page loads.
    btn.click(generate_response, inputs=prompt_box, outputs=[text_output, df_output])
    block.load(fn=generate_response, inputs=prompt_box, outputs=[text_output, df_output])

    gr.Markdown(
        """
        # To Do:
        * Explore several data and join tables capability
        * In depth Comparison capability between two periods
        * Add showing interactive plot based on the question.
        """)

block.launch()