Spaces:
Sleeping
Sleeping
add gradio application
Browse files
app.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import google.generativeai as genai
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import sqlite3
|
| 4 |
+
import os
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from google.api_core import retry
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Read the Gemini API key from the environment (a missing variable raises
# KeyError at import time) and hand it to the client library.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
genai.configure(api_key=GOOGLE_API_KEY)

# Download the Superstore sample workbook and load it into a DataFrame.
SUPERSTORE_XLS_URL = 'https://public.tableau.com/app/sample-data/sample_-_superstore.xls'
df = pd.read_excel(SUPERSTORE_XLS_URL)
|
| 16 |
+
|
| 17 |
+
# Function to convert column names to snake case
|
| 18 |
+
def to_snake_case(name):
    """Return ``name`` lower-cased, with spaces and hyphens replaced by underscores."""
    # A single translation pass handles both separator characters.
    separators = str.maketrans({" ": "_", "-": "_"})
    return name.lower().translate(separators)
|
| 21 |
+
|
| 22 |
+
# Normalise every column header to snake_case.
df = df.rename(columns=to_snake_case)

# Derived date columns: the start of each order's week (the order date minus
# its day-of-week offset, stored as text), plus the order's year and month.
order_dates = df['order_date']
order_timestamps = pd.to_datetime(order_dates)
weekday_offset = pd.to_timedelta(order_dates.dt.dayofweek, unit='d')
df['order_week'] = (order_timestamps - weekday_offset).astype(str)

order_index = pd.DatetimeIndex(order_dates)
df['order_year'] = order_index.year
df['order_month'] = order_index.month

# Open the SQLite database file (created if it does not exist yet).
# check_same_thread=False allows the connection to be used from threads other
# than the one that created it.
db_conn = sqlite3.connect('mydatabase.db', check_same_thread=False)

# Store the DataFrame as the 'orders' table, replacing any previous copy.
df.to_sql('orders', db_conn, if_exists='replace', index=False)
|
| 35 |
+
|
| 36 |
+
# Tools
|
| 37 |
+
def list_tables() -> list[str]:
    """Retrieve the names of all tables in the database."""
    # NOTE(review): this function is registered as a model tool; the docstring
    # is presumably forwarded as the tool description, so its wording is kept.

    # Connection.execute creates the cursor and runs the statement in one step.
    rows = db_conn.execute("SELECT name FROM sqlite_master WHERE type='table';")

    # Each row is a 1-tuple holding a table name.
    return [table_name for (table_name,) in rows]
|
| 46 |
+
|
| 47 |
+
def describe_table(table_name: str) -> list[tuple[str, str]]:
    """Look up the table schema.

    Returns:
        List of columns, where each entry is a tuple of (column, type).
    """
    # NOTE(review): this function is registered as a model tool; the docstring
    # is presumably forwarded as the tool description, so its wording is kept.

    # The table name comes from the model, so it is bound as a parameter
    # instead of being formatted into the SQL text. The pragma_table_info
    # table-valued function exposes the same rows as `PRAGMA table_info(...)`
    # and, unlike the PRAGMA statement form, accepts a bound argument.
    # An unknown table yields no rows, so the result is an empty list.
    rows = db_conn.execute(
        "SELECT name, type FROM pragma_table_info(?);", (table_name,)
    )
    return [(column, column_type) for column, column_type in rows]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def execute_query(sql: str) -> list[list[str]]:
    """Execute a SELECT statement, returning the results."""
    # NOTE(review): this function is registered as a model tool; the docstring
    # is presumably forwarded as the tool description, so its wording is kept.
    # NOTE(review): nothing here verifies that the statement is a SELECT; the
    # text is executed as given once backslashes are removed.

    # Strip every backslash from the statement before running it.
    cleaned_sql = sql.replace("\\", "")
    return db_conn.execute(cleaned_sql).fetchall()
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def relative_uplift(anchor_value: float, new_value: float) -> float:
    """Return the change from ``anchor_value`` to ``new_value`` relative to the anchor.

    The difference is divided by the absolute value of the anchor, so a
    negative anchor does not flip the sign of the result. The value returned
    is a fraction (0.5 means +50%); it is not multiplied by 100. An anchor of
    zero raises ZeroDivisionError.
    """
    change = new_value - anchor_value
    baseline = abs(anchor_value)
    return change / baseline
|
| 79 |
+
|
| 80 |
+
# Functions handed to the Gemini model as callable tools.
# NOTE(review): relative_uplift is defined in this file but is not listed
# here — confirm whether leaving it out is intentional.
tools = [list_tables, describe_table, execute_query]

# System instruction for the model: answer store questions by querying the
# SQL database through the tools above and reply with the data only.
# This text is sent to the model verbatim, so edits change model behaviour.
instruction = """You are a helpful chatbot that can interact with an SQL database for a
store. You will take the users questions and turn them into SQL queries using the tools
available. Once you have the information you need, you will answer the user's question using
the data returned. You are allowed to do multiple query if the question cannot be done in one go.
Use list_tables to see what tables are present, describe_table to understand
the schema, and execute_query to issue an SQL SELECT query.

You can process one by one, not need to do it all at once.

No need to return output like "Here is the result" or "lets find out" Just return the data itself.
No need to return the question in the output. Just return the answer.
"""
|
| 94 |
+
|
| 95 |
+
def chat_history_result(chat):
    """
    Flatten a chat history into a log table plus the model's combined text.

    Parameters:
    - chat: An object whose `history` is iterable. Each event has a `role`
      and `parts`; each part exposes `text`, `function_call` and
      `function_response`.

    Returns:
    - A tuple `(df_log, text_result)`. `df_log` is a pandas DataFrame with
      columns 'Chat ID', 'Role', 'Type', 'Content' holding one row per
      non-empty part. `text_result` is every text part from 'Model' events,
      each followed by a newline.
    """
    rows = []
    model_text = []

    # Events are numbered from 1; all parts of one event share that number.
    for chat_id, event in enumerate(chat.history, start=1):
        role = event.role.capitalize()

        for part in event.parts:
            # Only one kind is logged per part: text takes precedence over a
            # function call, which takes precedence over a function response.
            text = part.text
            if text:
                entry_type, content = "Text", text
                if role == 'Model':
                    model_text.append(text + "\n")
            elif call := part.function_call:
                rendered_args = ", ".join(f"{key}={val}" for key, val in call.args.items())
                entry_type = "Function Call"
                content = f"Function call: {call.name}({rendered_args})"
            elif reply := part.function_response:
                entry_type, content = "Function Response", str(reply)
            else:
                # Nothing to log for a part with none of the three set.
                continue

            rows.append({"Chat ID": chat_id, "Role": role, "Type": entry_type, "Content": content})

    # The explicit column list fixes the column order, also for an empty log.
    df_log = pd.DataFrame(rows, columns=["Chat ID", "Role", "Type", "Content"])
    text_result = "".join(model_text)

    return df_log, text_result
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def generate_response(prompt):
    """Send the questions in ``prompt`` to Gemini and collect the conversation.

    ``prompt`` is split on blank lines (two consecutive newlines) and each
    non-blank piece is sent as its own message in one chat session, so later
    questions share context with earlier ones. Pieces that are empty or only
    whitespace are skipped rather than sent to the model.

    Parameters:
    - prompt: The question text; ``None`` is treated as an empty string.

    Returns:
    - A tuple ``(text_to_display, df_chat)``: the text collected from the chat
      history and the DataFrame log, both produced by chat_history_result.
    """
    model = genai.GenerativeModel(
        "models/gemini-1.5-flash-latest", tools=tools, system_instruction=instruction
    )

    # Retry requests that fail with a transient error.
    retry_policy = {"retry": retry.Retry(predicate=retry.if_transient_error)}

    # Start a chat with automatic function calling enabled.
    chat_model = model.start_chat(enable_automatic_function_calling=True)

    # Send the questions one by one. The loop variable has its own name so it
    # does not overwrite the `prompt` parameter.
    for question in (prompt or "").split("\n\n"):
        if not question.strip():
            # Extra blank lines or an empty textbox produce empty pieces;
            # skip them instead of sending an empty message.
            continue
        chat_model.send_message(question, request_options=retry_policy)

    df_chat, text_result = chat_history_result(chat_model)

    return text_result, df_chat
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
initial_prompt = """Question1:
|
| 157 |
+
Who bought the most product by quantity and the total of profit from the person?
|
| 158 |
+
Then Show top 5 item with most profit from that person.
|
| 159 |
+
|
| 160 |
+
Question2:
|
| 161 |
+
Compare profit between 2016 May and 2016 June, show the relative uplift of June profit compared to May profit.
|
| 162 |
+
|
| 163 |
+
Question3:
|
| 164 |
+
What is the city that generate the most profit then show the profit and quantity
|
| 165 |
+
"""
|
| 166 |
+
# Gradio page: header text, a questions textbox, a submit button, the text
# answer, the process log table, and a to-do note, created in that order.
block = gr.Blocks()

with block:
    # Page header. The Markdown text is kept flush-left so that no leading
    # whitespace ends up inside the string.
    gr.Markdown(
        """
# Automated Report
## Explored by Hervind & Nikolas Ermando
Opensource Data by Tableau regarding superstore orders
* can be accessed in https://www.kaggle.com/datasets/vivek468/superstore-dataset-final
""")

    # Questions input, pre-filled with the sample questions.
    prompt_box = gr.Textbox(label="Questions, (separate question by double enter / new lines)", lines=2, value= initial_prompt)

    btn = gr.Button(value="Submit")
    text_output = gr.Textbox(label="Results")
    df_output = gr.DataFrame(label="Process Log")

    # Both events call generate_response with the textbox content and fill the
    # two outputs: once on button click, and once through the Blocks load event.
    btn.click(generate_response, inputs=prompt_box, outputs=[text_output, df_output])
    block.load(fn = generate_response, inputs=prompt_box, outputs=[text_output, df_output])

    # Planned work, shown at the bottom of the page.
    gr.Markdown(
        """
# To Do:
* Explore several data and join tables capability
* In depth Comparison capability between two periods
* Add showing interactive plot based on the question.
""")

# Start the Gradio server.
block.launch()
|