Add time series sales data as an example dataset
#2
by
nolanzandi
- opened
- .gitattributes +1 -0
- data_sources/upload_file.py +13 -1
- functions/chat_functions.py +10 -5
- samples/online_retail_data.csv +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
samples/online_retail_data.csv filter=lfs diff=lfs merge=lfs -text
|
data_sources/upload_file.py
CHANGED
|
@@ -1,8 +1,16 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import sqlite3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def process_data_upload(data_file, session_hash):
|
| 5 |
-
|
|
|
|
| 6 |
|
| 7 |
# Read each sheet and store data in a DataFrame
|
| 8 |
#data = df.parse(sheet_name)
|
|
@@ -10,6 +18,10 @@ def process_data_upload(data_file, session_hash):
|
|
| 10 |
# ...
|
| 11 |
df.columns = df.columns.str.replace(' ', '_')
|
| 12 |
df.columns = df.columns.str.replace('/', '_')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
connection = sqlite3.connect(f'data_source_{session_hash}.db')
|
| 15 |
print("Opened database successfully");
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import sqlite3
|
| 3 |
+
import csv
|
| 4 |
+
|
| 5 |
+
def get_delimiter(file_path, bytes = 4096):
    """Detect the field delimiter used by a delimited text file.

    Args:
        file_path: Path of the file to inspect.
        bytes: Maximum number of characters to sample from the start of the
            file. (The name shadows the builtin but is kept so existing
            keyword callers continue to work.)

    Returns:
        The single-character delimiter reported by ``csv.Sniffer``, or ``","``
        when the sniffer cannot determine one (for example an empty or
        single-column file).
    """
    # Use a context manager so the handle is closed even if reading fails;
    # the previous bare open(...).read(...) leaked the file descriptor.
    # Undecodable bytes are replaced rather than raised on: the sample is only
    # used to guess the delimiter, not to load the data.
    with open(file_path, "r", encoding="utf-8", errors="replace") as sample_file:
        sample = sample_file.read(bytes)

    try:
        return csv.Sniffer().sniff(sample).delimiter
    except csv.Error:
        # The sniffer gives up on empty/one-column samples; fall back to the
        # conventional CSV delimiter instead of crashing the upload.
        return ","
|
| 10 |
|
| 11 |
def process_data_upload(data_file, session_hash):
|
| 12 |
+
delimiter = get_delimiter(data_file)
|
| 13 |
+
df = pd.read_csv(data_file, sep=delimiter)
|
| 14 |
|
| 15 |
# Read each sheet and store data in a DataFrame
|
| 16 |
#data = df.parse(sheet_name)
|
|
|
|
| 18 |
# ...
|
| 19 |
df.columns = df.columns.str.replace(' ', '_')
|
| 20 |
df.columns = df.columns.str.replace('/', '_')
|
| 21 |
+
|
| 22 |
+
for column in df.columns:
|
| 23 |
+
if "date" in column.lower() or "time" in column.lower():
|
| 24 |
+
df[column] = pd.to_datetime(df[column])
|
| 25 |
|
| 26 |
connection = sqlite3.connect(f'data_source_{session_hash}.db')
|
| 27 |
print("Opened database successfully");
|
functions/chat_functions.py
CHANGED
|
@@ -67,18 +67,23 @@ def example_display(input):
|
|
| 67 |
display = True
|
| 68 |
else:
|
| 69 |
display = False
|
| 70 |
-
return gr.update(visible=display)
|
| 71 |
|
| 72 |
css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"
|
| 73 |
|
| 74 |
with gr.Blocks(css=css) as demo:
|
| 75 |
title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
|
| 76 |
description = gr.HTML("<p style='text-align:center;'>Upload a CSV file and chat with our virtual data analyst to get insights on your data set</p>")
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
|
| 80 |
-
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
@gr.render(inputs=file_output)
|
| 84 |
def data_options(filename, request: gr.Request):
|
|
|
|
| 67 |
display = True
|
| 68 |
else:
|
| 69 |
display = False
|
| 70 |
+
return [gr.update(visible=display),gr.update(visible=display)]
|
| 71 |
|
| 72 |
css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;}"
|
| 73 |
|
| 74 |
with gr.Blocks(css=css) as demo:
|
| 75 |
title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
|
| 76 |
description = gr.HTML("<p style='text-align:center;'>Upload a CSV file and chat with our virtual data analyst to get insights on your data set</p>")
|
| 77 |
+
example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
|
| 78 |
+
example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
|
| 79 |
+
with gr.Row():
|
| 80 |
+
example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn", size="md", variant="primary")
|
| 81 |
+
example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn", size="md", variant="primary")
|
| 82 |
+
|
| 83 |
file_output = gr.File(label="CSV File", show_label=True, elem_classes="file_marker", file_types=['.csv'])
|
| 84 |
+
example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
|
| 85 |
+
example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
|
| 86 |
+
file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2])
|
| 87 |
|
| 88 |
@gr.render(inputs=file_output)
|
| 89 |
def data_options(filename, request: gr.Request):
|
samples/online_retail_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0d4e4f72b6bc62ce831e8243bc02e491fda0085c1d94f3075063411539b1f04
|
| 3 |
+
size 45580673
|