rahgadda committed on
Commit
81edf33
·
1 Parent(s): 176326d

Initial Draft

Browse files
Files changed (1) hide show
  1. pages/2_Data _Play_Ground.py +198 -0
pages/2_Data _Play_Ground.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import io
4
+ import re
5
+ import base64
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import pandasql as psql
9
+
10
+ ################################
11
+ ######### Variables ############
12
+ ################################
13
# -- Loading Variables
# Directory of the path found in sys.argv[0]; the project CSV below is
# resolved relative to it.
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# Empty table registry: one row per loaded file (its name and its DataFrame).
file_details = pd.DataFrame(columns=['file_name', 'data'])

# -- Loading Session Data
# Each key is seeded only when it is missing, so values already stored in the
# session are left untouched.
if 'project_data' not in st.session_state:
    st.session_state['project_data'] = pd.read_csv(script_directory+'/data/project.csv')

if 'global_dataframe' not in st.session_state:
    st.session_state['global_dataframe'] = file_details

# View flags: `load_sql` gates the loaded-data preview, `run_sql` gates the
# SQL editor.
if 'load_sql' not in st.session_state:
    st.session_state['load_sql'] = False

if 'run_sql' not in st.session_state:
    st.session_state['run_sql'] = False
29
+
30
+ ################################
31
+ ####### GenericFunctions #######
32
+ ################################
33
+ # -- Create Dynamic Columns
34
def generate_column_names(end):
    """Return the generated header names ``Col1`` .. ``Col{end + 1}``.

    One more name than ``end`` is produced. NOTE(review): the caller in this
    file passes the same number it uses as the per-row separator count, and a
    row with ``end`` separators has ``end + 1`` fields -- confirm that this is
    the intended meaning of ``end``.

    Args:
        end: Positive integer; ``end + 1`` names are generated.

    Returns:
        list[str]: ``["Col1", "Col2", ..., f"Col{end + 1}"]``.

    Raises:
        ValueError: If ``end`` is smaller than 1.
    """
    if end < 1:
        # The guard accepts 1, so the message states the real lower bound.
        raise ValueError("End value must be at least 1")

    return [f"Col{i}" for i in range(1, end + 2)]
40
+
41
+ # -- Add missing separator
42
def add_missing_separators(file_data, separator, max_header_count):
    """Pad short rows with trailing separators.

    Every returned row contains at least ``max_header_count`` occurrences of
    ``separator``; rows that already have that many (or more) are returned
    unpadded.

    Args:
        file_data: Iterable of text lines (for example an ``io.StringIO``).
        separator: Field separator string.
        max_header_count: Minimum number of separators each row must contain.

    Returns:
        list[str]: One entry per input line, without its line terminator.
    """
    modified_rows = []

    for line in file_data:
        # Lines read from a text stream still end in "\n" (or "\r\n").
        # Strip it before padding; otherwise the separators are appended
        # after the newline and the newline ends up inside a field.
        line = line.rstrip("\r\n")

        missing = max_header_count - line.count(separator)
        if missing > 0:
            line += separator * missing

        modified_rows.append(line)

    return modified_rows
60
+
61
+ # -- Create global dataframes
62
def create_global_df(sep=",", usecols=None, max_header_count=1):
    """Parse the uploaded CSV files and store them in the session state.

    Reads the module-level ``uploaded_files`` list. Each file becomes one row
    of a ``file_name`` / ``data`` DataFrame that is saved to
    ``st.session_state.global_dataframe``.

    Args:
        sep: Field separator used to parse the files.
        usecols: Optional list of column names. When given, each file is
            split manually on ``sep`` after short rows are padded, and these
            names become the columns. When ``None``, ``pd.read_csv`` parses
            the file.
        max_header_count: Minimum number of separators per row, forwarded to
            ``add_missing_separators`` (only used when ``usecols`` is given).

    Raises:
        Exception: Any parsing error is reported with ``st.error`` and then
            re-raised unchanged.
    """
    file_details = pd.DataFrame(columns=['file_name', 'data'])
    try:
        if uploaded_files is not None:
            for file in uploaded_files:
                # Rewind first so a handle that was already consumed is read
                # from the start again. Assumes the upload object is a
                # seekable BytesIO-like handle -- TODO confirm.
                file.seek(0)

                if usecols is not None:
                    file_data = io.StringIO(file.read().decode())
                    modified_rows = add_missing_separators(file_data, sep, max_header_count)
                    df = pd.DataFrame(each_row.split(sep) for each_row in modified_rows)
                    df.columns = usecols
                else:
                    df = pd.read_csv(file, sep=sep)

                # Table name = file name without directory and extension.
                # The extension is matched case-insensitively, and a name
                # that still does not match falls back to splitext instead
                # of failing on `None.group`.
                match = re.search(r'([^/]+)\.csv$', file.name, re.IGNORECASE)
                if match:
                    file_name = match.group(1)
                else:
                    file_name = os.path.splitext(os.path.basename(file.name))[0]

                file_details.loc[len(file_details)] = {
                    'file_name': file_name,
                    'data': df
                }

        st.session_state.global_dataframe = file_details
    except Exception as e:
        st.error(f"Error processing csv: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise
87
+
88
+ # -- Load global dataframes
89
def load_global_df():
    """Load the uploaded files, generating column names when requested.

    Reads the ``header`` checkbox (and, when it is ticked, ``header_count``)
    from ``st.session_state`` and the module-level ``sep``, then delegates to
    ``create_global_df``.
    """
    if not st.session_state.header:
        # No generated headers: create_global_df runs with its defaults.
        print("No Headers Added")
        create_global_df(sep)
        return

    print("Added Headers")
    generated_names = generate_column_names(st.session_state.header_count)
    create_global_df(sep, generated_names, st.session_state.header_count)
97
+
98
+ # -- Run SQL Data
99
def run_sql_df():
    """Render the SQL editor and run the entered query on the loaded tables.

    Each row of ``st.session_state.global_dataframe`` is exposed to the query
    as a table named after its ``file_name``. When the "Run SQL Query" button
    is pressed, the result is displayed and offered as a CSV download link.
    Any error raised while rendering or querying is shown with ``st.error``
    and not re-raised.
    """
    # Keep the tables in a private mapping instead of writing them into
    # globals(): table names come from uploaded file names, so a file called
    # e.g. "st.csv" or "pd.csv" would otherwise replace a module this script
    # uses. It also limits queries to the uploaded tables.
    tables = {
        row['file_name']: row['data']
        for _, row in st.session_state.global_dataframe.iterrows()
    }

    try:
        sql_query = st.text_area(label="Sql Query", value="", key="sql_query", height=200)

        if st.button("Run SQL Query"):
            result_df = psql.sqldf(sql_query, tables)
            st.write("Query Result")
            st.dataframe(result_df)

            # Embed the result as a base64 data URI so it can be downloaded
            # without writing a file on the server.
            csv_data = result_df.to_csv(index=False)
            b64 = base64.b64encode(csv_data.encode()).decode()
            st.markdown(f'<a href="data:file/csv;base64,{b64}" download="result.csv">Download Result CSV</a>', unsafe_allow_html=True)

    except Exception as e:
        st.error(f"Error executing SQL query: {str(e)}")
117
+
118
+ ################################
119
+ ####### Display of data ########
120
+ ################################
121
# -- Streamlit Settings
st.set_page_config(layout='wide')
st.title("Data Play Ground")

# -- Delimiter
# Empty text elements act as vertical spacing.
for _ in range(3):
    st.text("")
delimiter_col, _, _ = st.columns(3)
delimiter = delimiter_col.selectbox(label="File Delimiter", options=[",", "|"], key="delimiter")

# -- Upload Sample Files
for _ in range(2):
    st.text("")
upload_col, _, header_col, count_col = st.columns([1, 0.3, 0.7, 1])
# Module-level name: create_global_df reads `uploaded_files` directly.
uploaded_files = upload_col.file_uploader(
    "Choose a file",
    type="csv",
    key="uploaded_files",
    accept_multiple_files=True,
)

# -- Add header Indicator
header = header_col.checkbox(label='Add Header', key="header")

# -- Dynamic Headers Count
# The count input (and its session key "header_count") exists only while the
# "Add Header" box is ticked.
if header:
    header_count = count_col.number_input(
        label="No of Header",
        value=2,
        key="header_count",
        min_value=1,
        max_value=100,
        step=1,
    )
163
+
164
# -- Load Data
st.text("")
col1, col2, col3 = st.columns([1,1,8])
# Module-level name: load_global_df reads `sep` directly.
sep = st.session_state.delimiter
if col1.button("Load Data"):
    # Switch to the loaded-data preview and hide the SQL editor.
    st.session_state.load_sql=True
    st.session_state.run_sql=False

    load_global_df()

# -- Run SQL Query
if col2.button("SQL"):
    # Only flip the view flags here. The editor is rendered once by the flag
    # check below; calling run_sql_df() here as well would create the
    # key="sql_query" text area twice in the same run, which Streamlit
    # rejects as a duplicate widget.
    st.session_state.load_sql=False
    st.session_state.run_sql=True

# -- Display SQL Query Data
# The flag is kept in the session state so the editor stays visible on later
# runs, e.g. after "Run SQL Query" is pressed.
if st.session_state.run_sql:
    run_sql_df()
184
+
185
# -- Display Loaded Data
if len(st.session_state.global_dataframe) > 0 and st.session_state.load_sql:
    loaded_tables = st.session_state.global_dataframe

    col1, col2, col3 = st.columns(3)
    col1.selectbox(
        label="Select Table Name",
        key="table_name",
        options=loaded_tables['file_name']
    )

    # Show the selected table straight from the stored DataFrame. Building
    # "select * from <name>" from an upload-derived name breaks for names
    # containing spaces or hyphens, and copying tables into globals() lets a
    # file name overwrite a module used by this script.
    selected = loaded_tables.loc[
        loaded_tables['file_name'] == st.session_state.table_name, 'data'
    ]
    if not selected.empty:
        # Several uploads can share a name; the last one wins.
        st.dataframe(selected.iloc[-1])