rahgadda committed on
Commit
a0a58e9
·
1 Parent(s): 89a1200

Initial Draft

Browse files
Files changed (1) hide show
  1. pages/4_Data_Generation.py +135 -0
pages/4_Data_Generation.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import csv
4
+ import streamlit as st
5
+ import pandas as pd
6
+ from datetime import date
7
+ import pandasql as psql
8
+ import base64
9
+
10
+ ################################
11
+ ######### Variables ############
12
+ ################################
13
# -- Loading Variables
# Directory of the script named in sys.argv[0]; files under data/ are
# resolved relative to it.
launch_script_path = os.path.abspath(sys.argv[0])
script_directory = os.path.dirname(launch_script_path)

# -- Loading Session Data
# Each key is populated only when it is absent from the session state,
# so an existing value is never overwritten here.
if 'project_data' not in st.session_state:
    st.session_state['project_data'] = pd.read_csv(
        script_directory+'/data/project.csv'
    )

if 'mapping_df' not in st.session_state:
    # Empty mapping table with the columns the rest of the page reads.
    st.session_state['mapping_df'] = pd.DataFrame(
        columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"]
    )
22
+
23
+ ################################
24
+ ####### GenericFunctions #######
25
+ ################################
26
+ # -- Load Mapping File
27
def load_mapping_file():
    """Load the mapping CSV of the selected project into the session state.

    Does nothing when ``project_name`` is not yet in the session state.
    Otherwise the project row matching ``project_name`` is looked up in
    ``project_data`` and the file
    ``<script_directory>/data/<Id>_<Source>_<Destination>.csv`` is read
    (pipe-separated, no quote handling) into ``st.session_state.mapping_df``.

    Any failure (no matching project row, missing or unreadable file, ...)
    resets ``mapping_df`` to an empty frame with the expected columns and
    reports the exception through ``st.error``.
    """
    state = st.session_state
    if 'project_name' not in state:
        return

    try:
        projects = state.project_data
        # Filter once and read the three name parts from the same row set.
        selected = projects[projects['Project'] == state.project_name]
        project_id = str(selected.Id.values[0])
        source_name = selected.Source.values[0]
        destination_name = selected.Destination.values[0]
        file_name = (
            script_directory+'/data/'
            + project_id+"_"+source_name+"_"+destination_name
            + '.csv'
        )
        state.mapping_df = pd.read_csv(file_name,sep="|",quoting=csv.QUOTE_NONE)
    except Exception as e:
        state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"])
        st.error(f"Unable to load mapping file - {e}")
38
+
39
+ ################################
40
+ ####### Display of data ########
41
+ ################################
42
# -- Streamlit Settings
st.set_page_config(layout='wide')
st.title("Data Generation")

# -- Add Project Dropdown
st.text("")
st.text("")
st.text("")
col1, col2, col3 = st.columns(3)
project_name = col1.selectbox(
    'Select Project',
    st.session_state.project_data['Project'],
    key="project_name"
)

# Load the mapping only after the selectbox exists. Writing
# `on_change=load_mapping_file()` would call the loader while the arguments
# are being evaluated (before the widget has stored its first value under
# "project_name") and hand its None return value to Streamlit as the
# callback, so the initially selected project would never get its mapping.
# Calling it here on every run keeps mapping_df in step with the selection,
# including the very first render.
if project_name is not None:
    load_mapping_file()
57
+
58
# -- Build the SELECT statement described by the mapping rows
def _build_select_statement(mapping_df):
    """Return the ``SELECT ... from df`` statement for *mapping_df*.

    Every mapping row contributes one projected column aliased to its
    DestinationColumn:
    - Type 'Constant': the row's Expression is emitted verbatim.
    - Type 'Pandas': the DestinationColumn itself is selected (the Show Data
      step has already computed it into the data frame).
    - any other Type: the SourceColumn is selected.
    """
    projections = []
    for _, row in mapping_df.iterrows():
        destination_column = str(row['DestinationColumn'])
        column_type = row['Type']

        if column_type == 'Constant':
            expression = row['Expression'] if 'Expression' in row else None
            selected = str(expression)
        elif column_type == 'Pandas':
            selected = '"' + destination_column + '"'
        else:
            # Use the source column as-is
            selected = '"' + str(row['SourceColumn']) + '"'

        projections.append(selected + ' AS "' + destination_column + '"')

    return "SELECT " + ",".join(projections) + " from df"


# -- Upload Data
# Everything below needs a loaded mapping, so it all lives under this guard.
if len(st.session_state.mapping_df) > 0:
    st.text("")
    st.text("")
    st.text("")
    col1, col2, col3 = st.columns(3)

    cond = (st.session_state.project_data['Project'] == st.session_state.project_name)
    result = st.session_state.project_data[cond].Source.values[0]
    with col1:
        source_data_file = st.file_uploader(
            "Source data file name - "+str(result)+".csv",
            type="csv",
            key="source_data_file",
            accept_multiple_files=True
        )

    # -- Button Show Data
    st.text("")
    st.text("")
    col1, col2, col3 = st.columns([0.3,0.5,2.2])

    if col1.button("Show Data"):
        # With accept_multiple_files=True the uploader yields a list, so
        # "nothing uploaded" is an empty list rather than None.
        if not source_data_file:
            st.error("No source file uploaded, upload a csv file first")
        else:
            # The statement depends only on the mapping, so build it once.
            sql_statement = _build_select_statement(st.session_state.mapping_df)

            # Each file replaces st.session_state.df, so the last uploaded
            # file is the one the Generate Data step works on.
            for file in source_data_file:
                df = pd.read_csv(file)

                # Update dataframe with Pandas Mapping Fields
                for _, row in st.session_state.mapping_df.iterrows():
                    if row['Type'] == 'Pandas':
                        column_name = row['DestinationColumn']
                        expression = row['Expression'].replace("'", "")
                        # SECURITY: eval() runs arbitrary Python taken from
                        # the mapping file with full access to this script's
                        # globals (the expression is expected to reference
                        # `df`). Only use mapping files from a trusted source.
                        df[column_name] = eval(expression)

                st.session_state.df = df
                st.session_state.sql_statement = sql_statement

                # Display Data
                st.dataframe(df)

    # -- Button Generate Data
    if col2.button("Generate Data"):
        # "Show Data" may never have been clicked, in which case the session
        # has no data frame yet; report that instead of raising.
        if 'df' not in st.session_state or len(st.session_state.df) == 0:
            st.error("No records available to run query, click on Show Data")
        else:
            df = st.session_state.df
            # NOTE(review): this text area is created inside a button branch,
            # which only runs on the rerun triggered by the click. Editing the
            # query presumably triggers another rerun that skips this branch,
            # so edits would never be executed - confirm and, if so, persist a
            # flag in st.session_state instead of relying on the button value.
            sql_query = st.text_area(label="Sql Query", value=st.session_state.sql_statement, key="sql_query", height=200)
            try:
                # Expose only the data frame to the SQL engine rather than
                # every name defined in this script.
                result_df = psql.sqldf(sql_query, {"df": df})
                st.write("Query Result")
                st.dataframe(result_df)
                csv_data = result_df.to_csv(index=False)
                b64 = base64.b64encode(csv_data.encode()).decode()
                st.markdown(f'<a href="data:file/csv;base64,{b64}" download="result.csv">Download Result CSV</a>', unsafe_allow_html=True)
            except Exception as e:
                st.error(f"Error executing SQL query: {str(e)}")