qwekuaryee committed on
Commit
d73f855
·
verified ·
1 Parent(s): a94edc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -100
app.py CHANGED
@@ -5,232 +5,173 @@ from openpyxl.utils.dataframe import dataframe_to_rows
5
  import tempfile
6
 
7
  def process_file(file):
8
- # Read the uploaded Excel file into a DataFrame
9
  df = pd.read_excel(file.name)
10
-
11
- # Replace 0s with NaN
12
  df.replace(0, pd.NA, inplace=True)
13
-
14
- # Save the intermediate Excel file
15
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
16
  intermediate_file_path = tmp.name
17
  df.to_excel(intermediate_file_path, index=False)
18
 
19
- # Load the workbook
20
  wb = load_workbook(intermediate_file_path)
21
 
22
- # Iterate through each sheet in the workbook
23
  for sheet_name in wb.sheetnames:
24
  ws = wb[sheet_name]
25
-
26
- # Convert the sheet to a DataFrame
27
  data = ws.values
28
  columns = next(data)[0:]
29
  df = pd.DataFrame(data, columns=columns)
30
 
31
- # Get unique states
32
  unique_states = df['Name of State'].unique()
33
 
34
  for state in unique_states:
35
  state_str = str(state)
36
- # Filter data for the current state
37
  state_data = df[df['Name of State'] == state]
38
-
39
- # Get unique channels within the state
40
  unique_channels = state_data['Outlet Channel'].unique()
41
-
42
- # Initialize a list to hold mode data for each channel
43
  mode_data = []
44
 
45
  for channel in unique_channels:
46
  channel_str = str(channel)
47
- # Filter data for the current channel within the state
48
  channel_data = state_data[state_data['Outlet Channel'] == channel]
49
-
50
- # Calculate the count of non-null values for each column
51
  count_series = channel_data.count()
52
-
53
-
54
- # Find the mode for each column
55
  mode_series = channel_data.mode().iloc[0]
56
 
57
-
58
- combined_series = pd.concat([count_series.rename('COUNT'), mode_series.rename('MODE')], axis=1).T
59
-
60
- # Add the state and channel information to the mode data
 
 
 
 
 
 
 
61
  combined_series.loc[:,'STATE'] = state_str
62
  combined_series.loc[:,'CHANNEL'] = channel_str
63
- #mode_series['COUNT'] = record_count
64
  mode_data.append(combined_series)
65
 
66
- # Convert the mode data list to a DataFrame
67
  mode_df = pd.concat(mode_data)
68
-
69
- # Create a new sheet for the state
70
  new_ws = wb.create_sheet(title=state_str)
71
 
72
- # Write the mode data to the new sheet
73
  for r in dataframe_to_rows(mode_df, index=False, header=True):
74
  new_ws.append(r)
75
 
76
- # Save the workbook to a new file
77
  output_file_path = 'state_and_channel_modes.xlsx'
78
  wb.save(output_file_path)
79
 
80
  return output_file_path
81
 
82
  def process_regional(file):
83
- # Read the uploaded Excel file into a DataFrame
84
  df = pd.read_excel(file.name)
85
-
86
- # Replace 0s with NaN
87
  df.replace(0, pd.NA, inplace=True)
88
-
89
- # Save the intermediate Excel file
90
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
91
  intermediate_file_path = tmp.name
92
  df.to_excel(intermediate_file_path, index=False)
93
 
94
- # Load the workbook
95
  wb = load_workbook(intermediate_file_path)
96
 
97
- # Iterate through each sheet in the workbook
98
  for sheet_name in wb.sheetnames:
99
  ws = wb[sheet_name]
100
-
101
- # Convert the sheet to a DataFrame
102
  data = ws.values
103
  columns = next(data)[0:]
104
  df = pd.DataFrame(data, columns=columns)
105
 
106
- # Get unique states
107
  unique_states = df['Region'].unique()
108
 
109
  for state in unique_states:
110
  state_str = str(state)
111
- # Filter data for the current state
112
  state_data = df[df['Region'] == state]
113
-
114
- # Get unique channels within the state
115
  unique_channels = state_data['Outlet Channel'].unique()
116
-
117
- # Initialize a list to hold mode data for each channel
118
  mode_data = []
119
 
120
  for channel in unique_channels:
121
  channel_str = str(channel)
122
- # Filter data for the current channel within the state
123
  channel_data = state_data[state_data['Outlet Channel'] == channel]
124
-
125
- # Calculate the count of non-null values for each column
126
  count_series = channel_data.count()
127
-
128
-
129
- # Find the mode for each column
130
  mode_series = channel_data.mode().iloc[0]
131
 
132
-
133
- combined_series = pd.concat([count_series.rename('COUNT'), mode_series.rename('MODE')], axis=1).T
134
-
135
- # Add the state and channel information to the mode data
 
 
 
 
 
 
 
136
  combined_series.loc[:,'STATE'] = state_str
137
  combined_series.loc[:,'CHANNEL'] = channel_str
138
- #mode_series['COUNT'] = record_count
139
  mode_data.append(combined_series)
140
 
141
- # Convert the mode data list to a DataFrame
142
  mode_df = pd.concat(mode_data)
143
-
144
- # Create a new sheet for the state
145
  new_ws = wb.create_sheet(title=state_str)
146
 
147
- # Write the mode data to the new sheet
148
  for r in dataframe_to_rows(mode_df, index=False, header=True):
149
  new_ws.append(r)
150
 
151
- # Save the workbook to a new file
152
  output_file_path = 'regional.xlsx'
153
  wb.save(output_file_path)
154
 
155
  return output_file_path
156
 
157
  def process_national(file):
158
- # Read the uploaded Excel file into a DataFrame
159
  df = pd.read_excel(file.name)
160
-
161
- # Replace 0s with NaN
162
  df.replace(0, pd.NA, inplace=True)
163
-
164
- # Save the intermediate Excel file
165
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
166
  intermediate_file_path = tmp.name
167
  df.to_excel(intermediate_file_path, index=False)
168
 
169
- # Load the workbook
170
  wb = load_workbook(intermediate_file_path)
171
 
172
- # Iterate through each sheet in the workbook
173
  for sheet_name in wb.sheetnames:
174
  ws = wb[sheet_name]
175
-
176
- # Convert the sheet to a DataFrame
177
  data = ws.values
178
  columns = next(data)[0:]
179
  df = pd.DataFrame(data, columns=columns)
180
 
181
- # Get unique states
182
  unique_states = df['Nation'].unique()
183
 
184
  for state in unique_states:
185
  state_str = str(state)
186
- # Filter data for the current state
187
  state_data = df[df['Nation'] == state]
188
-
189
- # Get unique channels within the state
190
  unique_channels = state_data['Outlet Channel'].unique()
191
-
192
- # Initialize a list to hold mode data for each channel
193
  mode_data = []
194
 
195
  for channel in unique_channels:
196
  channel_str = str(channel)
197
- # Filter data for the current channel within the state
198
  channel_data = state_data[state_data['Outlet Channel'] == channel]
199
-
200
- # Calculate the count of non-null values for each column
201
  count_series = channel_data.count()
202
-
203
-
204
- # Find the mode for each column
205
  mode_series = channel_data.mode().iloc[0]
206
 
207
-
208
- combined_series = pd.concat([count_series.rename('COUNT'), mode_series.rename('MODE')], axis=1).T
209
-
210
- # Add the state and channel information to the mode data
 
 
 
 
 
 
 
211
  combined_series.loc[:,'STATE'] = state_str
212
  combined_series.loc[:,'CHANNEL'] = channel_str
213
- #mode_series['COUNT'] = record_count
214
  mode_data.append(combined_series)
215
 
216
- # Convert the mode data list to a DataFrame
217
  mode_df = pd.concat(mode_data)
218
-
219
- # Create a new sheet for the state
220
  new_ws = wb.create_sheet(title=state_str)
221
 
222
- # Write the mode data to the new sheet
223
  for r in dataframe_to_rows(mode_df, index=False, header=True):
224
  new_ws.append(r)
225
 
226
- # Save the workbook to a new file
227
  output_file_path = 'national.xlsx'
228
  wb.save(output_file_path)
229
 
230
  return output_file_path
231
 
232
-
233
- # Set up the Gradio interface
234
  iface = gr.Interface(
235
  fn=process_file,
236
  inputs=gr.File(file_types=[".xlsx"]),
@@ -244,16 +185,15 @@ iface2 = gr.Interface(
244
  inputs=gr.File(file_types=[".xlsx"]),
245
  outputs=gr.File(),
246
  title="Excel File Processor",
247
- description="Upload an Excel file to process it and generate a new file with state and channel modes."
248
  )
249
 
250
# National-level tab. Its handler groups rows by the "Nation" column, so the
# description names that grouping instead of repeating the state-level text.
iface3 = gr.Interface(
    fn=process_national,
    inputs=gr.File(file_types=[".xlsx"]),
    outputs=gr.File(),
    title="Excel File Processor",
    description="Upload an Excel file to process it and generate a new file with national and channel modes."
)

# Launch the Gradio interface: one tab per aggregation level.
gr.TabbedInterface([iface3, iface2, iface], tab_names=['National', 'Region', 'State']).launch()
 
5
  import tempfile
6
 
7
  def process_file(file):
 
8
  df = pd.read_excel(file.name)
 
 
9
  df.replace(0, pd.NA, inplace=True)
10
+
 
11
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
12
  intermediate_file_path = tmp.name
13
  df.to_excel(intermediate_file_path, index=False)
14
 
 
15
  wb = load_workbook(intermediate_file_path)
16
 
 
17
  for sheet_name in wb.sheetnames:
18
  ws = wb[sheet_name]
 
 
19
  data = ws.values
20
  columns = next(data)[0:]
21
  df = pd.DataFrame(data, columns=columns)
22
 
 
23
  unique_states = df['Name of State'].unique()
24
 
25
  for state in unique_states:
26
  state_str = str(state)
 
27
  state_data = df[df['Name of State'] == state]
 
 
28
  unique_channels = state_data['Outlet Channel'].unique()
 
 
29
  mode_data = []
30
 
31
  for channel in unique_channels:
32
  channel_str = str(channel)
 
33
  channel_data = state_data[state_data['Outlet Channel'] == channel]
 
 
34
  count_series = channel_data.count()
 
 
 
35
  mode_series = channel_data.mode().iloc[0]
36
 
37
+ numeric_columns = channel_data.select_dtypes(include='number').columns
38
+ max_series = channel_data[numeric_columns].max()
39
+ min_series = channel_data[numeric_columns].min()
40
+
41
+ combined_series = pd.concat([
42
+ count_series.rename('COUNT'),
43
+ mode_series.rename('MODE'),
44
+ max_series.rename('MAX'),
45
+ min_series.rename('MIN')
46
+ ], axis=1).T
47
+
48
  combined_series.loc[:,'STATE'] = state_str
49
  combined_series.loc[:,'CHANNEL'] = channel_str
 
50
  mode_data.append(combined_series)
51
 
 
52
  mode_df = pd.concat(mode_data)
 
 
53
  new_ws = wb.create_sheet(title=state_str)
54
 
 
55
  for r in dataframe_to_rows(mode_df, index=False, header=True):
56
  new_ws.append(r)
57
 
 
58
  output_file_path = 'state_and_channel_modes.xlsx'
59
  wb.save(output_file_path)
60
 
61
  return output_file_path
62
 
63
  def process_regional(file):
 
64
  df = pd.read_excel(file.name)
 
 
65
  df.replace(0, pd.NA, inplace=True)
66
+
 
67
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
68
  intermediate_file_path = tmp.name
69
  df.to_excel(intermediate_file_path, index=False)
70
 
 
71
  wb = load_workbook(intermediate_file_path)
72
 
 
73
  for sheet_name in wb.sheetnames:
74
  ws = wb[sheet_name]
 
 
75
  data = ws.values
76
  columns = next(data)[0:]
77
  df = pd.DataFrame(data, columns=columns)
78
 
 
79
  unique_states = df['Region'].unique()
80
 
81
  for state in unique_states:
82
  state_str = str(state)
 
83
  state_data = df[df['Region'] == state]
 
 
84
  unique_channels = state_data['Outlet Channel'].unique()
 
 
85
  mode_data = []
86
 
87
  for channel in unique_channels:
88
  channel_str = str(channel)
 
89
  channel_data = state_data[state_data['Outlet Channel'] == channel]
 
 
90
  count_series = channel_data.count()
 
 
 
91
  mode_series = channel_data.mode().iloc[0]
92
 
93
+ numeric_columns = channel_data.select_dtypes(include='number').columns
94
+ max_series = channel_data[numeric_columns].max()
95
+ min_series = channel_data[numeric_columns].min()
96
+
97
+ combined_series = pd.concat([
98
+ count_series.rename('COUNT'),
99
+ mode_series.rename('MODE'),
100
+ max_series.rename('MAX'),
101
+ min_series.rename('MIN')
102
+ ], axis=1).T
103
+
104
  combined_series.loc[:,'STATE'] = state_str
105
  combined_series.loc[:,'CHANNEL'] = channel_str
 
106
  mode_data.append(combined_series)
107
 
 
108
  mode_df = pd.concat(mode_data)
 
 
109
  new_ws = wb.create_sheet(title=state_str)
110
 
 
111
  for r in dataframe_to_rows(mode_df, index=False, header=True):
112
  new_ws.append(r)
113
 
 
114
  output_file_path = 'regional.xlsx'
115
  wb.save(output_file_path)
116
 
117
  return output_file_path
118
 
119
  def process_national(file):
 
120
  df = pd.read_excel(file.name)
 
 
121
  df.replace(0, pd.NA, inplace=True)
122
+
 
123
  with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
124
  intermediate_file_path = tmp.name
125
  df.to_excel(intermediate_file_path, index=False)
126
 
 
127
  wb = load_workbook(intermediate_file_path)
128
 
 
129
  for sheet_name in wb.sheetnames:
130
  ws = wb[sheet_name]
 
 
131
  data = ws.values
132
  columns = next(data)[0:]
133
  df = pd.DataFrame(data, columns=columns)
134
 
 
135
  unique_states = df['Nation'].unique()
136
 
137
  for state in unique_states:
138
  state_str = str(state)
 
139
  state_data = df[df['Nation'] == state]
 
 
140
  unique_channels = state_data['Outlet Channel'].unique()
 
 
141
  mode_data = []
142
 
143
  for channel in unique_channels:
144
  channel_str = str(channel)
 
145
  channel_data = state_data[state_data['Outlet Channel'] == channel]
 
 
146
  count_series = channel_data.count()
 
 
 
147
  mode_series = channel_data.mode().iloc[0]
148
 
149
+ numeric_columns = channel_data.select_dtypes(include='number').columns
150
+ max_series = channel_data[numeric_columns].max()
151
+ min_series = channel_data[numeric_columns].min()
152
+
153
+ combined_series = pd.concat([
154
+ count_series.rename('COUNT'),
155
+ mode_series.rename('MODE'),
156
+ max_series.rename('MAX'),
157
+ min_series.rename('MIN')
158
+ ], axis=1).T
159
+
160
  combined_series.loc[:,'STATE'] = state_str
161
  combined_series.loc[:,'CHANNEL'] = channel_str
 
162
  mode_data.append(combined_series)
163
 
 
164
  mode_df = pd.concat(mode_data)
 
 
165
  new_ws = wb.create_sheet(title=state_str)
166
 
 
167
  for r in dataframe_to_rows(mode_df, index=False, header=True):
168
  new_ws.append(r)
169
 
 
170
  output_file_path = 'national.xlsx'
171
  wb.save(output_file_path)
172
 
173
  return output_file_path
174
 
 
 
175
  iface = gr.Interface(
176
  fn=process_file,
177
  inputs=gr.File(file_types=[".xlsx"]),
 
185
  inputs=gr.File(file_types=[".xlsx"]),
186
  outputs=gr.File(),
187
  title="Excel File Processor",
188
+ description="Upload an Excel file to process it and generate a new file with regional and channel modes."
189
  )
190
 
191
# National-level tab: takes an .xlsx upload and returns the workbook produced
# by process_national.
iface3 = gr.Interface(
    process_national,
    gr.File(file_types=[".xlsx"]),
    gr.File(),
    title="Excel File Processor",
    description=(
        "Upload an Excel file to process it and generate a new file "
        "with national and channel modes."
    ),
)

# Serve the three interfaces as tabs, in the order National, Region, State.
gr.TabbedInterface(
    interface_list=[iface3, iface2, iface],
    tab_names=["National", "Region", "State"],
).launch()