SamadhiDBS committed on
Commit
6ab17dd
·
verified ·
1 Parent(s): a897569

Update app/export_utils.py

Browse files
Files changed (1) hide show
  1. app/export_utils.py +248 -250
app/export_utils.py CHANGED
@@ -1,251 +1,249 @@
1
- """
2
- Export Utilities - CSV, Excel, and REAL Power BI export
3
- """
4
-
5
- import pandas as pd
6
- import io
7
- import json
8
- from datetime import datetime
9
-
10
- class ExportUtils:
11
- def __init__(self, df):
12
- self.df = df
13
-
14
- def to_csv(self):
15
- """Export to CSV"""
16
- return self.df.to_csv(index=False).encode('utf-8')
17
-
18
- def to_excel(self):
19
- """Export to Excel with formatting"""
20
- output = io.BytesIO()
21
- with pd.ExcelWriter(output, engine='openpyxl') as writer:
22
- # Write main data
23
- self.df.to_excel(writer, sheet_name='Data', index=False)
24
-
25
- # Add summary sheet
26
- numeric_cols = self.df.select_dtypes(include=['number']).columns
27
- if len(numeric_cols) > 0:
28
- summary = self.df[numeric_cols].describe()
29
- summary.to_excel(writer, sheet_name='Summary', index=True)
30
-
31
- # Add column info sheet
32
- col_info = pd.DataFrame({
33
- 'Column': self.df.columns,
34
- 'Type': self.df.dtypes.astype(str),
35
- 'Nulls': self.df.isnull().sum(),
36
- 'Unique': self.df.nunique()
37
- })
38
- col_info.to_excel(writer, sheet_name='Column Info', index=False)
39
-
40
- output.seek(0)
41
- return output.getvalue()
42
-
43
- def to_powerbi_ready(self):
44
- """Prepare data for Power BI - Creates CSV optimized for Power BI"""
45
- df_powerbi = self.df.copy()
46
-
47
- # Clean column names (Power BI friendly)
48
- df_powerbi.columns = [col.replace(' ', '_').replace('-', '_').replace('/', '_') for col in df_powerbi.columns]
49
-
50
- # Clean datetime columns for Power BI
51
- for col in df_powerbi.columns:
52
- if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower():
53
- try:
54
- df_powerbi[col] = pd.to_datetime(df_powerbi[col])
55
- except:
56
- pass
57
-
58
- # Convert to CSV for Power BI import
59
- return df_powerbi.to_csv(index=False).encode('utf-8')
60
-
61
- def to_powerbi_with_metadata(self):
62
- """Export to Power BI with metadata file"""
63
- # Main data CSV
64
- data_csv = self.to_powerbi_ready()
65
-
66
- # Create metadata JSON
67
- numeric_cols = self.df.select_dtypes(include=['number']).columns
68
- categorical_cols = self.df.select_dtypes(include=['object']).columns
69
- date_cols = self.df.select_dtypes(include=['datetime64']).columns
70
-
71
- metadata = {
72
- 'export_date': datetime.now().isoformat(),
73
- 'table_name': 'Cleaned_Data',
74
- 'row_count': len(self.df),
75
- 'column_count': len(self.df.columns),
76
- 'columns': list(self.df.columns),
77
- 'numeric_columns': list(numeric_cols),
78
- 'categorical_columns': list(categorical_cols),
79
- 'date_columns': list(date_cols),
80
- 'recommended_measures': {},
81
- 'recommended_visuals': []
82
- }
83
-
84
- # Add recommended measures
85
- for col in numeric_cols[:10]:
86
- metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])'
87
- metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])'
88
-
89
- # Add recommended visuals
90
- if len(categorical_cols) > 0 and len(numeric_cols) > 0:
91
- metadata['recommended_visuals'].append({
92
- 'type': 'bar_chart',
93
- 'category': categorical_cols[0],
94
- 'value': numeric_cols[0],
95
- 'title': f'{numeric_cols[0]} by {categorical_cols[0]}'
96
- })
97
-
98
- if len(date_cols) > 0 and len(numeric_cols) > 0:
99
- metadata['recommended_visuals'].append({
100
- 'type': 'line_chart',
101
- 'date': date_cols[0],
102
- 'value': numeric_cols[0],
103
- 'title': f'{numeric_cols[0]} Over Time'
104
- })
105
-
106
- metadata_json = json.dumps(metadata, indent=2).encode('utf-8')
107
-
108
- return {
109
- 'data': data_csv,
110
- 'metadata': metadata_json,
111
- 'instructions': self._get_powerbi_instructions()
112
- }
113
-
114
- def _get_powerbi_instructions(self):
115
- """Get step-by-step Power BI import instructions"""
116
- instructions = """
117
- === POWER BI IMPORT INSTRUCTIONS ===
118
-
119
- METHOD 1: Direct Import (Recommended)
120
- 1. Open Power BI Desktop
121
- 2. Click "Get Data" β†’ "Text/CSV"
122
- 3. Select the exported CSV file
123
- 4. Click "Load"
124
- 5. Power BI will auto-detect data types
125
-
126
- METHOD 2: Advanced Import
127
- 1. Click "Get Data" β†’ "More..."
128
- 2. Search for "CSV" or "Text"
129
- 3. Select your file
130
- 4. Configure:
131
- - First row as headers: YES
132
- - Data type detection: Based on first 200 rows
133
- 5. Click "Load"
134
-
135
- === AFTER IMPORT ===
136
-
137
- Recommended DAX Measures to Create:
138
-
139
- """
140
- return instructions
141
-
142
- def to_powerbi_zip(self):
143
- """Create a zip file with all Power BI resources"""
144
- import zipfile
145
-
146
- output = io.BytesIO()
147
- with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf:
148
- # Add data CSV
149
- data_csv = self.to_powerbi_ready()
150
- zipf.writestr('data.csv', data_csv)
151
-
152
- # Add metadata
153
- powerbi_data = self.to_powerbi_with_metadata()
154
- zipf.writestr('metadata.json', powerbi_data['metadata'])
155
-
156
- # Add instructions
157
- zipf.writestr('instructions.txt', powerbi_data['instructions'])
158
-
159
- # Add sample DAX file
160
- dax_content = self._generate_dax_file()
161
- zipf.writestr('measures.dax', dax_content)
162
-
163
- output.seek(0)
164
- return output.getvalue()
165
-
166
- def _generate_dax_file(self):
167
- """Generate DAX file for Power BI"""
168
- numeric_cols = self.df.select_dtypes(include=['number']).columns
169
-
170
- dax = f"""// DAX Measures for Power BI
171
- // Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
172
- // Table Name: Cleaned_Data
173
-
174
- // ============ BASIC MEASURES ============
175
-
176
- Total Records = COUNTROWS(Cleaned_Data)
177
-
178
- """
179
-
180
- for col in numeric_cols[:15]:
181
- dax += f"""
182
- // {col} Measures
183
- Total {col} = SUM(Cleaned_Data[{col}])
184
- Average {col} = AVERAGE(Cleaned_Data[{col}])
185
- Min {col} = MIN(Cleaned_Data[{col}])
186
- Max {col} = MAX(Cleaned_Data[{col}])
187
-
188
- """
189
-
190
- dax += """
191
- // ============ HOW TO USE ============
192
- // 1. In Power BI, go to "Modeling" tab
193
- // 2. Click "New Measure"
194
- // 3. Copy-paste any measure above
195
- // 4. Press Enter to save
196
-
197
- // ============ EXAMPLE VISUALS ============
198
- // - Card Visual: Total Records
199
- // - Bar Chart: Category vs Total Sales
200
- // - Line Chart: Date vs Average Value
201
- """
202
-
203
- return dax
204
-
205
- def to_json(self):
206
- """Export to JSON"""
207
- return self.df.to_json(orient='records', indent=2).encode('utf-8')
208
-
209
- def get_powerbi_template(self):
210
- """Get Power BI DAX template (legacy - kept for compatibility)"""
211
- numeric_cols = self.df.select_dtypes(include=['number']).columns
212
- categorical_cols = self.df.select_dtypes(include=['object']).columns
213
-
214
- template = f"""// Power BI DAX Template for your data
215
- // Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
216
- // Table name: Cleaned_Data
217
-
218
- // ============ BASIC MEASURES ============
219
-
220
- Total Records = COUNTROWS(Cleaned_Data)
221
-
222
- """
223
-
224
- for col in numeric_cols[:10]:
225
- template += f"""
226
- Total {col} = SUM(Cleaned_Data[{col}])
227
- Average {col} = AVERAGE(Cleaned_Data[{col}])
228
- """
229
-
230
- template += """
231
- // ============ HOW TO USE ============
232
- // 1. Export your data as CSV first
233
- // 2. In Power BI: Get Data β†’ CSV β†’ Select your file
234
- // 3. Go to Modeling tab β†’ New Measure
235
- // 4. Copy and paste any measure above
236
- // 5. Drag measures to visuals
237
-
238
- // ============ RECOMMENDED VISUALS ============
239
- """
240
-
241
- if len(categorical_cols) > 0 and len(numeric_cols) > 0:
242
- template += f"""
243
- - Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]}
244
- """
245
-
246
- if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
247
- template += f"""
248
- - Line Chart: Date vs {numeric_cols[0] if len(numeric_cols) > 0 else 'Value'}
249
- """
250
-
251
  return template
 
1
+ """Export Utilities - CSV, Excel, and REAL Power BI export"""
2
+
3
+ import pandas as pd
4
+ import io
5
+ import json
6
+ from datetime import datetime
7
+
8
class ExportUtils:
    """Export a pandas DataFrame as CSV, Excel, JSON, or a Power BI bundle.

    The Power BI exports rename columns (spaces, ``-`` and ``/`` become
    ``_``). Every generated DAX expression and metadata entry uses those
    renamed columns, so it matches the headers of the exported CSV.
    """

    # Table name used in every generated DAX expression and in the metadata.
    TABLE_NAME = 'Cleaned_Data'

    def __init__(self, df):
        """Keep a reference to ``df``; no method of this class mutates it."""
        self.df = df

    def to_csv(self):
        """Return the DataFrame as UTF-8 encoded CSV bytes (no index column)."""
        return self.df.to_csv(index=False).encode('utf-8')

    def to_excel(self):
        """Return an ``.xlsx`` workbook as bytes (written with ``openpyxl``).

        Sheets written:
          * ``Data`` - every row, without the index.
          * ``Summary`` - ``describe()`` of the numeric columns; only present
            when at least one numeric column exists.
          * ``Column Info`` - dtype, null count and distinct count per column.
        """
        buffer = io.BytesIO()
        with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
            self.df.to_excel(writer, sheet_name='Data', index=False)

            numeric_cols = self.df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                summary = self.df[numeric_cols].describe()
                summary.to_excel(writer, sheet_name='Summary', index=True)

            # Plain lists keep the four fields positionally aligned instead of
            # relying on index alignment between an Index and several Series.
            col_info = pd.DataFrame({
                'Column': list(self.df.columns),
                'Type': self.df.dtypes.astype(str).tolist(),
                'Nulls': self.df.isnull().sum().tolist(),
                'Unique': self.df.nunique().tolist(),
            })
            col_info.to_excel(writer, sheet_name='Column Info', index=False)

        return buffer.getvalue()

    @staticmethod
    def _clean_column_name(col):
        """Return ``col`` as a string with spaces, ``-`` and ``/`` turned into ``_``."""
        # str() keeps non-string labels (ints, tuples, ...) from raising.
        return str(col).replace(' ', '_').replace('-', '_').replace('/', '_')

    def _numeric_column_names(self):
        """Return the renamed (Power BI) names of the numeric columns."""
        numeric = self.df.select_dtypes(include=['number']).columns
        return [self._clean_column_name(col) for col in numeric]

    def _powerbi_frame(self):
        """Return a copy of the data with renamed columns and parsed dates.

        Only text columns whose name contains ``date`` or ``time`` are passed
        to ``pd.to_datetime``. Numeric columns are skipped on purpose:
        converting e.g. ``Time_spent`` would reinterpret the numbers as epoch
        offsets and silently corrupt the values.
        """
        frame = self.df.copy()
        frame.columns = [self._clean_column_name(col) for col in frame.columns]

        for col in frame.columns:
            lowered = col.lower()
            if 'date' not in lowered and 'time' not in lowered:
                continue
            series = frame[col]
            is_text = (pd.api.types.is_object_dtype(series)
                       or pd.api.types.is_string_dtype(series))
            if not is_text:
                continue
            try:
                frame[col] = pd.to_datetime(series)
            except (ValueError, TypeError, OverflowError):
                # Best effort: a column that merely looks date-like by name
                # but does not parse is exported unchanged.
                pass

        return frame

    def to_powerbi_ready(self):
        """Return UTF-8 CSV bytes prepared for Power BI import.

        Column names are renamed and date-like text columns are parsed; see
        ``_powerbi_frame``.
        """
        return self._powerbi_frame().to_csv(index=False).encode('utf-8')

    def to_powerbi_with_metadata(self):
        """Return the Power BI CSV together with metadata and instructions.

        Returns:
            dict with keys ``data`` (CSV bytes), ``metadata`` (UTF-8 JSON
            bytes) and ``instructions`` (str). All column names inside the
            metadata are the renamed ones used in the CSV header.
        """
        frame = self._powerbi_frame()
        table = self.TABLE_NAME

        numeric_cols = list(frame.select_dtypes(include=['number']).columns)
        categorical_cols = list(frame.select_dtypes(include=['object']).columns)
        # 'datetimetz' also reports timezone-aware datetime columns.
        date_cols = list(
            frame.select_dtypes(include=['datetime64', 'datetimetz']).columns
        )

        measures = {}
        for col in numeric_cols[:10]:
            measures[f'Total_{col}'] = f'SUM({table}[{col}])'
            measures[f'Average_{col}'] = f'AVERAGE({table}[{col}])'

        visuals = []
        if categorical_cols and numeric_cols:
            visuals.append({
                'type': 'bar_chart',
                'category': categorical_cols[0],
                'value': numeric_cols[0],
                'title': f'{numeric_cols[0]} by {categorical_cols[0]}',
            })
        if date_cols and numeric_cols:
            visuals.append({
                'type': 'line_chart',
                'date': date_cols[0],
                'value': numeric_cols[0],
                'title': f'{numeric_cols[0]} Over Time',
            })

        metadata = {
            'export_date': datetime.now().isoformat(),
            'table_name': table,
            'row_count': len(frame),
            'column_count': len(frame.columns),
            'columns': list(frame.columns),
            'numeric_columns': numeric_cols,
            'categorical_columns': categorical_cols,
            'date_columns': date_cols,
            'recommended_measures': measures,
            'recommended_visuals': visuals,
        }

        return {
            'data': frame.to_csv(index=False).encode('utf-8'),
            'metadata': json.dumps(metadata, indent=2).encode('utf-8'),
            'instructions': self._get_powerbi_instructions(),
        }

    def _get_powerbi_instructions(self):
        """Return step-by-step import instructions plus the suggested measures.

        The measure list covers the row count and Total/Average for up to the
        first 10 numeric columns.
        """
        table = self.TABLE_NAME
        lines = [
            '',
            '=== POWER BI IMPORT INSTRUCTIONS ===',
            '',
            'METHOD 1: Direct Import (Recommended)',
            '1. Open Power BI Desktop',
            '2. Click "Get Data" → "Text/CSV"',
            '3. Select the exported CSV file',
            '4. Click "Load"',
            '5. Power BI will auto-detect data types',
            '',
            'METHOD 2: Advanced Import',
            '1. Click "Get Data" → "More..."',
            '2. Search for "CSV" or "Text"',
            '3. Select your file',
            '4. Configure:',
            '- First row as headers: YES',
            '- Data type detection: Based on first 200 rows',
            '5. Click "Load"',
            '',
            '=== AFTER IMPORT ===',
            '',
            'Recommended DAX Measures to Create:',
            '',
            f'Total Records = COUNTROWS({table})',
        ]
        for col in self._numeric_column_names()[:10]:
            lines.append(f'Total {col} = SUM({table}[{col}])')
            lines.append(f'Average {col} = AVERAGE({table}[{col}])')
        lines.append('')
        return '\n'.join(lines)

    def to_powerbi_zip(self):
        """Return a zip archive (bytes) with all Power BI resources.

        Members: ``data.csv``, ``metadata.json``, ``instructions.txt`` and
        ``measures.dax``.
        """
        import zipfile

        # Build the bundle once; its 'data' entry is the same CSV that
        # to_powerbi_ready() would produce.
        bundle = self.to_powerbi_with_metadata()

        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.writestr('data.csv', bundle['data'])
            zipf.writestr('metadata.json', bundle['metadata'])
            zipf.writestr('instructions.txt', bundle['instructions'])
            zipf.writestr('measures.dax', self._generate_dax_file())

        return buffer.getvalue()

    def _generate_dax_file(self):
        """Return the text of ``measures.dax``.

        Contains a row-count measure plus Total/Average/Min/Max for up to the
        first 15 numeric columns, referenced by their renamed (CSV) names.
        """
        table = self.TABLE_NAME
        lines = [
            '// DAX Measures for Power BI',
            f"// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f'// Table Name: {table}',
            '',
            '// ============ BASIC MEASURES ============',
            '',
            f'Total Records = COUNTROWS({table})',
            '',
        ]

        for col in self._numeric_column_names()[:15]:
            lines.extend([
                '',
                f'// {col} Measures',
                f'Total {col} = SUM({table}[{col}])',
                f'Average {col} = AVERAGE({table}[{col}])',
                f'Min {col} = MIN({table}[{col}])',
                f'Max {col} = MAX({table}[{col}])',
                '',
            ])

        lines.extend([
            '',
            '// ============ HOW TO USE ============',
            '// 1. In Power BI, go to "Modeling" tab',
            '// 2. Click "New Measure"',
            '// 3. Copy-paste any measure above',
            '// 4. Press Enter to save',
            '',
            '// ============ EXAMPLE VISUALS ============',
            '// - Card Visual: Total Records',
            '// - Bar Chart: Category vs Total Sales',
            '// - Line Chart: Date vs Average Value',
            '',
        ])
        return '\n'.join(lines)

    def to_json(self):
        """Return the rows as UTF-8 JSON bytes (list of records, 2-space indent)."""
        return self.df.to_json(orient='records', indent=2).encode('utf-8')

    def get_powerbi_template(self):
        """Return a DAX template string (legacy - kept for compatibility).

        Lists Total/Average measures for up to the first 10 numeric columns
        and, as ``//`` comments, visual suggestions. Column names are the
        renamed ones used by the Power BI CSV export.
        """
        table = self.TABLE_NAME
        frame = self._powerbi_frame()
        numeric_cols = list(frame.select_dtypes(include=['number']).columns)
        categorical_cols = list(frame.select_dtypes(include=['object']).columns)
        date_cols = frame.select_dtypes(include=['datetime64', 'datetimetz']).columns

        lines = [
            '// Power BI DAX Template for your data',
            f"// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f'// Table name: {table}',
            '',
            '// ============ BASIC MEASURES ============',
            '',
            f'Total Records = COUNTROWS({table})',
            '',
        ]

        for col in numeric_cols[:10]:
            lines.extend([
                '',
                f'Total {col} = SUM({table}[{col}])',
                f'Average {col} = AVERAGE({table}[{col}])',
            ])

        lines.extend([
            '',
            '// ============ HOW TO USE ============',
            '// 1. Export your data as CSV first',
            '// 2. In Power BI: Get Data → CSV → Select your file',
            '// 3. Go to Modeling tab → New Measure',
            '// 4. Copy and paste any measure above',
            '// 5. Drag measures to visuals',
            '',
            '// ============ RECOMMENDED VISUALS ============',
        ])

        # Suggestions are emitted as comments so that the whole template stays
        # valid when pasted next to the measures.
        if categorical_cols and numeric_cols:
            lines.append(f'// - Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]}')
        if len(date_cols) > 0:
            value = numeric_cols[0] if numeric_cols else 'Value'
            lines.append(f'// - Line Chart: Date vs {value}')
        lines.append('')

        return '\n'.join(lines)