Spaces:
Sleeping
Sleeping
File size: 8,311 Bytes
6ab17dd d18f851 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | """Export Utilities - CSV, Excel, and REAL Power BI export"""
import pandas as pd
import io
import json
from datetime import datetime
class ExportUtils:
def __init__(self, df):
self.df = df
def to_csv(self):
"""Export to CSV"""
return self.df.to_csv(index=False).encode('utf-8')
def to_excel(self):
"""Export to Excel with formatting"""
output = io.BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
# Write main data
self.df.to_excel(writer, sheet_name='Data', index=False)
# Add summary sheet
numeric_cols = self.df.select_dtypes(include=['number']).columns
if len(numeric_cols) > 0:
summary = self.df[numeric_cols].describe()
summary.to_excel(writer, sheet_name='Summary', index=True)
# Add column info sheet
col_info = pd.DataFrame({
'Column': self.df.columns,
'Type': self.df.dtypes.astype(str),
'Nulls': self.df.isnull().sum(),
'Unique': self.df.nunique()
})
col_info.to_excel(writer, sheet_name='Column Info', index=False)
output.seek(0)
return output.getvalue()
def to_powerbi_ready(self):
"""Prepare data for Power BI - Creates CSV optimized for Power BI"""
df_powerbi = self.df.copy()
# Clean column names (Power BI friendly)
df_powerbi.columns = [col.replace(' ', '_').replace('-', '_').replace('/', '_') for col in df_powerbi.columns]
# Clean datetime columns for Power BI
for col in df_powerbi.columns:
if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower():
try:
df_powerbi[col] = pd.to_datetime(df_powerbi[col])
except:
pass
# Convert to CSV for Power BI import
return df_powerbi.to_csv(index=False).encode('utf-8')
def to_powerbi_with_metadata(self):
"""Export to Power BI with metadata file"""
# Main data CSV
data_csv = self.to_powerbi_ready()
# Create metadata JSON
numeric_cols = self.df.select_dtypes(include=['number']).columns
categorical_cols = self.df.select_dtypes(include=['object']).columns
date_cols = self.df.select_dtypes(include=['datetime64']).columns
metadata = {
'export_date': datetime.now().isoformat(),
'table_name': 'Cleaned_Data',
'row_count': len(self.df),
'column_count': len(self.df.columns),
'columns': list(self.df.columns),
'numeric_columns': list(numeric_cols),
'categorical_columns': list(categorical_cols),
'date_columns': list(date_cols),
'recommended_measures': {},
'recommended_visuals': []
}
# Add recommended measures
for col in numeric_cols[:10]:
metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])'
metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])'
# Add recommended visuals
if len(categorical_cols) > 0 and len(numeric_cols) > 0:
metadata['recommended_visuals'].append({
'type': 'bar_chart',
'category': categorical_cols[0],
'value': numeric_cols[0],
'title': f'{numeric_cols[0]} by {categorical_cols[0]}'
})
if len(date_cols) > 0 and len(numeric_cols) > 0:
metadata['recommended_visuals'].append({
'type': 'line_chart',
'date': date_cols[0],
'value': numeric_cols[0],
'title': f'{numeric_cols[0]} Over Time'
})
metadata_json = json.dumps(metadata, indent=2).encode('utf-8')
return {
'data': data_csv,
'metadata': metadata_json,
'instructions': self._get_powerbi_instructions()
}
def _get_powerbi_instructions(self):
"""Get step-by-step Power BI import instructions"""
instructions = """
=== POWER BI IMPORT INSTRUCTIONS ===
METHOD 1: Direct Import (Recommended)
1. Open Power BI Desktop
2. Click "Get Data" β "Text/CSV"
3. Select the exported CSV file
4. Click "Load"
5. Power BI will auto-detect data types
METHOD 2: Advanced Import
1. Click "Get Data" β "More..."
2. Search for "CSV" or "Text"
3. Select your file
4. Configure:
- First row as headers: YES
- Data type detection: Based on first 200 rows
5. Click "Load"
=== AFTER IMPORT ===
Recommended DAX Measures to Create:
"""
return instructions
def to_powerbi_zip(self):
"""Create a zip file with all Power BI resources"""
import zipfile
output = io.BytesIO()
with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf:
#Add data CSV
data_csv = self.to_powerbi_ready()
zipf.writestr('data.csv', data_csv)
#Add metadata
powerbi_data = self.to_powerbi_with_metadata()
zipf.writestr('metadata.json', powerbi_data['metadata'])
#Add instructions
zipf.writestr('instructions.txt', powerbi_data['instructions'])
#Add sample DAX file
dax_content = self._generate_dax_file()
zipf.writestr('measures.dax', dax_content)
output.seek(0)
return output.getvalue()
def _generate_dax_file(self):
"""Generate DAX file for Power BI"""
numeric_cols = self.df.select_dtypes(include=['number']).columns
dax = f"""// DAX Measures for Power BI
// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
// Table Name: Cleaned_Data
// ============ BASIC MEASURES ============
Total Records = COUNTROWS(Cleaned_Data)
"""
for col in numeric_cols[:15]:
dax += f"""
// {col} Measures
Total {col} = SUM(Cleaned_Data[{col}])
Average {col} = AVERAGE(Cleaned_Data[{col}])
Min {col} = MIN(Cleaned_Data[{col}])
Max {col} = MAX(Cleaned_Data[{col}])
"""
dax += """
// ============ HOW TO USE ============
// 1. In Power BI, go to "Modeling" tab
// 2. Click "New Measure"
// 3. Copy-paste any measure above
// 4. Press Enter to save
// ============ EXAMPLE VISUALS ============
// - Card Visual: Total Records
// - Bar Chart: Category vs Total Sales
// - Line Chart: Date vs Average Value
"""
return dax
def to_json(self):
"""Export to JSON"""
return self.df.to_json(orient='records', indent=2).encode('utf-8')
def get_powerbi_template(self):
"""Get Power BI DAX template (legacy - kept for compatibility)"""
numeric_cols = self.df.select_dtypes(include=['number']).columns
categorical_cols = self.df.select_dtypes(include=['object']).columns
template = f"""// Power BI DAX Template for your data
// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
// Table name: Cleaned_Data
// ============ BASIC MEASURES ============
Total Records = COUNTROWS(Cleaned_Data)
"""
for col in numeric_cols[:10]:
template += f"""
Total {col} = SUM(Cleaned_Data[{col}])
Average {col} = AVERAGE(Cleaned_Data[{col}])
"""
template += """
// ============ HOW TO USE ============
// 1. Export your data as CSV first
// 2. In Power BI: Get Data β CSV β Select your file
// 3. Go to Modeling tab β New Measure
// 4. Copy and paste any measure above
// 5. Drag measures to visuals
// ============ RECOMMENDED VISUALS ============
"""
if len(categorical_cols) > 0 and len(numeric_cols) > 0:
template += f"""
- Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]}
"""
if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
template += f"""
- Line Chart: Date vs {numeric_cols[0] if len(numeric_cols) > 0 else 'Value'}
"""
return template |