Spaces:
Sleeping
Sleeping
| """Export Utilities - CSV, Excel, and REAL Power BI export""" | |
| import pandas as pd | |
| import io | |
| import json | |
| from datetime import datetime | |
| class ExportUtils: | |
| def __init__(self, df): | |
| self.df = df | |
| def to_csv(self): | |
| """Export to CSV""" | |
| return self.df.to_csv(index=False).encode('utf-8') | |
| def to_excel(self): | |
| """Export to Excel with formatting""" | |
| output = io.BytesIO() | |
| with pd.ExcelWriter(output, engine='openpyxl') as writer: | |
| # Write main data | |
| self.df.to_excel(writer, sheet_name='Data', index=False) | |
| # Add summary sheet | |
| numeric_cols = self.df.select_dtypes(include=['number']).columns | |
| if len(numeric_cols) > 0: | |
| summary = self.df[numeric_cols].describe() | |
| summary.to_excel(writer, sheet_name='Summary', index=True) | |
| # Add column info sheet | |
| col_info = pd.DataFrame({ | |
| 'Column': self.df.columns, | |
| 'Type': self.df.dtypes.astype(str), | |
| 'Nulls': self.df.isnull().sum(), | |
| 'Unique': self.df.nunique() | |
| }) | |
| col_info.to_excel(writer, sheet_name='Column Info', index=False) | |
| output.seek(0) | |
| return output.getvalue() | |
| def to_powerbi_ready(self): | |
| """Prepare data for Power BI - Creates CSV optimized for Power BI""" | |
| df_powerbi = self.df.copy() | |
| # Clean column names (Power BI friendly) | |
| df_powerbi.columns = [col.replace(' ', '_').replace('-', '_').replace('/', '_') for col in df_powerbi.columns] | |
| # Clean datetime columns for Power BI | |
| for col in df_powerbi.columns: | |
| if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower(): | |
| try: | |
| df_powerbi[col] = pd.to_datetime(df_powerbi[col]) | |
| except: | |
| pass | |
| # Convert to CSV for Power BI import | |
| return df_powerbi.to_csv(index=False).encode('utf-8') | |
| def to_powerbi_with_metadata(self): | |
| """Export to Power BI with metadata file""" | |
| # Main data CSV | |
| data_csv = self.to_powerbi_ready() | |
| # Create metadata JSON | |
| numeric_cols = self.df.select_dtypes(include=['number']).columns | |
| categorical_cols = self.df.select_dtypes(include=['object']).columns | |
| date_cols = self.df.select_dtypes(include=['datetime64']).columns | |
| metadata = { | |
| 'export_date': datetime.now().isoformat(), | |
| 'table_name': 'Cleaned_Data', | |
| 'row_count': len(self.df), | |
| 'column_count': len(self.df.columns), | |
| 'columns': list(self.df.columns), | |
| 'numeric_columns': list(numeric_cols), | |
| 'categorical_columns': list(categorical_cols), | |
| 'date_columns': list(date_cols), | |
| 'recommended_measures': {}, | |
| 'recommended_visuals': [] | |
| } | |
| # Add recommended measures | |
| for col in numeric_cols[:10]: | |
| metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])' | |
| metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])' | |
| # Add recommended visuals | |
| if len(categorical_cols) > 0 and len(numeric_cols) > 0: | |
| metadata['recommended_visuals'].append({ | |
| 'type': 'bar_chart', | |
| 'category': categorical_cols[0], | |
| 'value': numeric_cols[0], | |
| 'title': f'{numeric_cols[0]} by {categorical_cols[0]}' | |
| }) | |
| if len(date_cols) > 0 and len(numeric_cols) > 0: | |
| metadata['recommended_visuals'].append({ | |
| 'type': 'line_chart', | |
| 'date': date_cols[0], | |
| 'value': numeric_cols[0], | |
| 'title': f'{numeric_cols[0]} Over Time' | |
| }) | |
| metadata_json = json.dumps(metadata, indent=2).encode('utf-8') | |
| return { | |
| 'data': data_csv, | |
| 'metadata': metadata_json, | |
| 'instructions': self._get_powerbi_instructions() | |
| } | |
| def _get_powerbi_instructions(self): | |
| """Get step-by-step Power BI import instructions""" | |
| instructions = """ | |
| === POWER BI IMPORT INSTRUCTIONS === | |
| METHOD 1: Direct Import (Recommended) | |
| 1. Open Power BI Desktop | |
| 2. Click "Get Data" β "Text/CSV" | |
| 3. Select the exported CSV file | |
| 4. Click "Load" | |
| 5. Power BI will auto-detect data types | |
| METHOD 2: Advanced Import | |
| 1. Click "Get Data" β "More..." | |
| 2. Search for "CSV" or "Text" | |
| 3. Select your file | |
| 4. Configure: | |
| - First row as headers: YES | |
| - Data type detection: Based on first 200 rows | |
| 5. Click "Load" | |
| === AFTER IMPORT === | |
| Recommended DAX Measures to Create: | |
| """ | |
| return instructions | |
| def to_powerbi_zip(self): | |
| """Create a zip file with all Power BI resources""" | |
| import zipfile | |
| output = io.BytesIO() | |
| with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf: | |
| #Add data CSV | |
| data_csv = self.to_powerbi_ready() | |
| zipf.writestr('data.csv', data_csv) | |
| #Add metadata | |
| powerbi_data = self.to_powerbi_with_metadata() | |
| zipf.writestr('metadata.json', powerbi_data['metadata']) | |
| #Add instructions | |
| zipf.writestr('instructions.txt', powerbi_data['instructions']) | |
| #Add sample DAX file | |
| dax_content = self._generate_dax_file() | |
| zipf.writestr('measures.dax', dax_content) | |
| output.seek(0) | |
| return output.getvalue() | |
| def _generate_dax_file(self): | |
| """Generate DAX file for Power BI""" | |
| numeric_cols = self.df.select_dtypes(include=['number']).columns | |
| dax = f"""// DAX Measures for Power BI | |
| // Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | |
| // Table Name: Cleaned_Data | |
| // ============ BASIC MEASURES ============ | |
| Total Records = COUNTROWS(Cleaned_Data) | |
| """ | |
| for col in numeric_cols[:15]: | |
| dax += f""" | |
| // {col} Measures | |
| Total {col} = SUM(Cleaned_Data[{col}]) | |
| Average {col} = AVERAGE(Cleaned_Data[{col}]) | |
| Min {col} = MIN(Cleaned_Data[{col}]) | |
| Max {col} = MAX(Cleaned_Data[{col}]) | |
| """ | |
| dax += """ | |
| // ============ HOW TO USE ============ | |
| // 1. In Power BI, go to "Modeling" tab | |
| // 2. Click "New Measure" | |
| // 3. Copy-paste any measure above | |
| // 4. Press Enter to save | |
| // ============ EXAMPLE VISUALS ============ | |
| // - Card Visual: Total Records | |
| // - Bar Chart: Category vs Total Sales | |
| // - Line Chart: Date vs Average Value | |
| """ | |
| return dax | |
| def to_json(self): | |
| """Export to JSON""" | |
| return self.df.to_json(orient='records', indent=2).encode('utf-8') | |
| def get_powerbi_template(self): | |
| """Get Power BI DAX template (legacy - kept for compatibility)""" | |
| numeric_cols = self.df.select_dtypes(include=['number']).columns | |
| categorical_cols = self.df.select_dtypes(include=['object']).columns | |
| template = f"""// Power BI DAX Template for your data | |
| // Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | |
| // Table name: Cleaned_Data | |
| // ============ BASIC MEASURES ============ | |
| Total Records = COUNTROWS(Cleaned_Data) | |
| """ | |
| for col in numeric_cols[:10]: | |
| template += f""" | |
| Total {col} = SUM(Cleaned_Data[{col}]) | |
| Average {col} = AVERAGE(Cleaned_Data[{col}]) | |
| """ | |
| template += """ | |
| // ============ HOW TO USE ============ | |
| // 1. Export your data as CSV first | |
| // 2. In Power BI: Get Data β CSV β Select your file | |
| // 3. Go to Modeling tab β New Measure | |
| // 4. Copy and paste any measure above | |
| // 5. Drag measures to visuals | |
| // ============ RECOMMENDED VISUALS ============ | |
| """ | |
| if len(categorical_cols) > 0 and len(numeric_cols) > 0: | |
| template += f""" | |
| - Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]} | |
| """ | |
| if len(self.df.select_dtypes(include=['datetime64']).columns) > 0: | |
| template += f""" | |
| - Line Chart: Date vs {numeric_cols[0] if len(numeric_cols) > 0 else 'Value'} | |
| """ | |
| return template |