"""Export Utilities - CSV, Excel, and REAL Power BI export""" import pandas as pd import io import json from datetime import datetime class ExportUtils: def __init__(self, df): self.df = df def to_csv(self): """Export to CSV""" return self.df.to_csv(index=False).encode('utf-8') def to_excel(self): """Export to Excel with formatting""" output = io.BytesIO() with pd.ExcelWriter(output, engine='openpyxl') as writer: # Write main data self.df.to_excel(writer, sheet_name='Data', index=False) # Add summary sheet numeric_cols = self.df.select_dtypes(include=['number']).columns if len(numeric_cols) > 0: summary = self.df[numeric_cols].describe() summary.to_excel(writer, sheet_name='Summary', index=True) # Add column info sheet col_info = pd.DataFrame({ 'Column': self.df.columns, 'Type': self.df.dtypes.astype(str), 'Nulls': self.df.isnull().sum(), 'Unique': self.df.nunique() }) col_info.to_excel(writer, sheet_name='Column Info', index=False) output.seek(0) return output.getvalue() def to_powerbi_ready(self): """Prepare data for Power BI - Creates CSV optimized for Power BI""" df_powerbi = self.df.copy() # Clean column names (Power BI friendly) df_powerbi.columns = [col.replace(' ', '_').replace('-', '_').replace('/', '_') for col in df_powerbi.columns] # Clean datetime columns for Power BI for col in df_powerbi.columns: if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower(): try: df_powerbi[col] = pd.to_datetime(df_powerbi[col]) except: pass # Convert to CSV for Power BI import return df_powerbi.to_csv(index=False).encode('utf-8') def to_powerbi_with_metadata(self): """Export to Power BI with metadata file""" # Main data CSV data_csv = self.to_powerbi_ready() # Create metadata JSON numeric_cols = self.df.select_dtypes(include=['number']).columns categorical_cols = self.df.select_dtypes(include=['object']).columns date_cols = self.df.select_dtypes(include=['datetime64']).columns metadata = { 'export_date': datetime.now().isoformat(), 'table_name': 'Cleaned_Data', 'row_count': len(self.df), 'column_count': len(self.df.columns), 'columns': list(self.df.columns), 'numeric_columns': list(numeric_cols), 'categorical_columns': list(categorical_cols), 'date_columns': list(date_cols), 'recommended_measures': {}, 'recommended_visuals': [] } # Add recommended measures for col in numeric_cols[:10]: metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])' metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])' # Add recommended visuals if len(categorical_cols) > 0 and len(numeric_cols) > 0: metadata['recommended_visuals'].append({ 'type': 'bar_chart', 'category': categorical_cols[0], 'value': numeric_cols[0], 'title': f'{numeric_cols[0]} by {categorical_cols[0]}' }) if len(date_cols) > 0 and len(numeric_cols) > 0: metadata['recommended_visuals'].append({ 'type': 'line_chart', 'date': date_cols[0], 'value': numeric_cols[0], 'title': f'{numeric_cols[0]} Over Time' }) metadata_json = json.dumps(metadata, indent=2).encode('utf-8') return { 'data': data_csv, 'metadata': metadata_json, 'instructions': self._get_powerbi_instructions() } def _get_powerbi_instructions(self): """Get step-by-step Power BI import instructions""" instructions = """ === POWER BI IMPORT INSTRUCTIONS === METHOD 1: Direct Import (Recommended) 1. Open Power BI Desktop 2. Click "Get Data" → "Text/CSV" 3. Select the exported CSV file 4. Click "Load" 5. Power BI will auto-detect data types METHOD 2: Advanced Import 1. Click "Get Data" → "More..." 2. Search for "CSV" or "Text" 3. Select your file 4. Configure: - First row as headers: YES - Data type detection: Based on first 200 rows 5. Click "Load" === AFTER IMPORT === Recommended DAX Measures to Create: """ return instructions def to_powerbi_zip(self): """Create a zip file with all Power BI resources""" import zipfile output = io.BytesIO() with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf: #Add data CSV data_csv = self.to_powerbi_ready() zipf.writestr('data.csv', data_csv) #Add metadata powerbi_data = self.to_powerbi_with_metadata() zipf.writestr('metadata.json', powerbi_data['metadata']) #Add instructions zipf.writestr('instructions.txt', powerbi_data['instructions']) #Add sample DAX file dax_content = self._generate_dax_file() zipf.writestr('measures.dax', dax_content) output.seek(0) return output.getvalue() def _generate_dax_file(self): """Generate DAX file for Power BI""" numeric_cols = self.df.select_dtypes(include=['number']).columns dax = f"""// DAX Measures for Power BI // Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} // Table Name: Cleaned_Data // ============ BASIC MEASURES ============ Total Records = COUNTROWS(Cleaned_Data) """ for col in numeric_cols[:15]: dax += f""" // {col} Measures Total {col} = SUM(Cleaned_Data[{col}]) Average {col} = AVERAGE(Cleaned_Data[{col}]) Min {col} = MIN(Cleaned_Data[{col}]) Max {col} = MAX(Cleaned_Data[{col}]) """ dax += """ // ============ HOW TO USE ============ // 1. In Power BI, go to "Modeling" tab // 2. Click "New Measure" // 3. Copy-paste any measure above // 4. Press Enter to save // ============ EXAMPLE VISUALS ============ // - Card Visual: Total Records // - Bar Chart: Category vs Total Sales // - Line Chart: Date vs Average Value """ return dax def to_json(self): """Export to JSON""" return self.df.to_json(orient='records', indent=2).encode('utf-8') def get_powerbi_template(self): """Get Power BI DAX template (legacy - kept for compatibility)""" numeric_cols = self.df.select_dtypes(include=['number']).columns categorical_cols = self.df.select_dtypes(include=['object']).columns template = f"""// Power BI DAX Template for your data // Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} // Table name: Cleaned_Data // ============ BASIC MEASURES ============ Total Records = COUNTROWS(Cleaned_Data) """ for col in numeric_cols[:10]: template += f""" Total {col} = SUM(Cleaned_Data[{col}]) Average {col} = AVERAGE(Cleaned_Data[{col}]) """ template += """ // ============ HOW TO USE ============ // 1. Export your data as CSV first // 2. In Power BI: Get Data → CSV → Select your file // 3. Go to Modeling tab → New Measure // 4. Copy and paste any measure above // 5. Drag measures to visuals // ============ RECOMMENDED VISUALS ============ """ if len(categorical_cols) > 0 and len(numeric_cols) > 0: template += f""" - Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]} """ if len(self.df.select_dtypes(include=['datetime64']).columns) > 0: template += f""" - Line Chart: Date vs {numeric_cols[0] if len(numeric_cols) > 0 else 'Value'} """ return template