smart-analytics-copilot / app /export_utils.py
SamadhiDBS's picture
Update app/export_utils.py
6ab17dd verified
"""Export Utilities - CSV, Excel, and REAL Power BI export"""
import pandas as pd
import io
import json
from datetime import datetime
class ExportUtils:
def __init__(self, df):
self.df = df
def to_csv(self):
"""Export to CSV"""
return self.df.to_csv(index=False).encode('utf-8')
def to_excel(self):
"""Export to Excel with formatting"""
output = io.BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
# Write main data
self.df.to_excel(writer, sheet_name='Data', index=False)
# Add summary sheet
numeric_cols = self.df.select_dtypes(include=['number']).columns
if len(numeric_cols) > 0:
summary = self.df[numeric_cols].describe()
summary.to_excel(writer, sheet_name='Summary', index=True)
# Add column info sheet
col_info = pd.DataFrame({
'Column': self.df.columns,
'Type': self.df.dtypes.astype(str),
'Nulls': self.df.isnull().sum(),
'Unique': self.df.nunique()
})
col_info.to_excel(writer, sheet_name='Column Info', index=False)
output.seek(0)
return output.getvalue()
def to_powerbi_ready(self):
"""Prepare data for Power BI - Creates CSV optimized for Power BI"""
df_powerbi = self.df.copy()
# Clean column names (Power BI friendly)
df_powerbi.columns = [col.replace(' ', '_').replace('-', '_').replace('/', '_') for col in df_powerbi.columns]
# Clean datetime columns for Power BI
for col in df_powerbi.columns:
if 'datetime' in col.lower() or 'date' in col.lower() or 'time' in col.lower():
try:
df_powerbi[col] = pd.to_datetime(df_powerbi[col])
except:
pass
# Convert to CSV for Power BI import
return df_powerbi.to_csv(index=False).encode('utf-8')
def to_powerbi_with_metadata(self):
"""Export to Power BI with metadata file"""
# Main data CSV
data_csv = self.to_powerbi_ready()
# Create metadata JSON
numeric_cols = self.df.select_dtypes(include=['number']).columns
categorical_cols = self.df.select_dtypes(include=['object']).columns
date_cols = self.df.select_dtypes(include=['datetime64']).columns
metadata = {
'export_date': datetime.now().isoformat(),
'table_name': 'Cleaned_Data',
'row_count': len(self.df),
'column_count': len(self.df.columns),
'columns': list(self.df.columns),
'numeric_columns': list(numeric_cols),
'categorical_columns': list(categorical_cols),
'date_columns': list(date_cols),
'recommended_measures': {},
'recommended_visuals': []
}
# Add recommended measures
for col in numeric_cols[:10]:
metadata['recommended_measures'][f'Total_{col}'] = f'SUM(Cleaned_Data[{col}])'
metadata['recommended_measures'][f'Average_{col}'] = f'AVERAGE(Cleaned_Data[{col}])'
# Add recommended visuals
if len(categorical_cols) > 0 and len(numeric_cols) > 0:
metadata['recommended_visuals'].append({
'type': 'bar_chart',
'category': categorical_cols[0],
'value': numeric_cols[0],
'title': f'{numeric_cols[0]} by {categorical_cols[0]}'
})
if len(date_cols) > 0 and len(numeric_cols) > 0:
metadata['recommended_visuals'].append({
'type': 'line_chart',
'date': date_cols[0],
'value': numeric_cols[0],
'title': f'{numeric_cols[0]} Over Time'
})
metadata_json = json.dumps(metadata, indent=2).encode('utf-8')
return {
'data': data_csv,
'metadata': metadata_json,
'instructions': self._get_powerbi_instructions()
}
def _get_powerbi_instructions(self):
"""Get step-by-step Power BI import instructions"""
instructions = """
=== POWER BI IMPORT INSTRUCTIONS ===
METHOD 1: Direct Import (Recommended)
1. Open Power BI Desktop
2. Click "Get Data" β†’ "Text/CSV"
3. Select the exported CSV file
4. Click "Load"
5. Power BI will auto-detect data types
METHOD 2: Advanced Import
1. Click "Get Data" β†’ "More..."
2. Search for "CSV" or "Text"
3. Select your file
4. Configure:
- First row as headers: YES
- Data type detection: Based on first 200 rows
5. Click "Load"
=== AFTER IMPORT ===
Recommended DAX Measures to Create:
"""
return instructions
def to_powerbi_zip(self):
"""Create a zip file with all Power BI resources"""
import zipfile
output = io.BytesIO()
with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zipf:
#Add data CSV
data_csv = self.to_powerbi_ready()
zipf.writestr('data.csv', data_csv)
#Add metadata
powerbi_data = self.to_powerbi_with_metadata()
zipf.writestr('metadata.json', powerbi_data['metadata'])
#Add instructions
zipf.writestr('instructions.txt', powerbi_data['instructions'])
#Add sample DAX file
dax_content = self._generate_dax_file()
zipf.writestr('measures.dax', dax_content)
output.seek(0)
return output.getvalue()
def _generate_dax_file(self):
"""Generate DAX file for Power BI"""
numeric_cols = self.df.select_dtypes(include=['number']).columns
dax = f"""// DAX Measures for Power BI
// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
// Table Name: Cleaned_Data
// ============ BASIC MEASURES ============
Total Records = COUNTROWS(Cleaned_Data)
"""
for col in numeric_cols[:15]:
dax += f"""
// {col} Measures
Total {col} = SUM(Cleaned_Data[{col}])
Average {col} = AVERAGE(Cleaned_Data[{col}])
Min {col} = MIN(Cleaned_Data[{col}])
Max {col} = MAX(Cleaned_Data[{col}])
"""
dax += """
// ============ HOW TO USE ============
// 1. In Power BI, go to "Modeling" tab
// 2. Click "New Measure"
// 3. Copy-paste any measure above
// 4. Press Enter to save
// ============ EXAMPLE VISUALS ============
// - Card Visual: Total Records
// - Bar Chart: Category vs Total Sales
// - Line Chart: Date vs Average Value
"""
return dax
def to_json(self):
"""Export to JSON"""
return self.df.to_json(orient='records', indent=2).encode('utf-8')
def get_powerbi_template(self):
"""Get Power BI DAX template (legacy - kept for compatibility)"""
numeric_cols = self.df.select_dtypes(include=['number']).columns
categorical_cols = self.df.select_dtypes(include=['object']).columns
template = f"""// Power BI DAX Template for your data
// Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
// Table name: Cleaned_Data
// ============ BASIC MEASURES ============
Total Records = COUNTROWS(Cleaned_Data)
"""
for col in numeric_cols[:10]:
template += f"""
Total {col} = SUM(Cleaned_Data[{col}])
Average {col} = AVERAGE(Cleaned_Data[{col}])
"""
template += """
// ============ HOW TO USE ============
// 1. Export your data as CSV first
// 2. In Power BI: Get Data β†’ CSV β†’ Select your file
// 3. Go to Modeling tab β†’ New Measure
// 4. Copy and paste any measure above
// 5. Drag measures to visuals
// ============ RECOMMENDED VISUALS ============
"""
if len(categorical_cols) > 0 and len(numeric_cols) > 0:
template += f"""
- Bar Chart: {categorical_cols[0]} vs {numeric_cols[0]}
"""
if len(self.df.select_dtypes(include=['datetime64']).columns) > 0:
template += f"""
- Line Chart: Date vs {numeric_cols[0] if len(numeric_cols) > 0 else 'Value'}
"""
return template