# Multi-AI-Analytics-Platform / data / powerbi_export.py
# ayushsahu45's picture
# Upload 2 files
# 31d3380 verified
# import pandas as pd
# import pyarrow as pa
# import pyarrow.parquet as pq
# from pathlib import Path
# from typing import Dict, Any, List, Union
# import json
# from datetime import datetime
# class PowerBIExporter:
# def __init__(self, output_dir: Union[str, Path]):
# self.output_dir = Path(output_dir)
# self.output_dir.mkdir(parents=True, exist_ok=True)
# self.exported_files = []
# def export_to_csv(self, df: pd.DataFrame, filename: str) -> Path:
# output_path = self.output_dir / f"{filename}.csv"
# df.to_csv(output_path, index=False)
# self.exported_files.append(output_path)
# return output_path
# def export_to_parquet(self, df: pd.DataFrame, filename: str) -> Path:
# output_path = self.output_dir / f"{filename}.parquet"
# df.to_parquet(output_path, index=False, engine='pyarrow')
# self.exported_files.append(output_path)
# return output_path
# def export_to_json(self, data: Any, filename: str) -> Path:
# output_path = self.output_dir / f"{filename}.json"
# with open(output_path, 'w', encoding='utf-8') as f:
# json.dump(data, f, indent=2, default=str)
# self.exported_files.append(output_path)
# return output_path
# def create_data_model(self, tables: Dict[str, pd.DataFrame], relationships: List[Dict[str, str]] = None) -> Dict[str, Any]:
# data_model = {
# "tables": {},
# "relationships": relationships or [],
# "created_at": datetime.now().isoformat()
# }
# for table_name, df in tables.items():
# data_model["tables"][table_name] = {
# "columns": df.columns.tolist(),
# "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
# "row_count": len(df),
# "primary_key": df.columns[0] if len(df.columns) > 0 else None
# }
# model_path = self.export_to_json(data_model, "powerbi_data_model")
# return data_model
# def create_analysis_results(self, ml_results: Dict[str, Any], dl_results: Dict[str, Any],
# data_summary: Dict[str, Any]) -> pd.DataFrame:
# results_df = pd.DataFrame([
# {
# "metric_category": "Machine Learning",
# "metric_name": "accuracy" if "accuracy" in ml_results else "mse",
# "metric_value": ml_results.get("accuracy", ml_results.get("mse", 0)),
# "timestamp": datetime.now()
# },
# {
# "metric_category": "Deep Learning",
# "metric_name": "device",
# "metric_value": dl_results.get("device", "unknown"),
# "timestamp": datetime.now()
# },
# {
# "metric_category": "Data Summary",
# "metric_name": "row_count",
# "metric_value": data_summary.get("row_count", 0),
# "timestamp": datetime.now()
# }
# ])
# return results_df
# def export_predictions(self, df: pd.DataFrame, predictions: List[Any],
# probabilities: List[List[float]] = None, filename: str = "predictions") -> Path:
# result_df = df.copy()
# result_df["prediction"] = predictions
# if probabilities:
# for i, probs in enumerate(zip(*probabilities)):
# result_df[f"prob_class_{i}"] = probs
# return self.export_to_csv(result_df, filename)
# def create_dashboard_data(self, analysis_results: Dict[str, Any]) -> Dict[str, pd.DataFrame]:
# dashboard_data = {}
# if "feature_importance" in analysis_results:
# dashboard_data["feature_importance"] = pd.DataFrame(analysis_results["feature_importance"])
# if "predictions" in analysis_results:
# dashboard_data["predictions"] = pd.DataFrame(analysis_results["predictions"])
# if "metrics" in analysis_results:
# metrics_list = []
# for key, value in analysis_results["metrics"].items():
# if isinstance(value, (int, float)):
# metrics_list.append({"metric": key, "value": value})
# if metrics_list:
# dashboard_data["metrics_summary"] = pd.DataFrame(metrics_list)
# return dashboard_data
# def export_all(self, dataframes: Dict[str, pd.DataFrame], include_parquet: bool = True) -> List[Path]:
# exported = []
# for name, df in dataframes.items():
# csv_path = self.export_to_csv(df, name)
# exported.append(csv_path)
# if include_parquet:
# parquet_path = self.export_to_parquet(df, name)
# exported.append(parquet_path)
# return exported
# def get_exported_files(self) -> List[Path]:
# return self.exported_files
# def generate_powerbi_instructions(self) -> str:
# instructions = """
# Power BI Integration Instructions:
# ================================
# 1. Open Power BI Desktop
# 2. Get Data:
# - Click "Get Data" > "More..."
# - Select "Text/CSV" for CSV files
# - Select "Parquet" for Parquet files
# 3. Load the exported data:
# - Navigate to the 'output' folder
# - Select the relevant CSV/Parquet files
# 4. Create relationships:
# - Open "Model" view
# - Drag columns to create relationships between tables
# 5. Build visualizations:
# - Use the "Visualizations" pane
# - Create charts, tables, and KPIs
# Exported files are located in: {output_dir}
# """.format(output_dir=str(self.output_dir))
# return instructions
import pandas as pd
from pathlib import Path
from typing import Dict, Any, List, Union, Optional
import json
from datetime import datetime
class PowerBIExporter:
    """Write DataFrames and metadata to files intended for loading into Power BI.

    Every file is written directly beneath ``output_dir``; the path of each
    written file is appended to ``exported_files`` in the order it was written.
    """

    def __init__(self, output_dir: Union[str, Path]):
        """Remember ``output_dir`` and create it (with parents) if missing.

        Args:
            output_dir: Directory that will receive every exported file.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Paths of every file written by this instance, oldest first.
        self.exported_files: List[Path] = []

    def export_to_csv(self, df: pd.DataFrame, filename: str) -> Path:
        """Write ``df`` to ``<output_dir>/<filename>.csv`` without the index.

        Args:
            df: Frame to write.
            filename: File name without the ``.csv`` extension.

        Returns:
            Path of the written CSV file.
        """
        output_path = self.output_dir / f"{filename}.csv"
        df.to_csv(output_path, index=False)
        self.exported_files.append(output_path)
        return output_path

    def export_to_parquet(self, df: pd.DataFrame, filename: str) -> Path:
        """Write ``df`` to ``<output_dir>/<filename>.parquet`` using pyarrow.

        If an ``ImportError`` is raised while importing pyarrow or while
        writing, the frame is written as
        ``<filename>_parquet_fallback.csv`` instead, so the returned path is
        not guaranteed to end in ``.parquet``.

        Args:
            df: Frame to write.
            filename: File name without extension.

        Returns:
            Path of the file that was actually written.
        """
        try:
            import pyarrow  # noqa: F401  (availability probe only)

            output_path = self.output_dir / f"{filename}.parquet"
            df.to_parquet(output_path, index=False, engine='pyarrow')
            self.exported_files.append(output_path)
            return output_path
        except ImportError:
            # Deliberate best-effort: fall back to CSV when pyarrow is unusable.
            return self.export_to_csv(df, filename + "_parquet_fallback")

    def export_to_json(self, data: Any, filename: str) -> Path:
        """Serialize ``data`` to ``<output_dir>/<filename>.json`` (UTF-8, indent 2).

        Values that ``json`` cannot encode natively are converted with ``str``.

        Args:
            data: Object to serialize.
            filename: File name without the ``.json`` extension.

        Returns:
            Path of the written JSON file.
        """
        output_path = self.output_dir / f"{filename}.json"
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, default=str)
        self.exported_files.append(output_path)
        return output_path

    def create_data_model(
        self,
        tables: Dict[str, pd.DataFrame],
        relationships: Optional[List[Dict[str, str]]] = None,
    ) -> Dict[str, Any]:
        """Describe ``tables`` and write the description to ``powerbi_data_model.json``.

        For each table the model records its column labels, the string form
        of each column dtype, the row count, and a ``primary_key`` entry that
        is simply the first column label (``None`` for a frame without
        columns).

        Args:
            tables: Mapping of table name to frame.
            relationships: Relationship records stored verbatim in the model;
                ``None`` (or an empty list) is stored as ``[]``.

        Returns:
            The data-model dictionary that was written to disk.
        """
        data_model: Dict[str, Any] = {
            "tables": {},
            "relationships": relationships or [],
            "created_at": datetime.now().isoformat(),
        }
        for table_name, df in tables.items():
            data_model["tables"][table_name] = {
                "columns": df.columns.tolist(),
                "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
                "row_count": len(df),
                "primary_key": df.columns[0] if len(df.columns) > 0 else None,
            }
        self.export_to_json(data_model, "powerbi_data_model")
        return data_model

    def create_analysis_results(
        self,
        ml_results: Dict[str, Any],
        dl_results: Dict[str, Any],
        data_summary: Dict[str, Any],
    ) -> pd.DataFrame:
        """Build a three-row summary frame from ML, DL and data-summary results.

        Rows, in order:
            * "Machine Learning": ``accuracy`` when that key is present in
              ``ml_results``; otherwise the row is named ``mse`` and carries
              ``ml_results["mse"]`` (``0`` when that key is missing too).
            * "Deep Learning": ``str(dl_results["device"])`` (``"unknown"``
              when missing).
            * "Data Summary": ``data_summary["row_count"]`` (``0`` when
              missing).

        Args:
            ml_results: Machine-learning results dictionary.
            dl_results: Deep-learning results dictionary.
            data_summary: Dataset summary dictionary.

        Returns:
            Frame with columns ``metric_category``, ``metric_name``,
            ``metric_value`` and ``timestamp``.
        """
        # One timestamp per call so that all rows of a snapshot agree exactly.
        recorded_at = datetime.now()
        ml_metric_name = "accuracy" if "accuracy" in ml_results else "mse"
        rows = [
            {
                "metric_category": "Machine Learning",
                "metric_name": ml_metric_name,
                "metric_value": ml_results.get("accuracy", ml_results.get("mse", 0)),
                "timestamp": recorded_at,
            },
            {
                "metric_category": "Deep Learning",
                "metric_name": "device",
                "metric_value": str(dl_results.get("device", "unknown")),
                "timestamp": recorded_at,
            },
            {
                "metric_category": "Data Summary",
                "metric_name": "row_count",
                "metric_value": data_summary.get("row_count", 0),
                "timestamp": recorded_at,
            },
        ]
        # NOTE(review): metric_value mixes numbers and strings in one column;
        # confirm the downstream Parquet export accepts that before relying on it.
        return pd.DataFrame(rows)

    def export_predictions(
        self,
        df: pd.DataFrame,
        predictions: List[Any],
        probabilities: Optional[List[List[float]]] = None,
        filename: str = "predictions",
    ) -> Path:
        """Write a copy of ``df`` with prediction columns appended as CSV.

        ``df`` itself is not modified. When ``probabilities`` is given it is
        read as one sequence per row and transposed, producing one
        ``prob_class_<i>`` column per position.

        Args:
            df: Input rows.
            predictions: One prediction per row of ``df``.
            probabilities: Optional per-row probability sequences.
            filename: File name without the ``.csv`` extension.

        Returns:
            Path of the written CSV file.
        """
        result_df = df.copy()
        result_df["prediction"] = predictions
        # `is not None` (rather than truthiness) keeps array-like inputs usable.
        if probabilities is not None:
            for class_index, class_probs in enumerate(zip(*probabilities)):
                result_df[f"prob_class_{class_index}"] = list(class_probs)
        return self.export_to_csv(result_df, filename)

    @staticmethod
    def _is_numeric_metric(value: Any) -> bool:
        """Return True for Python or NumPy integers/floats, and for ``bool``."""
        # `bool` stays accepted because it passed the former isinstance(int) test.
        return (
            isinstance(value, bool)
            or pd.api.types.is_integer(value)
            or pd.api.types.is_float(value)
        )

    def create_dashboard_data(
        self, analysis_results: Dict[str, Any]
    ) -> Dict[str, pd.DataFrame]:
        """Convert selected entries of ``analysis_results`` into frames.

        ``feature_importance`` and ``predictions`` are passed to
        ``pd.DataFrame`` unchanged. ``metrics`` is reduced to its numeric
        entries (Python and NumPy integers/floats, plus bools) and becomes
        ``metrics_summary``; that key is omitted when no numeric metric
        exists.

        Args:
            analysis_results: Dictionary that may contain the keys
                ``feature_importance``, ``predictions`` and ``metrics``.

        Returns:
            Mapping of dashboard table name to frame.
        """
        dashboard_data: Dict[str, pd.DataFrame] = {}
        if "feature_importance" in analysis_results:
            dashboard_data["feature_importance"] = pd.DataFrame(
                analysis_results["feature_importance"]
            )
        if "predictions" in analysis_results:
            dashboard_data["predictions"] = pd.DataFrame(
                analysis_results["predictions"]
            )
        if "metrics" in analysis_results:
            metrics_list = [
                {"metric": name, "value": value}
                for name, value in analysis_results["metrics"].items()
                if self._is_numeric_metric(value)
            ]
            if metrics_list:
                dashboard_data["metrics_summary"] = pd.DataFrame(metrics_list)
        return dashboard_data

    def export_all(
        self,
        dataframes: Dict[str, pd.DataFrame],
        include_parquet: bool = True,
    ) -> List[Path]:
        """Export every frame as CSV and, optionally, as Parquet.

        Args:
            dataframes: Mapping of file name (without extension) to frame.
            include_parquet: When true, also call ``export_to_parquet`` for
                each frame.

        Returns:
            Paths written by this call, CSV before Parquet for each frame.
        """
        exported: List[Path] = []
        for name, df in dataframes.items():
            exported.append(self.export_to_csv(df, name))
            if include_parquet:
                exported.append(self.export_to_parquet(df, name))
        return exported

    def get_exported_files(self) -> List[Path]:
        """Return the list of paths written so far (the instance's own list)."""
        return self.exported_files

    def generate_powerbi_instructions(self) -> str:
        """Return step-by-step text for loading the exported files in Power BI.

        The text embeds the output directory and the number of files exported
        so far, and begins and ends with a newline.
        """
        lines = [
            "",
            "Power BI Integration Instructions",
            "===================================",
            "1. Open Power BI Desktop",
            "2. Get Data:",
            '- Click "Get Data" → "More..."',
            '- Select "Text/CSV" for CSV files',
            '- Select "Parquet" for Parquet files',
            "3. Load the exported data:",
            f"- Navigate to: {self.output_dir}",
            "- Select the relevant CSV/Parquet files",
            "4. Create relationships (Model view):",
            "- Drag shared columns between tables to link them",
            "5. Build visualizations:",
            '- Use the "Visualizations" pane to create charts, KPIs, tables',
            f"Exported files location: {self.output_dir}",
            f"Total files exported: {len(self.exported_files)}",
            "",
        ]
        return "\n".join(lines)