| # import pandas as pd | |
| # import pyarrow as pa | |
| # import pyarrow.parquet as pq | |
| # from pathlib import Path | |
| # from typing import Dict, Any, List, Union | |
| # import json | |
| # from datetime import datetime | |
| # class PowerBIExporter: | |
| # def __init__(self, output_dir: Union[str, Path]): | |
| # self.output_dir = Path(output_dir) | |
| # self.output_dir.mkdir(parents=True, exist_ok=True) | |
| # self.exported_files = [] | |
| # def export_to_csv(self, df: pd.DataFrame, filename: str) -> Path: | |
| # output_path = self.output_dir / f"{filename}.csv" | |
| # df.to_csv(output_path, index=False) | |
| # self.exported_files.append(output_path) | |
| # return output_path | |
| # def export_to_parquet(self, df: pd.DataFrame, filename: str) -> Path: | |
| # output_path = self.output_dir / f"{filename}.parquet" | |
| # df.to_parquet(output_path, index=False, engine='pyarrow') | |
| # self.exported_files.append(output_path) | |
| # return output_path | |
| # def export_to_json(self, data: Any, filename: str) -> Path: | |
| # output_path = self.output_dir / f"{filename}.json" | |
| # with open(output_path, 'w', encoding='utf-8') as f: | |
| # json.dump(data, f, indent=2, default=str) | |
| # self.exported_files.append(output_path) | |
| # return output_path | |
| # def create_data_model(self, tables: Dict[str, pd.DataFrame], relationships: List[Dict[str, str]] = None) -> Dict[str, Any]: | |
| # data_model = { | |
| # "tables": {}, | |
| # "relationships": relationships or [], | |
| # "created_at": datetime.now().isoformat() | |
| # } | |
| # for table_name, df in tables.items(): | |
| # data_model["tables"][table_name] = { | |
| # "columns": df.columns.tolist(), | |
| # "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()}, | |
| # "row_count": len(df), | |
| # "primary_key": df.columns[0] if len(df.columns) > 0 else None | |
| # } | |
| # model_path = self.export_to_json(data_model, "powerbi_data_model") | |
| # return data_model | |
| # def create_analysis_results(self, ml_results: Dict[str, Any], dl_results: Dict[str, Any], | |
| # data_summary: Dict[str, Any]) -> pd.DataFrame: | |
| # results_df = pd.DataFrame([ | |
| # { | |
| # "metric_category": "Machine Learning", | |
| # "metric_name": "accuracy" if "accuracy" in ml_results else "mse", | |
| # "metric_value": ml_results.get("accuracy", ml_results.get("mse", 0)), | |
| # "timestamp": datetime.now() | |
| # }, | |
| # { | |
| # "metric_category": "Deep Learning", | |
| # "metric_name": "device", | |
| # "metric_value": dl_results.get("device", "unknown"), | |
| # "timestamp": datetime.now() | |
| # }, | |
| # { | |
| # "metric_category": "Data Summary", | |
| # "metric_name": "row_count", | |
| # "metric_value": data_summary.get("row_count", 0), | |
| # "timestamp": datetime.now() | |
| # } | |
| # ]) | |
| # return results_df | |
| # def export_predictions(self, df: pd.DataFrame, predictions: List[Any], | |
| # probabilities: List[List[float]] = None, filename: str = "predictions") -> Path: | |
| # result_df = df.copy() | |
| # result_df["prediction"] = predictions | |
| # if probabilities: | |
| # for i, probs in enumerate(zip(*probabilities)): | |
| # result_df[f"prob_class_{i}"] = probs | |
| # return self.export_to_csv(result_df, filename) | |
| # def create_dashboard_data(self, analysis_results: Dict[str, Any]) -> Dict[str, pd.DataFrame]: | |
| # dashboard_data = {} | |
| # if "feature_importance" in analysis_results: | |
| # dashboard_data["feature_importance"] = pd.DataFrame(analysis_results["feature_importance"]) | |
| # if "predictions" in analysis_results: | |
| # dashboard_data["predictions"] = pd.DataFrame(analysis_results["predictions"]) | |
| # if "metrics" in analysis_results: | |
| # metrics_list = [] | |
| # for key, value in analysis_results["metrics"].items(): | |
| # if isinstance(value, (int, float)): | |
| # metrics_list.append({"metric": key, "value": value}) | |
| # if metrics_list: | |
| # dashboard_data["metrics_summary"] = pd.DataFrame(metrics_list) | |
| # return dashboard_data | |
| # def export_all(self, dataframes: Dict[str, pd.DataFrame], include_parquet: bool = True) -> List[Path]: | |
| # exported = [] | |
| # for name, df in dataframes.items(): | |
| # csv_path = self.export_to_csv(df, name) | |
| # exported.append(csv_path) | |
| # if include_parquet: | |
| # parquet_path = self.export_to_parquet(df, name) | |
| # exported.append(parquet_path) | |
| # return exported | |
| # def get_exported_files(self) -> List[Path]: | |
| # return self.exported_files | |
| # def generate_powerbi_instructions(self) -> str: | |
| # instructions = """ | |
| # Power BI Integration Instructions: | |
| # ================================ | |
| # 1. Open Power BI Desktop | |
| # 2. Get Data: | |
| # - Click "Get Data" > "More..." | |
| # - Select "Text/CSV" for CSV files | |
| # - Select "Parquet" for Parquet files | |
| # 3. Load the exported data: | |
| # - Navigate to the 'output' folder | |
| # - Select the relevant CSV/Parquet files | |
| # 4. Create relationships: | |
| # - Open "Model" view | |
| # - Drag columns to create relationships between tables | |
| # 5. Build visualizations: | |
| # - Use the "Visualizations" pane | |
| # - Create charts, tables, and KPIs | |
| # Exported files are located in: {output_dir} | |
| # """.format(output_dir=str(self.output_dir)) | |
| # return instructions | |
import json
import numbers
import warnings
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import pandas as pd
class PowerBIExporter:
    """Write DataFrames and metadata to disk in formats Power BI can load.

    Every file written through one of the ``export_*`` methods is recorded
    in ``exported_files`` in the order it was created.
    """

    def __init__(self, output_dir: Union[str, Path]):
        """Prepare the exporter and make sure the target directory exists.

        Args:
            output_dir: Directory that receives every exported file. It is
                created, together with missing parents, if necessary.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Paths of all files written so far, in creation order.
        self.exported_files: List[Path] = []

    def export_to_csv(self, df: pd.DataFrame, filename: str) -> Path:
        """Write ``df`` to ``<output_dir>/<filename>.csv`` without the index.

        Args:
            df: Table to export.
            filename: Base name of the file, without extension.

        Returns:
            Path of the CSV file that was written.
        """
        output_path = self.output_dir / f"{filename}.csv"
        df.to_csv(output_path, index=False)
        self.exported_files.append(output_path)
        return output_path

    def export_to_parquet(self, df: pd.DataFrame, filename: str) -> Path:
        """Write ``df`` to ``<output_dir>/<filename>.parquet`` via pyarrow.

        When pyarrow cannot be imported, the data is written to
        ``<filename>_parquet_fallback.csv`` instead and a ``RuntimeWarning``
        is emitted so the substitution does not go unnoticed.

        Args:
            df: Table to export.
            filename: Base name of the file, without extension.

        Returns:
            Path of the file that was actually written (Parquet, or the CSV
            fallback).
        """
        output_path = self.output_dir / f"{filename}.parquet"
        try:
            import pyarrow  # noqa: F401

            # Kept inside the ``try`` on purpose: an ImportError raised
            # while writing is handled by the same best-effort CSV fallback.
            df.to_parquet(output_path, index=False, engine="pyarrow")
        except ImportError as exc:
            warnings.warn(
                f"pyarrow is unavailable ({exc}); exporting "
                f"'{filename}' as CSV instead of Parquet.",
                RuntimeWarning,
                stacklevel=2,
            )
            return self.export_to_csv(df, filename + "_parquet_fallback")
        self.exported_files.append(output_path)
        return output_path

    def export_to_json(self, data: Any, filename: str) -> Path:
        """Serialize ``data`` to ``<output_dir>/<filename>.json`` (UTF-8).

        Objects the ``json`` module cannot encode natively are converted
        with ``str`` (``default=str``).

        Args:
            data: JSON-compatible structure to write.
            filename: Base name of the file, without extension.

        Returns:
            Path of the JSON file that was written.
        """
        output_path = self.output_dir / f"{filename}.json"
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=str)
        self.exported_files.append(output_path)
        return output_path

    def create_data_model(
        self,
        tables: Dict[str, pd.DataFrame],
        relationships: Optional[List[Dict[str, str]]] = None,
    ) -> Dict[str, Any]:
        """Describe ``tables`` and persist the description as JSON.

        The description is also written to ``powerbi_data_model.json`` in
        the output directory.

        Args:
            tables: Mapping of table name to DataFrame.
            relationships: Optional relationship descriptors, stored as
                given. ``None`` (or an empty list) is stored as ``[]``.

        Returns:
            Dict with the keys ``"tables"``, ``"relationships"`` and
            ``"created_at"``. Each table entry holds its column names,
            dtype names, row count and a ``"primary_key"``, which is simply
            the first column (``None`` for a table without columns).
        """
        data_model: Dict[str, Any] = {
            "tables": {
                table_name: {
                    "columns": df.columns.tolist(),
                    "dtypes": {
                        col: str(dtype) for col, dtype in df.dtypes.items()
                    },
                    "row_count": len(df),
                    "primary_key": (
                        df.columns[0] if len(df.columns) > 0 else None
                    ),
                }
                for table_name, df in tables.items()
            },
            "relationships": relationships or [],
            "created_at": datetime.now().isoformat(),
        }
        self.export_to_json(data_model, "powerbi_data_model")
        return data_model

    def create_analysis_results(
        self,
        ml_results: Dict[str, Any],
        dl_results: Dict[str, Any],
        data_summary: Dict[str, Any],
    ) -> pd.DataFrame:
        """Build a three-row summary table of headline metrics.

        Args:
            ml_results: Read for ``"accuracy"``; when that key is absent,
                ``"mse"`` is used instead (value ``0`` if also absent).
            dl_results: Read for ``"device"`` (``"unknown"`` if absent);
                the value is stored as a string.
            data_summary: Read for ``"row_count"`` (``0`` if absent).

        Returns:
            DataFrame with the columns ``metric_category``, ``metric_name``,
            ``metric_value`` and ``timestamp``.
        """
        # One timestamp for the whole snapshot so all rows agree.
        snapshot_time = datetime.now()

        if "accuracy" in ml_results:
            ml_name, ml_value = "accuracy", ml_results["accuracy"]
        else:
            ml_name, ml_value = "mse", ml_results.get("mse", 0)

        rows = [
            {
                "metric_category": "Machine Learning",
                "metric_name": ml_name,
                "metric_value": ml_value,
                "timestamp": snapshot_time,
            },
            {
                "metric_category": "Deep Learning",
                "metric_name": "device",
                "metric_value": str(dl_results.get("device", "unknown")),
                "timestamp": snapshot_time,
            },
            {
                "metric_category": "Data Summary",
                "metric_name": "row_count",
                "metric_value": data_summary.get("row_count", 0),
                "timestamp": snapshot_time,
            },
        ]
        return pd.DataFrame(rows)

    def export_predictions(
        self,
        df: pd.DataFrame,
        predictions: List[Any],
        probabilities: Optional[List[List[float]]] = None,
        filename: str = "predictions",
    ) -> Path:
        """Export ``df`` with prediction (and probability) columns as CSV.

        ``df`` itself is not modified; the columns are added to a copy.

        Args:
            df: Input rows the predictions belong to.
            predictions: One prediction per row of ``df``; stored in the
                ``prediction`` column.
            probabilities: Optional per-row sequences of class
                probabilities. Column ``prob_class_<i>`` receives the i-th
                entry of every row.
            filename: Base name of the CSV file, without extension.

        Returns:
            Path of the CSV file that was written.
        """
        result_df = df.copy()
        result_df["prediction"] = predictions
        if probabilities is not None:
            # Transpose rows-of-classes into one column per class.
            for class_index, class_probs in enumerate(zip(*probabilities)):
                result_df[f"prob_class_{class_index}"] = list(class_probs)
        return self.export_to_csv(result_df, filename)

    def create_dashboard_data(
        self, analysis_results: Dict[str, Any]
    ) -> Dict[str, pd.DataFrame]:
        """Convert selected entries of ``analysis_results`` to DataFrames.

        Args:
            analysis_results: May contain ``"feature_importance"`` and
                ``"predictions"`` (anything ``pd.DataFrame`` accepts) and
                ``"metrics"`` (a mapping of metric name to value).

        Returns:
            Dict with a DataFrame for each recognised key that is present.
            ``"metrics_summary"`` lists only real-valued metrics (Python
            ``int``/``float``/``bool`` and NumPy scalar numbers) and is
            omitted when no such metric exists.
        """
        dashboard_data: Dict[str, pd.DataFrame] = {}

        for key in ("feature_importance", "predictions"):
            if key in analysis_results:
                dashboard_data[key] = pd.DataFrame(analysis_results[key])

        if "metrics" in analysis_results:
            # numbers.Real also matches NumPy scalars (np.float32, np.int64,
            # ...), which a plain (int, float) check would silently drop.
            metrics_list = [
                {"metric": name, "value": value}
                for name, value in analysis_results["metrics"].items()
                if isinstance(value, numbers.Real)
            ]
            if metrics_list:
                dashboard_data["metrics_summary"] = pd.DataFrame(metrics_list)

        return dashboard_data

    def export_all(
        self,
        dataframes: Dict[str, pd.DataFrame],
        include_parquet: bool = True,
    ) -> List[Path]:
        """Export every DataFrame as CSV and, optionally, as Parquet.

        Args:
            dataframes: Mapping of base file name to DataFrame.
            include_parquet: Also call ``export_to_parquet`` for each table.

        Returns:
            Paths written by this call, CSV before Parquet for each table.
        """
        exported: List[Path] = []
        for name, df in dataframes.items():
            exported.append(self.export_to_csv(df, name))
            if include_parquet:
                exported.append(self.export_to_parquet(df, name))
        return exported

    def get_exported_files(self) -> List[Path]:
        """Return the paths of all files exported so far.

        A copy is returned so callers cannot alter the exporter's own
        bookkeeping by mutating the result.
        """
        return list(self.exported_files)

    def generate_powerbi_instructions(self) -> str:
        """Return step-by-step text for loading the exports in Power BI.

        The text names the output directory and the number of files
        exported so far.
        """
        lines = [
            "",
            "Power BI Integration Instructions",
            "===================================",
            "1. Open Power BI Desktop",
            "2. Get Data:",
            '- Click "Get Data" → "More..."',
            '- Select "Text/CSV" for CSV files',
            '- Select "Parquet" for Parquet files',
            "3. Load the exported data:",
            f"- Navigate to: {self.output_dir}",
            "- Select the relevant CSV/Parquet files",
            "4. Create relationships (Model view):",
            "- Drag shared columns between tables to link them",
            "5. Build visualizations:",
            '- Use the "Visualizations" pane to create charts, KPIs, tables',
            f"Exported files location: {self.output_dir}",
            f"Total files exported: {len(self.exported_files)}",
            "",
        ]
        return "\n".join(lines)