Spaces:
Running
Running
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from mlpipeline.entity import DataTransformationConfig, DataTransformationArtifact | |
| from mlpipeline.logging.logger import get_logger | |
| from mlpipeline.exception import DataTransformationException | |
| import sys | |
| import os | |
| logger = get_logger(__name__) | |
class DataTransformation:
    """Split a CSV dataset into train and test CSV files.

    The behaviour is driven entirely by the ``DataTransformationConfig``
    passed to the constructor; ``transform`` reads its ``data_path``,
    ``test_size``, ``random_state``, ``root_dir``, ``train_path`` and
    ``test_path`` attributes.
    """

    def __init__(self, config: DataTransformationConfig):
        """Store the configuration used later by ``transform``."""
        self.config = config

    def transform(self) -> DataTransformationArtifact:
        """Read the source CSV, split it, and write both partitions to disk.

        The CSV at ``config.data_path`` is loaded with pandas and split with
        ``train_test_split`` using ``config.test_size`` and
        ``config.random_state``. Both partitions are written without the
        index column to ``config.train_path`` and ``config.test_path``.

        Returns:
            A ``DataTransformationArtifact`` carrying the two output paths,
            ``is_transformed=True`` and a message with both frame shapes.

        Raises:
            DataTransformationException: wraps any exception raised while
                reading, splitting or writing; the original exception is
                attached as ``__cause__``.
        """
        try:
            logger.info("Starting data transformation")
            df = pd.read_csv(self.config.data_path)
            train_df, test_df = train_test_split(
                df,
                test_size=self.config.test_size,
                random_state=self.config.random_state,
            )

            os.makedirs(self.config.root_dir, exist_ok=True)
            # Nothing here guarantees the output files sit directly inside
            # root_dir, so make sure each file's own parent directory exists
            # too. A bare filename has an empty dirname, which os.makedirs
            # rejects, hence the guard.
            for output_path in (self.config.train_path, self.config.test_path):
                parent_dir = os.path.dirname(output_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

            train_df.to_csv(self.config.train_path, index=False)
            test_df.to_csv(self.config.test_path, index=False)
            logger.info(f"Train shape: {train_df.shape}, Test shape: {test_df.shape}")

            return DataTransformationArtifact(
                train_file_path=self.config.train_path,
                test_file_path=self.config.test_path,
                is_transformed=True,
                message=f"Train: {train_df.shape}, Test: {test_df.shape}",
            )
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the
            # pipeline-level exception as __cause__.
            raise DataTransformationException(str(e), sys) from e