Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import os | |
| # Step 1: Extract | |
def extract_data(file_path):
    """Read the CSV file at ``file_path`` into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails for any reason
        (the error is printed rather than raised).
    """
    try:
        frame = pd.read_csv(file_path)
        print("Data extraction successful.")
    except Exception as err:
        # Best-effort stage: report the problem and signal failure with None
        # so the caller can decide whether to continue.
        print(f"Error in data extraction: {err}")
        frame = None
    return frame
| # Step 2: Transform | |
def transform_data(data, *, tax_rate=0.1):
    """Clean the data and derive the ``tax`` and ``net_salary`` columns.

    Rows containing a missing value in any column are dropped. The input
    frame itself is left unmodified; the new columns are added to a copy.

    Args:
        data: DataFrame that must contain a numeric ``salary`` column.
        tax_rate: Flat fraction of the salary taken as tax. Defaults to
            ``0.1`` (10%).

    Returns:
        A new DataFrame with the added ``tax`` (``salary * tax_rate``) and
        ``net_salary`` (``salary - tax``) columns, or ``None`` when the
        transformation fails (for example when ``salary`` is absent); the
        error is printed rather than raised.
    """
    try:
        # Explicit copy so that adding columns never touches (or warns
        # about) the caller's frame.
        cleaned = data.dropna().copy()

        tax = cleaned["salary"] * tax_rate
        cleaned["tax"] = tax
        cleaned["net_salary"] = cleaned["salary"] - tax

        print("Data transformation successful.")
        return cleaned
    except Exception as e:
        # Best-effort stage: report the problem and signal failure with None.
        print(f"Error in data transformation: {e}")
        return None
| # # Step 3: Load | |
| # def load_data(data, output_file_path): | |
| # """Loads the transformed data into a new CSV file.""" | |
| # try: | |
| # data.to_csv(output_file_path, index=False) | |
| # print(f"Data loaded successfully to {output_file_path}.") | |
| # except Exception as e: | |
| # print(f"Error in data loading: {e}") | |
| # # Main ETL function | |
| # def etl_process(input_file, output_file): | |
| # data = extract_data(input_file) | |
| # if data is not None: | |
| # transformed_data = transform_data(data) | |
| # if transformed_data is not None: | |
| # load_data(transformed_data, output_file) | |
| # if __name__ == "__main__": | |
| # input_file = "input_data.csv" | |
| # output_file = "output_data.csv" | |
| # etl_process(input_file, output_file) | |
| # Step 3: Load | |
def load_data(data, output_file_path):
    """Write the transformed data to a CSV file, creating its folder if needed.

    Args:
        data: DataFrame to save (written without the index column).
        output_file_path: Destination CSV path. May be a bare filename, in
            which case the file is written to the current working directory.

    Returns:
        None. Failures are printed rather than raised.
    """
    try:
        # Make sure the destination directory exists. dirname() is "" for a
        # bare filename; os.makedirs("") would raise, so only create a
        # directory when the path actually names one.
        output_dir = os.path.dirname(output_file_path)
        if output_dir and not os.path.exists(output_dir):
            # exist_ok guards against the directory appearing between the
            # existence check and the creation.
            os.makedirs(output_dir, exist_ok=True)
            print(f"π Created missing directory: {output_dir}")

        # Save the file.
        data.to_csv(output_file_path, index=False)
        print(f"β Data loaded successfully to {output_file_path}.")
    except Exception as e:
        print(f"β Error in data loading: {e}")
| # Main ETL function | |
def etl_process(input_file, output_file):
    """Run the extract, transform and load stages in sequence.

    Each stage signals failure by returning ``None``; the pipeline stops at
    the first stage that does so.

    Args:
        input_file: Path of the CSV file handed to ``extract_data``.
        output_file: Path of the CSV file handed to ``load_data``.
    """
    print("π Starting ETL Process...")

    raw = extract_data(input_file)
    if raw is None:
        return

    transformed = transform_data(raw)
    if transformed is None:
        return

    load_data(transformed, output_file)
    # NOTE(review): the original indentation was lost; this assumes the
    # completion message belongs to the innermost branch, i.e. it is printed
    # only when the load stage was reached -- confirm against the real file.
    print("β ETL Process Completed!")
if __name__ == "__main__":
    # The input file must already be present under `data/`; the output is
    # saved into that same `data/` folder.
    input_file, output_file = "data/input_data.csv", "data/output_data.csv"
    etl_process(input_file, output_file)