# streamlit_app / main.py
# Alquilar78's picture
# Modification Config et Squelette
# c4d546e
import pandas as pd
import os
# Step 1: Extract
def extract_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if reading failed for any reason.
        Failures are reported on stdout instead of being raised.
    """
    try:
        frame = pd.read_csv(file_path)
        print("Data extraction successful.")
    except Exception as err:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error in data extraction: {err}")
        return None
    return frame
# Step 2: Transform
def transform_data(data, *, tax_rate=0.1):
    """Clean the data and derive the tax and net-salary columns.

    Rows containing any missing value are dropped, then two columns are
    added: ``tax`` (``salary * tax_rate``) and ``net_salary``
    (``salary - tax``). The input frame is not modified.

    Args:
        data: DataFrame that must contain a ``salary`` column.
        tax_rate: Flat tax rate applied to ``salary``. Defaults to 0.1 (10%),
            the rate this function previously hard-coded.

    Returns:
        A new DataFrame with the extra columns, or ``None`` if the
        transformation failed (for example when ``salary`` is missing).
        Failures are reported on stdout instead of being raised.
    """
    try:
        # Work on an explicit copy so the caller's frame is left untouched and
        # plain column assignment below cannot hit a view of the original.
        data_cleaned = data.dropna().copy()
        data_cleaned["tax"] = data_cleaned["salary"] * tax_rate
        data_cleaned["net_salary"] = data_cleaned["salary"] - data_cleaned["tax"]
        print("Data transformation successful.")
        return data_cleaned
    except Exception as e:
        # Best-effort contract shared with the other ETL steps: report and
        # return None so the caller can skip the remaining stages.
        print(f"Error in data transformation: {e}")
        return None
# # Step 3: Load
# def load_data(data, output_file_path):
# """Loads the transformed data into a new CSV file."""
# try:
# data.to_csv(output_file_path, index=False)
# print(f"Data loaded successfully to {output_file_path}.")
# except Exception as e:
# print(f"Error in data loading: {e}")
# # Main ETL function
# def etl_process(input_file, output_file):
# data = extract_data(input_file)
# if data is not None:
# transformed_data = transform_data(data)
# if transformed_data is not None:
# load_data(transformed_data, output_file)
# if __name__ == "__main__":
# input_file = "input_data.csv"
# output_file = "output_data.csv"
# etl_process(input_file, output_file)
# Step 3: Load
def load_data(data, output_file_path):
    """Write the transformed data to a CSV file, creating its folder if needed.

    Args:
        data: DataFrame to save; written without the index column.
        output_file_path: Destination CSV path. May be a bare filename, in
            which case the file is written to the current working directory.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    try:
        output_dir = os.path.dirname(output_file_path)
        # A bare filename has an empty dirname; os.makedirs("") would raise,
        # so only create a directory when the path actually names one.
        if output_dir and not os.path.isdir(output_dir):
            # exist_ok guards against another process creating it meanwhile.
            os.makedirs(output_dir, exist_ok=True)
            print(f"πŸ“‚ Created missing directory: {output_dir}")
        # Save the file.
        data.to_csv(output_file_path, index=False)
        print(f"βœ… Data loaded successfully to {output_file_path}.")
    except Exception as e:
        print(f"❌ Error in data loading: {e}")
# Main ETL function
def etl_process(input_file, output_file):
    """Run the extract -> transform -> load pipeline once.

    Each stage signals failure by returning ``None``; when that happens the
    remaining stages are skipped.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
    """
    print("πŸš€ Starting ETL Process...")
    extracted = extract_data(input_file)
    transformed = transform_data(extracted) if extracted is not None else None
    if transformed is not None:
        load_data(transformed, output_file)
    # NOTE(review): this banner is printed even when a stage bailed out early;
    # confirm that is intended.
    print("βœ… ETL Process Completed!")
if __name__ == "__main__":
    # Script entry point. Make sure the input file is really present under
    # `data/`; the result is saved in `data/` as well.
    etl_process(
        input_file="data/input_data.csv",
        output_file="data/output_data.csv",
    )