Spaces:

p2ov
/

streamlit_app

Sleeping

App Files Files Community

streamlit_app / main.py

Alquilar78

Modification Config et Squelette

c4d546e 7 months ago

raw

history blame contribute delete

3.13 kB

	import pandas as pd
	import os


	# Step 1: Extract
	def extract_data(file_path):
	    """Read a CSV file into a pandas DataFrame.

	    Args:
	        file_path: Location of the CSV file, passed straight to
	            ``pd.read_csv``.

	    Returns:
	        The loaded DataFrame, or ``None`` if anything went wrong while
	        reading (the error is printed rather than raised).
	    """
	    try:
	        frame = pd.read_csv(file_path)
	        print("Data extraction successful.")
	    except Exception as exc:
	        # Best-effort step: report the problem and signal failure with None
	        # so the caller can decide whether to continue.
	        print(f"Error in data extraction: {exc}")
	        frame = None
	    return frame


	# Step 2: Transform
	def transform_data(data, tax_rate=0.1):
	    """Clean the data and derive tax and net-salary columns.

	    Rows containing any missing value are dropped, then two columns are
	    added: ``tax`` (``salary * tax_rate``) and ``net_salary``
	    (``salary - tax``). The input DataFrame is not modified.

	    Args:
	        data: DataFrame that must contain a numeric ``salary`` column.
	        tax_rate: Flat tax rate applied to ``salary``. Defaults to 0.1
	            (10%), the rate that was previously hard-coded.

	    Returns:
	        A new DataFrame with the cleaned rows and the two extra columns, or
	        ``None`` if the transformation failed (for example when the
	        ``salary`` column is missing); the error is printed.
	    """
	    try:
	        # dropna() already returns a new object; the explicit copy makes it
	        # unambiguous that the assignments below never touch the caller's
	        # frame.
	        data_cleaned = data.dropna().copy()

	        data_cleaned["tax"] = data_cleaned["salary"] * tax_rate
	        data_cleaned["net_salary"] = data_cleaned["salary"] - data_cleaned["tax"]

	        print("Data transformation successful.")
	        return data_cleaned
	    except Exception as e:
	        # Best-effort step: report the problem and signal failure with None.
	        print(f"Error in data transformation: {e}")
	        return None


	# # Step 3: Load
	# def load_data(data, output_file_path):
	# """Loads the transformed data into a new CSV file."""
	# try:
	# data.to_csv(output_file_path, index=False)
	# print(f"Data loaded successfully to {output_file_path}.")
	# except Exception as e:
	# print(f"Error in data loading: {e}")


	# # Main ETL function
	# def etl_process(input_file, output_file):
	# data = extract_data(input_file)
	# if data is not None:
	# transformed_data = transform_data(data)
	# if transformed_data is not None:
	# load_data(transformed_data, output_file)


	# if __name__ == "__main__":
	# input_file = "input_data.csv"
	# output_file = "output_data.csv"
	# etl_process(input_file, output_file)


	# Step 3: Load
	def load_data(data, output_file_path):
	    """Write the transformed data to a CSV file, creating its folder if needed.

	    Args:
	        data: DataFrame to save (written without the index column).
	        output_file_path: Destination CSV path. May be a bare filename, in
	            which case the file is written to the current directory.

	    Returns:
	        None. Any error while creating the directory or writing the file is
	        printed rather than raised.
	    """
	    try:
	        # Make sure the destination directory exists. A bare filename has an
	        # empty dirname, and os.makedirs("") raises, so only create a
	        # directory when the path actually has one.
	        output_dir = os.path.dirname(output_file_path)
	        if output_dir and not os.path.exists(output_dir):
	            # exist_ok guards against the directory appearing between the
	            # check above and this call.
	            os.makedirs(output_dir, exist_ok=True)
	            print(f"📂 Created missing directory: {output_dir}")

	        # Save the file.
	        data.to_csv(output_file_path, index=False)
	        print(f"✅ Data loaded successfully to {output_file_path}.")
	    except Exception as e:
	        print(f"❌ Error in data loading: {e}")


	# Main ETL function
	def etl_process(input_file, output_file):
	    """Run the extract -> transform -> load pipeline end to end.

	    Args:
	        input_file: Path of the CSV file to read.
	        output_file: Path of the CSV file to write.

	    Returns:
	        None. Progress and errors are reported on standard output.

	    Note:
	        The completion message is printed only when extraction and
	        transformation both produced data. ``load_data`` reports its own
	        errors and returns nothing, so a failure in the final write cannot
	        be detected here.
	    """
	    print("🚀 Starting ETL Process...")

	    data = extract_data(input_file)
	    if data is None:
	        # Do not claim success when there was nothing to process.
	        print("❌ ETL Process aborted: extraction failed.")
	        return

	    transformed_data = transform_data(data)
	    if transformed_data is None:
	        print("❌ ETL Process aborted: transformation failed.")
	        return

	    load_data(transformed_data, output_file)
	    print("✅ ETL Process Completed!")


	if __name__ == "__main__":
	    # Script entry point. Both paths are relative to the working directory:
	    # the input CSV must already be present under `data/`, and the result
	    # is saved into that same folder.
	    input_file, output_file = "data/input_data.csv", "data/output_data.csv"

	    etl_process(input_file=input_file, output_file=output_file)