Spaces:

Debashre2824
/

tourism_predicton

Runtime error

App Files Files Community

tourism_predicton / prep.py

Debashre2824

Upload Streamlit app and model

65f82fd verified 7 months ago

raw

history blame contribute delete

1.72 kB

	# for data manipulation
	import pandas as pd
	import sklearn
	# for creating a folder
	import os
	# for data preprocessing and pipeline creation
	from sklearn.model_selection import train_test_split
	# for converting text data in to numerical representation
	from sklearn.preprocessing import LabelEncoder
	# for hugging face space authentication to upload files
	from huggingface_hub import login, HfApi



	# Location of the raw tourism CSV inside the Hugging Face dataset repository
	DATASET_PATH = "hf://datasets/Debashre2824/tourism_predicton/tourism.csv"

	# Hub API client; its token is taken from the HF_TOKEN environment variable
	# (None is passed through when the variable is not set)
	hf_token = os.environ.get("HF_TOKEN")
	api = HfApi(token=hf_token)

	# Read the CSV at DATASET_PATH into a DataFrame
	df = pd.read_csv(DATASET_PATH)
	print("Dataset loaded successfully.")

	## Data clean-up ##

	# Remove the columns that are not used as features:
	#   - 'Unnamed: 0' and 'CustomerID' are row/customer identifiers
	#   - 'Designation' is discarded as well
	# 'Designation' used to be label-encoded right before being dropped; that
	# encoding never reached the output, so it is omitted and all three columns
	# are removed in a single call.
	df = df.drop(columns=['Unnamed: 0', 'CustomerID', 'Designation'])


	# Name of the label column
	target_col = 'ProdTaken'

	# Separate the label series (y) from the remaining feature columns (X)
	y = df[target_col]
	X = df.drop(columns=[target_col])

	# Hold out 20% of the rows for testing; the fixed random_state makes the
	# split reproducible between runs
	Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

	# Write every split to its own CSV in the working directory, without the index
	for csv_name, split_data in (
	    ("Xtrain.csv", Xtrain),
	    ("Xtest.csv", Xtest),
	    ("ytrain.csv", ytrain),
	    ("ytest.csv", ytest),
	):
	    split_data.to_csv(csv_name, index=False)


	# Split CSV files (relative to the working directory) to push to the Hub
	files = ["Xtrain.csv", "Xtest.csv", "ytrain.csv", "ytest.csv"]

	# Upload each file to the root of the dataset repository. The upload call is
	# the body of the loop, so it has to be indented under the `for` statement.
	for file_path in files:
	    api.upload_file(
	        path_or_fileobj=file_path,
	        path_in_repo=os.path.basename(file_path),  # just the filename
	        repo_id="Debashre2824/tourism_predicton",
	        repo_type="dataset",
	    )