Spaces:
Runtime error
Runtime error
| # for data manipulation | |
| import pandas as pd | |
| import sklearn | |
| # for creating a folder | |
| import os | |
| # for data preprocessing and pipeline creation | |
| from sklearn.model_selection import train_test_split | |
| # for converting text data in to numerical representation | |
| from sklearn.preprocessing import LabelEncoder | |
| # for hugging face space authentication to upload files | |
| from huggingface_hub import login, HfApi | |
| # Define constants for the dataset and output paths | |
| api = HfApi(token=os.getenv("HF_TOKEN")) | |
| DATASET_PATH = "hf://datasets/Debashre2824/tourism_predicton/tourism.csv" | |
| df = pd.read_csv(DATASET_PATH) | |
| print("Dataset loaded successfully.") | |
| ##Data Clean up ### | |
| ##Drop the uniqueidentifier | |
| # Drop the unique identifier | |
| df.drop(columns=['Unnamed: 0'], inplace=True) | |
| df.drop(columns=['CustomerID'], inplace=True) | |
| # Encoding the categorical 'Designation' column | |
| label_encoder = LabelEncoder() | |
| df['Designation'] = label_encoder.fit_transform(df['Designation']) | |
| #dropping designation also | |
| df.drop(columns=['Designation'], inplace=True) | |
| target_col = 'ProdTaken' | |
| # Split into X (features) and y (target) | |
| X = df.drop(columns=[target_col]) | |
| y = df[target_col] | |
| # Perform train-test split | |
| Xtrain, Xtest, ytrain, ytest = train_test_split( | |
| X, y, test_size=0.2, random_state=42 | |
| ) | |
| Xtrain.to_csv("Xtrain.csv",index=False) | |
| Xtest.to_csv("Xtest.csv",index=False) | |
| ytrain.to_csv("ytrain.csv",index=False) | |
| ytest.to_csv("ytest.csv",index=False) | |
| files = ["Xtrain.csv","Xtest.csv","ytrain.csv","ytest.csv"] | |
| for file_path in files: | |
| api.upload_file( | |
| path_or_fileobj=file_path, | |
| path_in_repo=file_path.split("/")[-1], # just the filename | |
| repo_id="Debashre2824/tourism_predicton", | |
| repo_type="dataset", | |
| ) | |