Spaces:
Running
Running
File size: 1,949 Bytes
8e0e7a8 26b9f92 8e0e7a8 26b9f92 8e0e7a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# for data manipulation
import pandas as pd
import sklearn
## EDA
import matplotlib.pyplot as plt
import seaborn as sns
import math
from xgboost import XGBClassifier
# for creating a folder
import os
# for data preprocessing and pipeline creation
from sklearn.model_selection import train_test_split
# for converting text data into a numerical representation
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# for Hugging Face Hub authentication, file upload and download
from huggingface_hub import login, HfApi, hf_hub_download
# Seaborn theme used for the EDA plots: white grid with slightly larger fonts.
sns.set(style="whitegrid", font_scale=1.1)

# Hugging Face Hub client. The access token is read from the HF_TOKEN
# environment variable (os.getenv returns None when the variable is unset).
hf_token = os.getenv("HF_TOKEN")
api = HfApi(token=hf_token)
# Read the engine dataset CSV from the Hugging Face dataset repository.
DATASET_PATH = "hf://datasets/sudhirpgcmma02/Engine_PM/data/engine_data.csv"
df = pd.read_csv(DATASET_PATH)

# Copy of the frame exactly as loaded, taken before the columns are renamed.
data_df = df.copy()

# Standardise the column labels in three passes so they are easy to reference:
#   1. trim surrounding whitespace,
#   2. turn spaces into underscores,
#   3. turn every remaining non-word character into an underscore.
trimmed_labels = df.columns.str.strip()
underscored_labels = trimmed_labels.str.replace(" ", "_")
df.columns = underscored_labels.str.replace(r"[^\w]", "_", regex=True)
# Name of the target (label) column.
target_col = 'Engine_Condition'

# y is the target column; X is every other column of the frame.
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
Xtrain, Xtest, ytrain, ytest = split_parts
# Write each split to a CSV file in the working directory, without the
# row index. All four files are written before any upload starts.
split_frames = {
    "Xtrain.csv": Xtrain,
    "Xtest.csv": Xtest,
    "ytrain.csv": ytrain,
    "ytest.csv": ytest,
}
for csv_name, frame in split_frames.items():
    frame.to_csv(csv_name, index=False)

# Upload every CSV to the Hugging Face dataset repository, stored under
# its bare file name (any leading directory component is dropped).
files = list(split_frames)
for file_path in files:
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=file_path.rsplit("/", 1)[-1],
        repo_id="sudhirpgcmma02/Engine_PM",
        repo_type="dataset",
    )

print("Dataset after split loaded successfully to Huggingface.....")
|