# starx/model/preprocess/preprocess.py
# Uploaded by recorderlegend1 via huggingface_hub (rev e418c5a, verified)
import numpy as np
import pandas as pd
import torch
import sys
import os
from sklearn.preprocessing import StandardScaler,MinMaxScaler
np.set_printoptions(suppress=True)

# Standardize the sensor features; min-max the label channels to [0, 1].
ss, mm = StandardScaler(), MinMaxScaler()

# Raw recordings for one session (sys.argv[1] names the run). Column 0 is
# dropped (presumably a timestamp/counter — TODO confirm), leaving 12 data
# columns per sensor board.
run = sys.argv[1]
thigh = pd.read_csv(f"../../data/unprocessed/{run}/front.txt", delimiter=',',
                    usecols=[i for i in range(13) if i != 0])
shin = pd.read_csv(f"../../data/unprocessed/{run}/back.txt", delimiter=',',
                   usecols=[i for i in range(13) if i != 0])
thigh, shin = thigh.dropna(), shin.dropna()

# Align the two streams on their tails: keep the last n rows of each so both
# recordings end together, whichever one started earlier. (Previously only
# a longer `thigh` was handled, and a negative delta silently truncated it.)
# Both indexes are reset (drop=True) so pd.concat(axis=1) below aligns
# row-for-row instead of on the post-dropna CSV index.
n = min(len(thigh), len(shin))
thigh = thigh.iloc[-n:].reset_index(drop=True)
shin = shin.iloc[-n:].reset_index(drop=True)

# Cap the session length and tag every column by sensor location.
thigh, shin = thigh.iloc[:55000], shin.iloc[:55000]
thigh = thigh.add_suffix("_th")
shin = shin.add_suffix("_sh")

# 'p*' columns are the label channels (presumably pressure — confirm against
# the recorder docs); 's*' columns are the feature channels.
p_columns_th = [col for col in thigh.columns if col.startswith('p')]
s_columns_th = [col for col in thigh.columns if col.startswith('s')]
p_columns_sh = [col for col in shin.columns if col.startswith('p')]
s_columns_sh = [col for col in shin.columns if col.startswith('s')]
features = pd.concat([thigh[s_columns_th], shin[s_columns_sh]], axis=1)
labels = pd.concat([thigh[p_columns_th], shin[p_columns_sh]], axis=1)
features_scaled = pd.DataFrame(ss.fit_transform(features), columns=features.columns)
labels_scaled = pd.DataFrame(mm.fit_transform(labels), columns=labels.columns)

out_dir = f"../../data/processed/{run}"
os.makedirs(out_dir, exist_ok=True)
# NOTE: the *unscaled* frames are written to CSV while the *scaled* frames
# feed the tensor export — keep that in mind when loading these CSVs.
features.to_csv(f"{out_dir}/features.csv")
labels.to_csv(f"{out_dir}/labels.csv")
def preprocess_data(features_df, labels_df, lookback_window, predict_window, output_file, samples_per_step=150):
    """Slice aligned feature/label frames into sliding windows and save tensors.

    For every valid position ``i``, ``x`` holds the ``lookback_window`` steps of
    history ending at ``i`` and ``y`` holds the ``predict_window`` steps starting
    at ``i``. The result is saved with ``torch.save`` as ``{"x": ..., "y": ...}``.

    Parameters
    ----------
    features_df : pandas.DataFrame
        Feature rows, one row per sample.
    labels_df : pandas.DataFrame
        Label rows, aligned row-for-row with ``features_df``.
    lookback_window : int
        History length in steps; multiplied by ``samples_per_step`` rows.
    predict_window : int
        Prediction horizon in steps; multiplied by ``samples_per_step`` rows.
    output_file : str
        Destination path for the saved tensor archive.
    samples_per_step : int, optional
        Rows per step (default 150 — presumably the sensor sampling rate;
        TODO confirm against the recording hardware).

    Raises
    ------
    ValueError
        If the frames are too short to yield at least one window.
    """
    lookback_window *= samples_per_step
    predict_window *= samples_per_step
    total_samples = len(features_df) - lookback_window - predict_window
    if total_samples <= 0:
        # Previously this fell through to an opaque negative-dimension error
        # from torch.zeros; fail with a clear message instead.
        raise ValueError(
            f"Need more than {lookback_window + predict_window} rows, "
            f"got {len(features_df)}"
        )
    x_data = torch.zeros((total_samples, lookback_window, features_df.shape[1]))
    y_data = torch.zeros((total_samples, predict_window, labels_df.shape[1]))
    for idx, i in enumerate(range(lookback_window, len(features_df) - predict_window)):
        if idx % 1000 == 0:
            print(f"Processing sample {idx}/{total_samples}...")
        x_data[idx] = torch.tensor(features_df.iloc[i - lookback_window:i].values, dtype=torch.float32)
        y_data[idx] = torch.tensor(labels_df.iloc[i:i + predict_window].values, dtype=torch.float32)
    torch.save({"x": x_data, "y": y_data}, output_file)
    print(f"Preprocessed data saved to {output_file}")
# Export 3-step lookback / 3-step prediction windows from the scaled frames
# (each step is expanded by the per-step sample count inside preprocess_data).
preprocess_data(features_scaled,labels_scaled,3,3,f"../../data/processed/{sys.argv[1]}/data.pt")