"""Turn a pair of raw sensor recordings into sliding-window training tensors.

Usage: python <this script> <recording_dir>

Reads ``front.txt`` (handled as the "thigh" recording) and ``back.txt``
(handled as the "shin" recording) from ``../../data/unprocessed/<recording_dir>``,
aligns them, splits columns into features (names starting with ``s``) and
labels (names starting with ``p``), and writes ``features.csv``,
``labels.csv`` and ``data.pt`` to ``../../data/processed/<recording_dir>``.
"""
import os
import sys

import numpy as np
import pandas as pd
import torch

# Maximum number of rows kept from each recording after alignment.
MAX_ROWS = 55000
# Rows per lookback/predict unit.
# NOTE(review): presumably the sensor sampling rate in Hz - confirm.
SAMPLES_PER_UNIT = 150


def _read_recording(path):
    """Read columns 1-12 of a comma-separated recording, dropping rows with NaN."""
    frame = pd.read_csv(path, delimiter=",", usecols=list(range(1, 13)))
    return frame.dropna()


def _align_recordings(thigh, shin, max_rows=MAX_ROWS):
    """Trim both recordings to a common length and give them matching indexes.

    The longer recording loses its leading rows so that both end on the same
    row count; each is then capped at ``max_rows`` rows.
    """
    delta = len(thigh) - len(shin)
    # A bare ``thigh[delta:]`` with a negative delta would keep only the
    # *last* |delta| rows, so trim whichever recording is actually longer.
    if delta > 0:
        thigh = thigh.iloc[delta:]
    elif delta < 0:
        shin = shin.iloc[-delta:]
    # Both frames need a fresh 0..n-1 index: ``pd.concat(axis=1)`` aligns on
    # index labels, and ``dropna()`` can leave gaps in either index.
    # ``drop=True`` avoids adding a stray "index" column.
    thigh = thigh.iloc[:max_rows].reset_index(drop=True)
    shin = shin.iloc[:max_rows].reset_index(drop=True)
    return thigh, shin


def _split_features_labels(thigh, shin):
    """Return ``(features, labels)`` built from the suffixed thigh/shin columns.

    Columns whose name starts with ``s`` become features and columns whose
    name starts with ``p`` become labels; thigh columns come first.
    """
    thigh = thigh.add_suffix("_th")
    shin = shin.add_suffix("_sh")
    feature_parts = []
    label_parts = []
    for frame in (thigh, shin):
        feature_parts.append(frame[[c for c in frame.columns if c.startswith("s")]])
        label_parts.append(frame[[c for c in frame.columns if c.startswith("p")]])
    features = pd.concat(feature_parts, axis=1)
    labels = pd.concat(label_parts, axis=1)
    return features, labels


def preprocess_data(features_df, labels_df, lookback_window, predict_window,
                    output_file, samples_per_unit=SAMPLES_PER_UNIT):
    """Build sliding-window tensors and save them with ``torch.save``.

    Sample ``k`` pairs feature rows ``[k, k + lookback)`` with the label rows
    ``[k + lookback, k + lookback + predict)`` that immediately follow them.

    Args:
        features_df: DataFrame of numeric feature columns, one row per step.
        labels_df: DataFrame of numeric label columns, at least as long as
            ``features_df``.
        lookback_window: Input window length in units of ``samples_per_unit``
            rows.
        predict_window: Target window length in units of ``samples_per_unit``
            rows.
        output_file: Path passed to ``torch.save``. The saved object is a
            dict with ``"x"`` of shape ``(samples, lookback_rows, n_features)``
            and ``"y"`` of shape ``(samples, predict_rows, n_labels)``.
        samples_per_unit: Rows per window unit (default 150).

    Raises:
        ValueError: If the frames are too short for the requested windows or
            ``labels_df`` has fewer rows than ``features_df``.
    """
    lookback_window *= samples_per_unit
    predict_window *= samples_per_unit
    total_rows = len(features_df)
    if len(labels_df) < total_rows:
        raise ValueError(
            f"labels_df has {len(labels_df)} rows but features_df has {total_rows}"
        )
    total_samples = total_rows - lookback_window - predict_window
    if total_samples < 0:
        raise ValueError(
            f"need at least {lookback_window + predict_window} rows, got {total_rows}"
        )

    # Convert each frame once; slicing a tensor per sample is far cheaper
    # than building a new tensor from a DataFrame slice on every iteration.
    feature_tensor = torch.tensor(features_df.to_numpy(), dtype=torch.float32)
    label_tensor = torch.tensor(labels_df.to_numpy(), dtype=torch.float32)

    x_data = torch.zeros((total_samples, lookback_window, features_df.shape[1]))
    y_data = torch.zeros((total_samples, predict_window, labels_df.shape[1]))

    for idx in range(total_samples):
        if idx % 1000 == 0:
            print(f"Processing sample {idx}/{total_samples}...")
        split = idx + lookback_window
        x_data[idx] = feature_tensor[idx:split]
        y_data[idx] = label_tensor[split:split + predict_window]

    torch.save({"x": x_data, "y": y_data}, output_file)
    print(f"Preprocessed data saved to {output_file}")


def main():
    """Run the full pipeline for the recording directory named in ``sys.argv[1]``."""
    # Imported here because only this entry point needs scikit-learn;
    # ``preprocess_data`` stays importable without it.
    from sklearn.preprocessing import MinMaxScaler, StandardScaler

    if len(sys.argv) < 2:
        sys.exit("usage: python <script> <recording_dir>")
    session = sys.argv[1]

    np.set_printoptions(suppress=True)

    thigh = _read_recording(f"../../data/unprocessed/{session}/front.txt")
    shin = _read_recording(f"../../data/unprocessed/{session}/back.txt")
    thigh, shin = _align_recordings(thigh, shin)
    features, labels = _split_features_labels(thigh, shin)

    features_scaled = pd.DataFrame(
        StandardScaler().fit_transform(features), columns=features.columns
    )
    labels_scaled = pd.DataFrame(
        MinMaxScaler().fit_transform(labels), columns=labels.columns
    )

    out_dir = f"../../data/processed/{session}"
    os.makedirs(out_dir, exist_ok=True)
    # NOTE(review): the CSVs hold the unscaled values while data.pt is built
    # from the scaled ones - confirm this is intended.
    features.to_csv(f"{out_dir}/features.csv")
    labels.to_csv(f"{out_dir}/labels.csv")

    preprocess_data(features_scaled, labels_scaled, 3, 3, f"{out_dir}/data.pt")


if __name__ == "__main__":
    main()