import os
import pickle
import warnings

warnings.filterwarnings('ignore')
def read_data_step_1(source_dir='pilottaxi/big', target_dir='../data/taxi'):
    """Filter the raw taxi pickles and write truncated train/dev/test splits.

    Reads ``train.pkl``, ``dev.pkl`` and ``test1.pkl`` from *source_dir*,
    keeps only the sequences whose length is greater than 34, and writes the
    first 1500 / 200 / 400 kept sequences to ``train.pkl`` / ``dev.pkl`` /
    ``test.pkl`` in *target_dir*. Each output pickle is a dict holding
    ``"dim_process": 10`` plus the split name mapped to its list of sequences.
    The number of kept sequences per input file is printed.

    Args:
        source_dir: Directory containing the input pickles. Per the original
            author's note, the default data comes from Mei et al.'s paper on
            event imputation.
        target_dir: Directory that receives the output pickles. It is created
            if it does not exist yet.

    Returns:
        None.

    Raises:
        OSError: If an input file cannot be opened or an output file cannot
            be written.
        KeyError: If a loaded mapping has no ``'seqs'`` entry.
    """
    def read_pkl(file_dir):
        """Load one pickle and return its sequences with more than 34 items."""
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file, so this must only be pointed at trusted inputs.
        # The context manager closes the handle even if unpickling fails.
        with open(file_dir, "rb") as f_in:
            taxi = pickle.load(f_in)
        res = [seq for seq in taxi['seqs'] if len(seq) > 34]
        print(len(res))
        return res

    # (output key, input file name, output file name, max sequences kept)
    splits = (
        ('train', 'train.pkl', 'train.pkl', 1500),
        ('dev', 'dev.pkl', 'dev.pkl', 200),
        ('test', 'test1.pkl', 'test.pkl', 400),
    )

    # Read every input before writing anything, so a missing or corrupt
    # input file never leaves a partially written set of outputs behind.
    filtered = {
        key: read_pkl(os.path.join(source_dir, in_name))
        for key, in_name, _, _ in splits
    }

    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    for key, _, out_name, limit in splits:
        with open(os.path.join(target_dir, out_name), "wb") as f_out:
            pickle.dump({"dim_process": 10, key: filtered[key][:limit]}, f_out)
if __name__ == '__main__':
    # Run the taxi preprocessing step only when executed as a script,
    # not when this module is imported.
    read_data_step_1()