import os
import json

import numpy as np
def read_ldr_file(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    return lines


def parse_ldr_lines(lines):
    parts = []
    for line in lines:
        if line.startswith('1'):  # part data lines in an LDR file usually start with "1"
            parts.append(line.strip())  # keep the part line
        elif line.startswith('0'):  # "0" lines are usually comments or other control information
            pass
        else:
            pass
    return parts
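
# A minimal sketch of splitting a parsed type-1 line into its fields, assuming the
# standard LDraw layout "1 <colour> x y z a b c d e f g h i <part file>".
# The helper name split_part_line is hypothetical and not used by the datasets below.
def split_part_line(part_line):
    fields = part_line.split()
    return {
        'color': int(fields[1]),                       # LDraw colour code
        'position': [float(v) for v in fields[2:5]],   # x, y, z translation
        'rotation': [float(v) for v in fields[5:14]],  # 3x3 rotation matrix, row-major
        'part_id': fields[14],                         # e.g. "3001.dat"
    }
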
class SingLegoDataset:
    def __init__(self, args, split_set="train"):
        super().__init__()
        self.split_set = split_set
        data = np.load(os.path.join(args.data_dir, "Car Arcade_wrdhot" + ".npy"), allow_pickle=True)
        self.data = [data]  # [data[name] for name in data.files]
        # __getitem__ reads self.prompts, so the prompt file must be loaded here.
        self.prompts = json.load(open(os.path.join(args.data_dir, "text.json"), 'r'))['minecraft']
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        prompt = self.prompts[idx]
        # import ipdb; ipdb.set_trace()
        data_dict = {}
        data_dict['prompt'] = prompt
        data_dict['latent'] = data
        return data_dict
class LegosDataset:
    def __init__(self, args, split_set="train"):
        super().__init__()
        self.max_num_tokens = 410
        self.perm_num = -1  # <= 0 disables permutation augmentation in process_data
        self.split_set = split_set
        # data = np.load(os.path.join(args.data_dir, "all_ldr_data_lr30_train_sort.npz"), allow_pickle=True)['data']
        data = np.load(os.path.join(args.data_dir, "train_1k.npz"), allow_pickle=True)['data']
        # self.data = [self.padding(data[i], self.max_num_tokens) for i in range(len(data))]
        # self.data = [data[i] for i in range(len(data))]
        prompts = json.load(open(os.path.join(args.data_dir, "dense_captions", "dense_captions_rmthan300.json"), 'r'))['Car']
        # latent = np.load(os.path.join(args.data_dir, "latents_train.npy"), allow_pickle=True)
        bboxs = np.load(os.path.join(args.data_dir, "all_coordinates_train.npy"), allow_pickle=True)
        self.data, self.prompts, self.bboxs = self.process_data(data, prompts, bboxs)
        # self.latent = self.padding_latent(latent, self.max_num_tokens).astype(np.int64)
        # self.data = [self.data[0]]
        # self.prompts = [self.prompts[0]]
        # self.bboxs = [self.bboxs[0]]
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def padding_latent(self, data, max_len=300):
        # Pad latent token sequences to max_len rows with the constant 16386.
        # if data.shape[0] > max_len:
        #     print(data.shape[0])
        pad_data = np.pad(data, ((0, max_len - data.shape[0]), (0, 0)), 'constant', constant_values=16386)
        # pad_data[data.shape[0] - max_len:, -1] = 1  # flag label
        # pad_data[data.shape[0] - max_len:, -2] = 0
        return pad_data

    def padding(self, data, max_len=300):
        # Pad brick sequences to max_len rows with -1, then mark the padded rows:
        # the last column becomes the flag label (1) and the second-to-last is zeroed.
        # if data.shape[0] > max_len:
        #     print(data.shape[0])
        pad_data = np.pad(data, ((0, max_len - data.shape[0]), (0, 0)), 'constant', constant_values=-1)
        pad_data[data.shape[0] - max_len:, -1] = 1  # flag label
        pad_data[data.shape[0] - max_len:, -2] = 0
        return pad_data
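    # Small illustration of padding() above (the column meanings are inferred from how
    # the method uses them, not documented in the original code). A (2, 4) sequence
    # padded to max_len=4 gains two rows of -1, whose last column is set to 1 and
    # second-to-last to 0:
    #
    #   seq = np.array([[5, 6, 7, 0],
    #                   [8, 9, 1, 0]])
    #   LegosDataset.padding(None, seq, max_len=4)
    #   -> [[ 5,  6,  7,  0],
    #       [ 8,  9,  1,  0],
    #       [-1, -1,  0,  1],
    #       [-1, -1,  0,  1]]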
    def permute(self, data, n_permutations=3):
        # Return the original ordering plus (n_permutations - 1) random row shuffles.
        return [data] + [data[np.random.permutation(len(data))] for _ in range(n_permutations - 1)]

    def process_data(self, data, prompts, bboxs):
        processed_data, processed_prompts, processed_bboxs = [], [], []
        for i in range(len(data)):
            if self.perm_num > 0:
                permuted_samples = self.permute(data[i], self.perm_num)
                processed_data.extend([self.padding(p, self.max_num_tokens) for p in permuted_samples])
                processed_prompts.extend([prompts[i]] * self.perm_num)
                processed_bboxs.extend([bboxs[i]] * self.perm_num)
            else:
                processed_data.append(self.padding(data[i], self.max_num_tokens))
                processed_prompts.append(prompts[i])
                processed_bboxs.append(bboxs[i])
        return processed_data, processed_prompts, np.array(processed_bboxs)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        prompt = self.prompts[idx]
        bbox = self.bboxs[idx]
        # latent = self.latent[idx]
        # import ipdb; ipdb.set_trace()
        data_dict = {}
        data_dict['prompt'] = prompt
        data_dict['target'] = data
        data_dict['bbox'] = bbox
        # data_dict['latent'] = latent
        return data_dict
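
# A minimal usage sketch for LegosDataset, assuming args only needs a data_dir
# attribute pointing at a directory that contains the files referenced in __init__,
# and that batching works because every padded sample has the same shape
# (max_num_tokens, feature_dim). This is illustrative and not part of the original module.
#
#   from types import SimpleNamespace
#   from torch.utils.data import DataLoader
#
#   args = SimpleNamespace(data_dir="/path/to/lego_data")
#   train_set = LegosDataset(args, split_set="train")
#   loader = DataLoader(train_set, batch_size=8, shuffle=True)
#   batch = next(iter(loader))
#   print(batch['target'].shape, batch['bbox'].shape, len(batch['prompt']))
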
class LegosTestDataset:
    def __init__(self, args, split_set="test"):
        super().__init__()
        self.max_num_tokens = 410
        self.perm_num = -1
        self.split_set = split_set
        data = np.load(os.path.join(args.data_dir, "test_1k.npz"), allow_pickle=True)['data']
        # self.data = [self.padding(data[i], self.max_num_tokens) for i in range(len(data))]
        # self.data = [data[i] for i in range(len(data))]
        prompts = json.load(open(os.path.join(args.data_dir, "dense_captions", "dense_captions_rmthan300.json"), 'r'))['Car']
        bboxs = np.load(os.path.join(args.data_dir, "all_coordinates_test.npy"), allow_pickle=True)
        self.data, self.prompts, self.bboxs = self.process_data(data, prompts, bboxs)
        # latent = np.load(os.path.join(args.data_dir, "latents_test.npy"), allow_pickle=True)
        # self.latent = self.padding_latent(latent, self.max_num_tokens).astype(np.int64)
        # import ipdb; ipdb.set_trace()
        # self.data = [self.data[1]]
        # self.prompts = [self.prompts[0]]
        # self.bboxs = [self.bboxs[1]]
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def padding_latent(self, data, max_len=300):
        # if data.shape[0] > max_len:
        #     print(data.shape[0])
        pad_data = np.pad(data, ((0, max_len - data.shape[0]), (0, 0)), 'constant', constant_values=16386)
        # pad_data[data.shape[0] - max_len:, -1] = 1  # flag label
        # pad_data[data.shape[0] - max_len:, -2] = 0
        return pad_data

    def padding(self, data, max_len=300):
        # if data.shape[0] > max_len:
        #     print(data.shape[0])
        pad_data = np.pad(data, ((0, max_len - data.shape[0]), (0, 0)), 'constant', constant_values=-1)
        pad_data[data.shape[0] - max_len:, -1] = 1  # flag label
        pad_data[data.shape[0] - max_len:, -2] = 0
        return pad_data

    def permute(self, data, n_permutations=3):
        return [data] + [data[np.random.permutation(len(data))] for _ in range(n_permutations - 1)]

    def process_data(self, data, prompts, bboxs):
        processed_data, processed_prompts, processed_bboxs = [], [], []
        for i in range(len(data)):
            if self.perm_num > 0:
                permuted_samples = self.permute(data[i], self.perm_num)
                processed_data.extend([self.padding(p, self.max_num_tokens) for p in permuted_samples])
                processed_prompts.extend([prompts[i]] * self.perm_num)
                processed_bboxs.extend([bboxs[i]] * self.perm_num)
            else:
                processed_data.append(self.padding(data[i], self.max_num_tokens))
                processed_prompts.append(prompts[i])
                processed_bboxs.append(bboxs[i])
        return processed_data, processed_prompts, np.array(processed_bboxs)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        prompt = self.prompts[idx]
        bbox = self.bboxs[idx]
        # latent = self.latent[idx]
        # import ipdb; ipdb.set_trace()
        data_dict = {}
        data_dict['prompt'] = prompt
        data_dict['target'] = data
        # data_dict['latent'] = latent
        data_dict['bbox'] = bbox
        return data_dict
class CubeDataset:
    def __init__(self, args, split_set="train"):
        super().__init__()
        # self.num_tokens = args.n_discrete_size
        # self.no_aug = args.no_aug
        self.split_set = split_set
        # if split_set == "test":
        #     self.no_aug = True
        data = np.load(os.path.join(args.data_dir, split_set + ".npz"), allow_pickle=True)
        self.data = [data[name] for name in data.files]
        # if cur_data['faces_num'] <= self.max_triangles
        # and cur_data['faces_num'] >= self.min_triangles]
        self.prompts = json.load(open(os.path.join(args.data_dir, "text.json"), 'r'))['minecraft']
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        # prompt = self.prompts[idx]
        # import ipdb; ipdb.set_trace()
        data_dict = {}
        # data_dict['prompt'] = prompt
        data_dict['latent'] = data
        return data_dict
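
# Sketch of the .npz layout that CubeDataset.__init__ appears to expect (an assumption
# based on the loading code above: one named array per sample, iterated via data.files).
# The file path and array shape here are illustrative only.
#
#   latents = {f"sample_{i}": np.random.rand(16, 16, 16).astype(np.float32) for i in range(4)}
#   np.savez("/path/to/data/train.npz", **latents)
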
if __name__ == "__main__":
    file_path = '/public/home/wangshuo/gap/assembly/data/blue classic car/blue classic car.ldr'
    ldr_lines = read_ldr_file(file_path)
    parsed_parts = parse_ldr_lines(ldr_lines)
    # import ipdb; ipdb.set_trace()
    for part in parsed_parts:
        print(part)