"""Dataset wrappers for LEGO / cube assembly data plus small LDR-file helpers."""

import os
import numpy as np
import json


def read_ldr_file(file_path: str) -> list:
    """Return every line of the file at *file_path* (newlines preserved)."""
    with open(file_path, 'r') as f:
        return f.readlines()


def parse_ldr_lines(lines) -> list:
    """Return the stripped part lines (line type ``1``) found in *lines*.

    A line is treated as a part line when its first whitespace-separated
    token is exactly ``"1"``. Comment/control lines (type ``0``), blank
    lines and every other line type are ignored.
    """
    parts = []
    for line in lines:
        tokens = line.split(maxsplit=1)
        # Compare the whole first token rather than the first character so
        # that indented part lines are kept and tokens such as "10" are not
        # mistaken for line type 1.
        if tokens and tokens[0] == '1':
            parts.append(line.strip())
    return parts


def _load_json(path):
    """Load a JSON document from *path*, closing the file afterwards."""
    with open(path, 'r') as f:
        return json.load(f)


def _pad_rows(data, max_len, fill):
    """Append rows filled with *fill* to 2-D *data* until it has *max_len* rows.

    Raises:
        ValueError: if *data* already has more than *max_len* rows.
    """
    n_rows = data.shape[0]
    if n_rows > max_len:
        raise ValueError(
            f"sample has {n_rows} rows, which exceeds max_len={max_len}"
        )
    return np.pad(data, ((0, max_len - n_rows), (0, 0)), 'constant',
                  constant_values=fill)


class SingLegoDataset:
    """Single-sample dataset backed by ``Car Arcade_wrdhot.npy``."""

    def __init__(self, args, split_set="train"):
        """Load the array from ``args.data_dir`` and wrap it as one sample."""
        super().__init__()
        self.split_set = split_set
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load files from a trusted source.
        data = np.load(os.path.join(args.data_dir, "Car Arcade_wrdhot" + ".npy"),
                       allow_pickle=True)
        self.data = [data]
        # No prompt file is loaded for this dataset; __getitem__ therefore
        # omits the 'prompt' key unless a caller assigns self.prompts.
        self.prompts = None
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'latent': ...}``, plus ``'prompt'`` when prompts exist."""
        data_dict = {}
        if self.prompts is not None:
            data_dict['prompt'] = self.prompts[idx]
        data_dict['latent'] = self.data[idx]
        return data_dict


class _LegosBase:
    """Shared implementation of the train/test LEGO datasets.

    Subclasses only choose which data and bounding-box files are read.
    """

    # File names (relative to args.data_dir); set by subclasses.
    _data_file = None
    _bbox_file = None

    def __init__(self, args, split_set):
        """Load samples, prompts and bounding boxes, then pad every sample."""
        super().__init__()
        self.max_num_tokens = 410
        self.perm_num = -1  # values <= 0 disable permutation augmentation
        self.split_set = split_set

        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load files from a trusted source.
        with np.load(os.path.join(args.data_dir, self._data_file),
                     allow_pickle=True) as archive:
            data = archive['data']
        prompts = _load_json(
            os.path.join(args.data_dir, "dense_captions",
                         "dense_captions_rmthan300.json")
        )['Car']
        bboxs = np.load(os.path.join(args.data_dir, self._bbox_file),
                        allow_pickle=True)

        self.data, self.prompts, self.bboxs = self.process_data(data, prompts, bboxs)
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def padding_latent(self, data, max_len=300):
        """Pad *data* to *max_len* rows using the constant 16386."""
        return _pad_rows(data, max_len, 16386)

    def padding(self, data, max_len=300):
        """Pad *data* to *max_len* rows and mark the padded rows.

        Padded rows are filled with -1, except that their last column is set
        to 1 (flag label) and their second-to-last column is set to 0.
        Original rows are returned unchanged.

        Raises:
            ValueError: if *data* has more than *max_len* rows.
        """
        n_rows = data.shape[0]
        pad_data = _pad_rows(data, max_len, -1)
        # Index from n_rows rather than with the negative offset
        # (n_rows - max_len): when the sample already has max_len rows that
        # offset is 0 and would overwrite every real row.
        pad_data[n_rows:, -1] = 1  # flag label
        pad_data[n_rows:, -2] = 0
        return pad_data

    def permute(self, data, n_permutations=3):
        """Return *data* followed by ``n_permutations - 1`` random row shuffles."""
        shuffled = [data[np.random.permutation(len(data))]
                    for _ in range(n_permutations - 1)]
        return [data] + shuffled

    def process_data(self, data, prompts, bboxs):
        """Pad every sample and align prompts / bounding boxes with it.

        When ``self.perm_num > 0`` each sample is expanded into
        ``self.perm_num`` row-permuted copies, and its prompt and bounding
        box are repeated accordingly.

        Returns:
            ``(samples, prompts, bboxs)`` where the first two are lists and
            the last is ``np.array`` of the collected bounding boxes.
        """
        processed_data, processed_prompts, processed_bboxs = [], [], []
        for i, sample in enumerate(data):
            if self.perm_num > 0:
                variants = self.permute(sample, self.perm_num)
                processed_data.extend(
                    self.padding(v, self.max_num_tokens) for v in variants
                )
                processed_prompts.extend([prompts[i]] * self.perm_num)
                processed_bboxs.extend([bboxs[i]] * self.perm_num)
            else:
                processed_data.append(self.padding(sample, self.max_num_tokens))
                processed_prompts.append(prompts[i])
                processed_bboxs.append(bboxs[i])
        return processed_data, processed_prompts, np.array(processed_bboxs)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return a dict with the ``'prompt'``, ``'target'`` and ``'bbox'`` of *idx*."""
        data_dict = {}
        data_dict['prompt'] = self.prompts[idx]
        data_dict['target'] = self.data[idx]
        data_dict['bbox'] = self.bboxs[idx]
        return data_dict


class LegosDataset(_LegosBase):
    """Training dataset: ``train_1k.npz`` + ``all_coordinates_train.npy``."""

    _data_file = "train_1k.npz"
    _bbox_file = "all_coordinates_train.npy"

    def __init__(self, args, split_set="train"):
        super().__init__(args, split_set)


class LegosTestDataset(_LegosBase):
    """Test dataset: ``test_1k.npz`` + ``all_coordinates_test.npy``."""

    # NOTE(review): prompts come from the same caption file and are indexed
    # by position exactly as in the training set — confirm this alignment is
    # intended for the test samples.
    _data_file = "test_1k.npz"
    _bbox_file = "all_coordinates_test.npy"

    def __init__(self, args, split_set="test"):
        super().__init__(args, split_set)


class CubeDataset:
    """Dataset of arrays stored in ``<split_set>.npz`` under ``args.data_dir``."""

    def __init__(self, args, split_set="train"):
        """Load every array of the archive and the ``'minecraft'`` prompts."""
        super().__init__()
        self.split_set = split_set
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load files from a trusted source.
        with np.load(os.path.join(args.data_dir, split_set + ".npz"),
                     allow_pickle=True) as archive:
            self.data = [archive[name] for name in archive.files]
        self.prompts = _load_json(
            os.path.join(args.data_dir, "text.json")
        )['minecraft']
        print(f"{split_set} dataset total data samples: {len(self.data)}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'latent': array}`` for *idx* (prompts are not included)."""
        data_dict = {}
        data_dict['latent'] = self.data[idx]
        return data_dict


def _main(argv=None):
    """Print the part lines of an LDR file (path optionally given on the CLI)."""
    import sys

    argv = sys.argv if argv is None else argv
    default_path = '/public/home/wangshuo/gap/assembly/data/blue classic car/blue classic car.ldr'
    file_path = argv[1] if len(argv) > 1 else default_path
    ldr_lines = read_ldr_file(file_path)
    parsed_parts = parse_ldr_lines(ldr_lines)
    for part in parsed_parts:
        print(part)


if __name__ == "__main__":
    _main()