# NOTE: file-viewer residue condensed (Spaces status: Paused; file size: 7,140 bytes; revision fc3484d).
import os
import shutil
import json
import re
from collections import defaultdict
def extract_leading_digits(prefix: str):
    """Return the run of consecutive digits at the very start of ``prefix``.

    Args:
        prefix: File-name stem to inspect.

    Returns:
        The leading digits as a string, or ``None`` when ``prefix`` does not
        begin with a digit (including when it is empty).
    """
    match = re.match(r'^(\d+)', prefix)
    return match.group(1) if match else None
def compare_json_and_prefixes(json_file_path: str):
    """Compare the ``.dat`` keys of a JSON mapping with their extracted prefixes.

    Loads the JSON file, collects every top-level key that ends with ``.dat``
    (after stripping surrounding whitespace), groups those keys by their
    lower-cased stem, and prints a report about stems shared by several keys.

    Args:
        json_file_path: Path of the UTF-8 JSON file to read. Its top level
            must be a JSON object.

    Returns:
        A ``(json_keys, prefix_to_keys)`` tuple: the stripped ``.dat`` keys in
        file order, and a mapping from lower-cased stem to the keys that share
        it. If the file cannot be read or parsed, or its top level is not an
        object, a message is printed and ``([], {})`` is returned.
    """
    print("\n" + "=" * 80)
    print("第一步:JSON键名与提取前缀对比分析")
    print("=" * 80)

    # 1. Load the mapping. Only this step can realistically fail, so the
    #    error handling is confined to it.
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            dat_mapping = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file.
        # ValueError: malformed JSON or invalid UTF-8.
        print(f"❌ 对比分析失败: {e}")
        return [], {}
    if not isinstance(dat_mapping, dict):
        print(f"❌ 对比分析失败: JSON顶层应为对象, 实际为 {type(dat_mapping).__name__}")
        return [], {}

    stripped_keys = (key.strip() for key in dat_mapping)
    json_keys = [key for key in stripped_keys if key.endswith('.dat')]
    total_json_keys = len(json_keys)
    print(f"1. 从JSON中读取到 {total_json_keys} 个.dat键名")

    # 2. Group keys by case-insensitive stem; one entry per unique prefix.
    prefix_to_keys = defaultdict(list)
    for key in json_keys:
        lower_prefix = os.path.splitext(key)[0].strip().lower()
        prefix_to_keys[lower_prefix].append(key)
    total_extracted = len(prefix_to_keys)
    print(f"2. 提取到 {total_extracted} 个唯一前缀")

    # 3. Fewer prefixes than keys means several keys collapse to one stem.
    if total_extracted < total_json_keys:
        diff_count = total_json_keys - total_extracted
        print(f"\n⚠️ 发现差异:前缀数比JSON键名少 {diff_count} 个")
        print(" 原因:存在不同键名对应相同前缀的情况(如下所示)")
        duplicates = [
            (prefix, keys)
            for prefix, keys in prefix_to_keys.items()
            if len(keys) > 1
        ]
        print(f"\n 存在重复前缀的键名数量:{len(duplicates)} 组")
        # Show at most five colliding groups as examples.
        for i, (prefix, keys) in enumerate(duplicates[:5], 1):
            print(f" 示例 {i}: 前缀 '{prefix}' 对应 {len(keys)} 个键名 → {keys[:2]}...")
    else:
        print("\n✅ 前缀数与JSON键名数一致,无差异")

    return json_keys, prefix_to_keys
def _copy_obj_if_missing(obj_path, dest_dir):
    """Copy ``obj_path`` into ``dest_dir`` unless that name already exists there; return the file name."""
    obj_filename = os.path.basename(obj_path)
    dest_path = os.path.join(dest_dir, obj_filename)
    if not os.path.exists(dest_path):
        shutil.copy2(obj_path, dest_path)
    return obj_filename


def copy_obj_files(json_file_path, obj_source_dir, dest_dir, json_keys):
    """Copy the OBJ files that correspond to the given ``.dat`` keys.

    Matching is case-insensitive on the file stem and happens in two passes:
    first the full stem of each key, then, for keys still unmatched, only the
    digits at the start of the stem. Files already present in ``dest_dir`` are
    not overwritten, but the key still counts as matched. Progress and a final
    summary are printed.

    Args:
        json_file_path: Unused; kept so existing callers keep working.
        obj_source_dir: Directory scanned (non-recursively) for ``*.obj`` files.
        dest_dir: Directory receiving the copies; created when missing.
        json_keys: ``.dat`` key names whose stems select the files to copy.

    Returns:
        None. File-system errors are reported on stdout instead of raised.
    """
    try:
        print("\n" + "=" * 80)
        print("第二步:OBJ文件复制操作")
        print("=" * 80)

        # 1. Prepare the destination directory.
        os.makedirs(dest_dir, exist_ok=True)
        print(f"📂 目标目录已准备:{dest_dir}")

        # 2. Index the OBJ files by lower-cased stem. Sorting makes the winner
        #    deterministic when names differ only by case.
        obj_lower_to_path = {}
        for filename in sorted(os.listdir(obj_source_dir)):
            if not filename.lower().endswith('.obj'):
                continue
            obj_full_path = os.path.join(obj_source_dir, filename)
            if not os.path.isfile(obj_full_path):
                # A directory named "*.obj" would make copy2 fail and abort
                # the whole run.
                continue
            stem = os.path.splitext(filename)[0].strip().lower()
            obj_lower_to_path[stem] = obj_full_path
        print(f"🔍 已预处理 {len(obj_lower_to_path)} 个OBJ文件")

        # 3. Stems of the .dat keys, in their original order.
        dat_prefixes = [os.path.splitext(key)[0].strip() for key in json_keys]
        print(f"📋 待匹配的dat前缀总数:{len(dat_prefixes)} 个")

        # 4. First pass: match on the full stem.
        first_copied = []  # [(dat prefix, OBJ file name)]
        not_found_first = []
        for prefix in dat_prefixes:
            obj_path = obj_lower_to_path.get(prefix.lower())
            if obj_path is None:
                not_found_first.append(prefix)
                continue
            first_copied.append((prefix, _copy_obj_if_missing(obj_path, dest_dir)))
        print(f"\n第一次匹配完成:{len(first_copied)} 个前缀匹配成功")
        print(f"初始未匹配的前缀:{len(not_found_first)} 个")

        # 5. Second pass: retry the leftovers using only their leading digits.
        second_copied = []  # [(dat prefix, OBJ file name, extracted digits)]
        for prefix in not_found_first:
            leading_digits = extract_leading_digits(prefix)
            if not leading_digits:
                continue
            obj_path = obj_lower_to_path.get(leading_digits.lower())
            if obj_path is None:
                continue
            obj_filename = _copy_obj_if_missing(obj_path, dest_dir)
            second_copied.append((prefix, obj_filename, leading_digits))
        print(f"第二次匹配完成:{len(second_copied)} 个前缀匹配成功")

        # 6. Final summary.
        print("\n" + "=" * 60)
        print("最终结果汇总")
        print("=" * 60)
        total_matched = len(first_copied) + len(second_copied)
        print("📊 统计信息:")
        print(f" - JSON中.dat键名总数:{len(json_keys)}")
        print(f" - 第一次匹配成功:{len(first_copied)}")
        print(f" - 第二次匹配成功:{len(second_copied)}")
        print(f" - 总匹配成功:{total_matched}")
        print(f" - 最终未匹配:{len(dat_prefixes) - total_matched}")
        print(f"\n🏁 所有操作完成!复制的文件保存至:{dest_dir}")
    except OSError as e:
        # Covers makedirs/listdir/copy2 failures (shutil.Error is an OSError).
        print(f"❌ 复制操作失败: {e}")
def main(
    json_file_path="/public/home/wangshuo/gap/assembly/data/car_1k/subset_self/label_mapping_freq.json",
    obj_source_dir="/public/home/wangshuo/gap/assembly/data/part_obj",
    dest_dir="/public/home/wangshuo/gap/assembly/data/part_obj_300",
):
    """Run the full workflow: analyse the JSON keys, then copy matching OBJ files.

    Args:
        json_file_path: JSON file whose ``.dat`` keys select the OBJ files.
        obj_source_dir: Directory containing the source ``*.obj`` files.
        dest_dir: Directory that receives the copied files.

    The defaults are the paths this script was originally hard-coded to, so
    calling ``main()`` with no arguments behaves as before.
    """
    print("=" * 80)
    print("JSON键名分析与OBJ文件复制工具")
    print("=" * 80)

    # Step 1: analyse how the JSON keys relate to their prefixes.
    json_keys, _ = compare_json_and_prefixes(json_file_path)
    if not json_keys:
        print("❌ 未获取到有效的JSON键名,无法继续操作")
        return

    # Step 2: copy the OBJ files selected by those keys.
    copy_obj_files(json_file_path, obj_source_dir, dest_dir, json_keys)
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()