Spaces:
Paused
Paused
File size: 4,555 Bytes
6011f4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
def checkEncoding(filepath):
    """Guess a file's text encoding from its UTF-16 byte-order mark (BOM).

    Only the first two bytes are inspected, so the result does not depend on
    which character happens to follow the BOM.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        "utf_16_be" if the file starts with the bytes FE FF,
        "utf_16_le" if it starts with the bytes FF FE,
        otherwise "utf_8".

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(filepath, "rb") as encode_check:
        # read() rather than readline(): an early line break must not cut
        # the sample short.
        bom = encode_check.read(2)

    if bom == b"\xfe\xff":
        return "utf_16_be"
    if bom == b"\xff\xfe":
        return "utf_16_le"
    # No UTF-16 BOM: fall back to UTF-8.
    return "utf_8"
def readTextFile(filepath):
    """Read a text file and return its lines.

    The encoding is chosen by ``checkEncoding``. If the file cannot be
    decoded with that encoding it is read again as Latin-1, which can decode
    any byte sequence.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list of lines with their line endings kept, or None if the path
        does not exist.

    Raises:
        OSError: If the path exists but cannot be opened or read.
    """
    if not os.path.exists(filepath):
        return None

    file_encoding = checkEncoding(filepath)
    try:
        with open(filepath, "rt", encoding=file_encoding) as f_in:
            lines = f_in.readlines()
    except UnicodeError:
        # Only a decoding failure triggers the fallback; I/O errors and
        # interrupts propagate to the caller.
        with open(filepath, "rt", encoding="latin_1") as f_in:
            return f_in.readlines()

    # The utf_8 / utf_16_le / utf_16_be codecs keep a leading BOM as U+FEFF.
    # Drop it so the first line starts with its real first character.
    if lines and lines[0].startswith("\ufeff"):
        lines[0] = lines[0][1:]
    return lines
def count_valid_lines(filepath):
    """Count the lines starting with "1 " in the main section of an LDR file.

    The main section always begins at the first line of the file. It ends
    at the first of:
      * a "0 FILE <name>" line that is not the first line of the file,
      * a "0 NOFILE" line,
      * the end of the file.

    Args:
        filepath: Path of the LDR file.

    Returns:
        The number of main-section lines that start with "1 ", or -1 if the
        path is not a regular file or the file cannot be read.
    """
    if not os.path.isfile(filepath):
        return -1

    try:
        lines = readTextFile(filepath)
    except OSError:
        # Honour the documented contract: an unreadable file yields -1 so
        # the caller can list it as failed instead of aborting.
        return -1
    if lines is None:
        return -1

    # Default: no terminator found, the whole file is the main section.
    end_line = len(lines)

    for line_no, line in enumerate(lines):
        tokens = line.split()
        if len(tokens) < 2 or tokens[0] != "0":
            continue
        if tokens[1] == "FILE":
            # A FILE header on the very first line opens the main section;
            # any later one (with a file name) begins the next sub-file.
            if len(tokens) > 2 and line_no > 0:
                end_line = line_no
                break
        elif tokens[1] == "NOFILE":
            # NOFILE carries no argument, so it must be matched with only
            # two tokens on the line.
            end_line = line_no
            break

    return sum(1 for line in lines[:end_line] if line.startswith("1 "))
def filter_ldr_by_valid_lines(input_dir, min_valid=30, max_valid=300):
    """Print the LDR files in a directory whose valid-line count is unusual.

    Every ``*.ldr`` file (case-insensitive suffix) directly inside
    ``input_dir`` is measured with ``count_valid_lines``. Files are reported
    in three groups: fewer than ``min_valid`` valid lines, more than
    ``max_valid`` valid lines, and files for which the count was -1.
    Files in between are not listed.

    Args:
        input_dir: Directory to scan (not recursive).
        min_valid: Files with a count strictly below this are reported.
        max_valid: Files with a count strictly above this are reported.

    Returns:
        None. All results are written to standard output.
    """
    if not os.path.isdir(input_dir):
        print(f"错误:目录不存在 → {input_dir}")
        return

    # Sorted so the report order does not depend on the file system.
    ldr_files = sorted(
        f for f in os.listdir(input_dir) if f.lower().endswith('.ldr')
    )
    if not ldr_files:
        print(f"提示:在 {input_dir} 中未找到任何.ldr文件")
        return

    print(f"=== 开始筛选 {input_dir} 下的LDR文件 ===")
    print(f"总计找到 {len(ldr_files)} 个LDR文件,仅显示符合条件的文件:\n")

    too_few = []       # (file name, count) with count < min_valid
    too_many = []      # (file name, count) with count > max_valid
    failed_files = []  # file names whose count came back as -1

    for ldr_file in ldr_files:
        valid_count = count_valid_lines(os.path.join(input_dir, ldr_file))
        if valid_count == -1:
            failed_files.append(ldr_file)
        elif valid_count < min_valid:
            too_few.append((ldr_file, valid_count))
        elif valid_count > max_valid:
            too_many.append((ldr_file, valid_count))

    if too_few:
        print(f"【1】有效行数(1开头)< {min_valid} 的文件:")
        for file, count in too_few:
            print(f" - {file} → 有效行数:{count}")
        print()

    if too_many:
        print(f"【2】有效行数(1开头)> {max_valid} 的文件:")
        for file, count in too_many:
            print(f" - {file} → 有效行数:{count}")
        print()

    if failed_files:
        print(f"【3】读取失败的文件(共{len(failed_files)}个):")
        for file in failed_files:
            print(f" - {file}")

    print("\n=== 筛选完成 ===")
    print(f"符合条件的文件总数:{len(too_few) + len(too_many)} 个")
if __name__ == "__main__":
    import sys

    # Default directory to scan. Pass another directory as the first
    # command-line argument to override it without editing this file.
    LDR_DIRECTORY = "/public/home/wangshuo/gap/assembly/data/car_1k/subset/ldr/"

    # Run the filter on the chosen directory.
    filter_ldr_by_valid_lines(sys.argv[1] if len(sys.argv) > 1 else LDR_DIRECTORY)
|