# cxr-vlm-code / count_img.py
# convitom
# f
# b961b41
import os
import json
def get_local_images(root_dir):
    """Collect the relative paths of every local ``.jpg`` image.

    Only the direct children of ``root_dir`` whose name starts with ``"p1"``
    are scanned; each of them is walked recursively. Every path is expressed
    relative to ``root_dir`` with forward slashes (``p10/.../xxx.jpg``), the
    same form the VQA json uses.

    Args:
        root_dir: Directory holding the ``pXX`` patient folders.

    Returns:
        A set of relative image paths (str).
    """
    found = set()
    for entry in os.listdir(root_dir):
        # Meant to select p10 .. p19; any name beginning with "p1" matches.
        if entry.startswith("p1"):
            for dirpath, _subdirs, filenames in os.walk(os.path.join(root_dir, entry)):
                found.update(
                    # Normalise Windows separators so the paths look like
                    # p10/.../xxx.jpg.
                    os.path.relpath(os.path.join(dirpath, name), root_dir).replace("\\", "/")
                    for name in filenames
                    if name.endswith(".jpg")
                )
    return found
def get_vqa_images(vqa_json_path):
    """Read a VQA json file and return the distinct ``image_path`` values.

    The file is parsed as UTF-8 json and iterated item by item; items that
    have no ``"image_path"`` key are skipped.

    Args:
        vqa_json_path: Path to the VQA json file.

    Returns:
        A set with the ``image_path`` value of every item that has one.
    """
    with open(vqa_json_path, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    return {record["image_path"] for record in records if "image_path" in record}
def main(root_dir, vqa_json_path, output_path="matched_images.txt"):
    """Report how many VQA images exist locally and save the matching paths.

    Prints the number of local images, the number of distinct VQA image
    paths, the size of their intersection and the fraction of VQA images
    that are present locally, then writes the matching paths to
    ``output_path``, one per line.

    Args:
        root_dir: Directory scanned by ``get_local_images``.
        vqa_json_path: VQA json file read by ``get_vqa_images``.
        output_path: File that receives the matched paths. Defaults to
            ``matched_images.txt`` in the current working directory.

    Returns:
        None.
    """
    print("Đang quét ảnh local...")
    local_images = get_local_images(root_dir)
    print(f"Số ảnh local: {len(local_images)}")

    print("Đang đọc VQA json...")
    vqa_images = get_vqa_images(vqa_json_path)
    print(f"Số ảnh trong VQA: {len(vqa_images)}")

    # Images present both on disk and in the VQA file.
    matched = local_images & vqa_images

    # A json without any image_path would make the denominator zero; report
    # zero coverage instead of crashing with ZeroDivisionError.
    coverage = len(matched) / len(vqa_images) if vqa_images else 0.0

    print("\n===== KẾT QUẢ =====")
    print(f"Số ảnh trùng: {len(matched)}")
    print(f"Tỷ lệ cover VQA: {coverage:.4f}")

    # Explicit UTF-8 keeps the output independent of the platform default
    # encoding; sorting makes the file reproducible between runs.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(f"{path}\n" for path in sorted(matched))
    print(f"Đã lưu danh sách vào {output_path}")
if __name__ == "__main__":
    # Split names substituted into the two path templates below.
    # NOTE(review): the image folder uses "train" while the VQA json uses
    # "valid" — confirm this mismatch is intentional.
    local_split = "train"
    vqa_split = "valid"

    # Local image root, e.g. D:/mimic-cxr
    root_template = r"D:\USTH\KLTN\data\{x}"
    # VQA annotation file, e.g. D:/vqa/train.json
    vqa_template = r"D:\USTH\KLTN\data\mimic-ext-mimic-cxr-vqa-a-complex-diverse-and-large-scale-visual-question-answering-dataset-for-chest-x-ray-images-1.0.0\MIMIC-Ext-MIMIC-CXR-VQA\dataset\{y}.json"

    main(
        root_template.format(x=local_split),
        vqa_template.format(y=vqa_split),
    )