hieu3636
/

cxr-vlm-code

Model card Files Files and versions

cxr-vlm-code / count_img.py

convitom

f

b961b41 13 days ago

history blame contribute delete

2.29 kB

	import os
	import json

	def get_local_images(root_dir, folder_prefix="p1", extensions=(".jpg",)):
	    """
	    Collect the relative paths of all local images under ``root_dir``.

	    Only top-level entries of ``root_dir`` whose name starts with
	    ``folder_prefix`` are scanned; the default ``"p1"`` targets the
	    folders p10 .. p19. Each selected folder is walked recursively and
	    every file whose name ends with one of ``extensions`` is recorded.

	    Args:
	        root_dir: Directory that contains the top-level image folders.
	        folder_prefix: Name prefix a top-level entry must have to be
	            scanned. Note that this is a plain prefix match, so ``"p1"``
	            also matches names such as ``p100``.
	        extensions: A file-name suffix, or an iterable of suffixes, to
	            accept. Matching is case-sensitive.

	    Returns:
	        A set of paths relative to ``root_dir`` written with forward
	        slashes (``p10/.../xxx.jpg``), i.e. the same form the VQA json
	        uses for ``image_path``.
	    """
	    # str.endswith accepts a tuple of suffixes; normalise a lone string.
	    if isinstance(extensions, str):
	        suffixes = (extensions,)
	    else:
	        suffixes = tuple(extensions)

	    local_images = set()

	    for p_folder in os.listdir(root_dir):
	        if not p_folder.startswith(folder_prefix):
	            continue

	        p_path = os.path.join(root_dir, p_folder)

	        for current_dir, _, files in os.walk(p_path):
	            for file_name in files:
	                if not file_name.endswith(suffixes):
	                    continue

	                full_path = os.path.join(current_dir, file_name)

	                # Convert to the VQA-style form: p10/.../xxx.jpg
	                rel_path = os.path.relpath(full_path, root_dir)
	                local_images.add(rel_path.replace("\\", "/"))

	    return local_images


	def get_vqa_images(vqa_json_path):
	    """
	    Read a VQA json file and return its distinct ``image_path`` values.

	    The file is opened as UTF-8 and parsed with ``json.load``; the parsed
	    value is iterated item by item, and items that do not contain an
	    ``"image_path"`` key are skipped.

	    Args:
	        vqa_json_path: Path to the VQA json file.

	    Returns:
	        A set with every ``image_path`` value found in the file.
	    """
	    with open(vqa_json_path, "r", encoding="utf-8") as handle:
	        records = json.load(handle)

	    return {
	        record["image_path"]
	        for record in records
	        if "image_path" in record
	    }


	def main(root_dir, vqa_json_path, output_path="matched_images.txt"):
	    """
	    Report how many VQA-referenced images are available locally.

	    Scans ``root_dir`` with ``get_local_images``, reads ``vqa_json_path``
	    with ``get_vqa_images``, prints the size of each set, the size of
	    their intersection and the fraction of VQA images found locally,
	    then writes the matched paths to ``output_path`` (one per line).

	    Args:
	        root_dir: Directory holding the local image folders.
	        vqa_json_path: Path to the VQA json file.
	        output_path: File that receives the matched image paths.
	            Defaults to ``matched_images.txt`` in the working directory.
	    """
	    print("Đang quét ảnh local...")
	    local_images = get_local_images(root_dir)
	    print(f"Số ảnh local: {len(local_images)}")

	    print("Đang đọc VQA json...")
	    vqa_images = get_vqa_images(vqa_json_path)
	    print(f"Số ảnh trong VQA: {len(vqa_images)}")

	    # Images present both on disk and in the VQA file.
	    matched = local_images & vqa_images

	    # An empty VQA set would otherwise raise ZeroDivisionError; report
	    # a coverage of 0.0 in that case.
	    coverage = len(matched) / len(vqa_images) if vqa_images else 0.0

	    print("\n===== KẾT QUẢ =====")
	    print(f"Số ảnh trùng: {len(matched)}")
	    print(f"Tỷ lệ cover VQA: {coverage:.4f}")

	    # Save the list; sorted so the file is reproducible between runs, and
	    # UTF-8 so the result does not depend on the platform default encoding.
	    with open(output_path, "w", encoding="utf-8") as f:
	        f.writelines(f"{path}\n" for path in sorted(matched))

	    print(f"Đã lưu danh sách vào {output_path}")


	if __name__ == "__main__":
	    # Dataset splits used to fill in the two path templates below.
	    # NOTE(review): the image folder uses "train" while the VQA file uses
	    # "valid" — confirm this mismatch is intentional.
	    image_split = "train"
	    vqa_split = "valid"

	    # Local image root, e.g. D:/mimic-cxr
	    image_root_template = r"D:\USTH\KLTN\data\{x}"
	    # VQA annotation file, e.g. D:/vqa/train.json
	    vqa_json_template = r"D:\USTH\KLTN\data\mimic-ext-mimic-cxr-vqa-a-complex-diverse-and-large-scale-visual-question-answering-dataset-for-chest-x-ray-images-1.0.0\MIMIC-Ext-MIMIC-CXR-VQA\dataset\{y}.json"

	    root_dir = image_root_template.format(x=image_split)
	    vqa_json = vqa_json_template.format(y=vqa_split)

	    main(root_dir, vqa_json)