import sys

import matplotlib.pyplot as plt
import pandas as pd
# Default location of the metadata CSV; pass a different path as the first
# command-line argument to override it.
DEFAULT_CSV_PATH = r"D:\USTH\KLTN\cxr-vlm-data\mimic-cxr-2.0.0-metadata.csv"

# Columns the analysis cannot run without.
REQUIRED_COLS = ["Rows", "Columns"]

# Bin count shared by every histogram.
HISTOGRAM_BINS = 30


def missing_columns(df: pd.DataFrame, required=REQUIRED_COLS) -> list:
    """Return the names in *required* that are absent from ``df.columns``."""
    return [col for col in required if col not in df.columns]


def validate_columns(df: pd.DataFrame, required=REQUIRED_COLS) -> None:
    """Raise ``ValueError`` naming every required column missing from *df*.

    All missing columns are reported in one message so the user does not have
    to fix them one run at a time. With a single missing column the message
    is the same as a per-column check would produce.
    """
    missing = missing_columns(df, required)
    if missing:
        raise ValueError(f"Thiếu cột: {', '.join(missing)}")


def add_derived_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with ``TotalPixels`` and ``AspectRatio`` added.

    ``TotalPixels`` is ``Rows * Columns``; ``AspectRatio`` is
    ``Columns / Rows`` (width over height). The input frame is not modified.
    """
    # Mask zero heights so the ratio becomes NaN instead of inf; a single inf
    # would make the mean reported by describe() infinite.
    nonzero_rows = df["Rows"].where(df["Rows"] != 0)
    return df.assign(
        TotalPixels=df["Rows"] * df["Columns"],
        AspectRatio=df["Columns"] / nonzero_rows,
    )


def count_resolutions(df: pd.DataFrame) -> pd.DataFrame:
    """Count rows per distinct ``(Rows, Columns)`` pair, most frequent first.

    Returns a frame with columns ``Rows``, ``Columns`` and ``Count``.
    """
    return (
        df.groupby(["Rows", "Columns"])
        .size()
        .reset_index(name="Count")
        .sort_values("Count", ascending=False)
    )


def _show_histogram(values, xlabel: str, title: str) -> None:
    """Draw and display one histogram of *values* with the shared styling."""
    plt.figure(figsize=(8, 5))
    plt.hist(values, bins=HISTOGRAM_BINS)
    plt.xlabel(xlabel)
    plt.ylabel("Number of Images")
    plt.title(title)
    plt.grid(True)
    plt.show()


def _show_resolution_scatter(df: pd.DataFrame) -> None:
    """Draw and display a width-versus-height scatter plot of *df*."""
    plt.figure(figsize=(7, 7))
    plt.scatter(df["Columns"], df["Rows"], alpha=0.5)
    plt.xlabel("Width (Columns)")
    plt.ylabel("Height (Rows)")
    plt.title("Image Resolution Distribution")
    plt.grid(True)
    plt.show()


def main(csv_path: str = DEFAULT_CSV_PATH) -> None:
    """Load the metadata CSV at *csv_path*, print size statistics, show plots.

    Raises:
        ValueError: if the CSV lacks any column listed in ``REQUIRED_COLS``.
    """
    df = pd.read_csv(csv_path)
    validate_columns(df)
    df = add_derived_columns(df)

    print("===== THỐNG KÊ KÍCH THƯỚC ẢNH =====")
    print(df[["Rows", "Columns", "TotalPixels"]].describe())

    print("\n===== THỐNG KÊ TỈ LỆ KHUNG HÌNH =====")
    print(df["AspectRatio"].describe())

    _show_histogram(df["Rows"], "Rows (Height)", "Distribution of Image Heights")
    _show_histogram(df["Columns"], "Columns (Width)", "Distribution of Image Widths")
    _show_histogram(df["TotalPixels"], "Total Pixels", "Distribution of Image Sizes")
    _show_resolution_scatter(df)

    print("\n===== TOP RESOLUTION PHỔ BIẾN =====")
    print(count_resolutions(df).head(10))


if __name__ == "__main__":
    # An optional first argument overrides the default CSV path.
    main(*sys.argv[1:2])