|
|
import numpy as np |
|
|
import cv2 |
|
|
from PIL import Image |
|
|
import os |
|
|
import glob |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
from collections import Counter |
|
|
|
|
|
def analyze_face_dataset(faces_directory="Real_Images/", sample_size=None):
    """Comprehensive analysis of the face dataset.

    Scans `faces_directory` for image files, computes per-image statistics
    (file size, dimensions, average color, brightness, dominant color),
    prints a text report, renders summary visualizations, and returns the
    collected records.

    Args:
        faces_directory: Directory containing the face images.
        sample_size: Optional cap on the number of images analyzed.

    Returns:
        List of per-image stat dicts, or None when the directory is missing,
        contains no images, or no image could be processed.
    """
    print(f"π ANALYZING FACE DATASET: {faces_directory}")
    print("="*50)

    if not os.path.exists(faces_directory):
        print(f"β Directory '{faces_directory}' not found!")
        return

    # Gather every file matching a supported image extension.
    face_extensions = ['*.png', '*.jpg', '*.jpeg', '*.bmp', '*.tiff']
    face_files = []
    for extension in face_extensions:
        face_files.extend(glob.glob(os.path.join(faces_directory, extension)))

    if len(face_files) == 0:
        print(f"β No face images found in {faces_directory}")
        return

    print(f"π Found {len(face_files)} face images")

    if sample_size and len(face_files) > sample_size:
        face_files = face_files[:sample_size]
        print(f"π Analyzing sample of {sample_size} images for performance")

    # Single accumulator; the original also kept five parallel lists
    # (image_sizes, file_sizes, color_stats, brightness_values,
    # dominant_colors) that were never read afterwards — removed.
    face_data = []

    print("\nπ Processing images...")

    for i, face_path in enumerate(face_files):
        try:
            image = Image.open(face_path)

            # File size in kilobytes.
            file_size = os.path.getsize(face_path) / 1024

            width, height = image.size

            # Normalize to RGB so channel statistics are always 3-channel.
            if image.mode != 'RGB':
                image = image.convert('RGB')

            img_array = np.array(image)

            # Mean over both spatial axes -> per-channel average color.
            avg_color = np.mean(img_array, axis=(0, 1))
            brightness = np.mean(avg_color)

            dominant_color = get_dominant_color(img_array)

            # Record is appended only after every metric succeeded, so a
            # failure part-way through never leaves a partial entry.
            face_data.append({
                'filename': os.path.basename(face_path),
                'size': (width, height),
                'file_size_kb': file_size,
                'avg_color': avg_color,
                'brightness': brightness,
                'dominant_color': dominant_color
            })

            if (i + 1) % 50 == 0:
                print(f" Processed {i + 1}/{len(face_files)} images...")

        except Exception as e:
            print(f" β οΈ Error processing {face_path}: {e}")

    # BUG FIX: the original f-string contained a literal newline (a syntax
    # error from a mangled emoji); message is now a single line.
    print(f"\nβ Analysis complete! Processed {len(face_data)} valid images")

    # Guard: if every image failed to load, the report/visualization code
    # would divide by zero or average empty lists.
    if not face_data:
        print(f"β No images could be processed in {faces_directory}")
        return

    generate_analysis_report(face_data, face_files, faces_directory)

    create_analysis_visualizations(face_data)

    return face_data
|
|
|
|
|
def get_dominant_color(image_array): |
|
|
"""Extract dominant color using uniform quantization""" |
|
|
|
|
|
step_size = 256 // 8 |
|
|
quantized = (image_array // step_size) * step_size + step_size // 2 |
|
|
quantized = np.clip(quantized, 0, 255) |
|
|
|
|
|
|
|
|
colors = quantized.reshape(-1, 3) |
|
|
|
|
|
|
|
|
unique_colors, counts = np.unique(colors, axis=0, return_counts=True) |
|
|
most_frequent_idx = np.argmax(counts) |
|
|
|
|
|
return unique_colors[most_frequent_idx].astype(float) |
|
|
|
|
|
def generate_analysis_report(face_data, all_files, directory):
    """Generate detailed analysis report.

    Prints dataset overview, file-size, dimension, aspect-ratio, color and
    diversity statistics to stdout, then delegates to
    assess_mosaic_suitability for a final rating.

    Args:
        face_data: per-image stat dicts from analyze_face_dataset; reads
            the 'file_size_kb', 'size', 'brightness' and 'avg_color' keys.
        all_files: every image path found on disk (processed or not),
            used for the success-rate figure — assumed non-empty.
        directory: dataset directory path, echoed in the header.
    """
    print("\n" + "="*60)
    print("π COMPREHENSIVE FACE DATASET ANALYSIS")
    print("="*60)

    # Overview: how many files were found vs. successfully processed.
    print(f"\nπ DATASET OVERVIEW:")
    print(f"Directory: {directory}")
    print(f"Total Images Found: {len(all_files)}")
    print(f"Successfully Processed: {len(face_data)}")
    print(f"Success Rate: {100 * len(face_data) / len(all_files):.1f}%")

    # File sizes (kilobytes per image; total reported in megabytes).
    file_sizes = [face['file_size_kb'] for face in face_data]
    print(f"\nπΎ FILE SIZE ANALYSIS:")
    print(f"Average File Size: {np.mean(file_sizes):.1f} KB")
    print(f"File Size Range: {np.min(file_sizes):.1f} - {np.max(file_sizes):.1f} KB")
    print(f"Total Dataset Size: {np.sum(file_sizes)/1024:.1f} MB")

    # Pixel dimensions.
    widths = [face['size'][0] for face in face_data]
    heights = [face['size'][1] for face in face_data]

    print(f"\nπΌοΈ IMAGE DIMENSION ANALYSIS:")
    print(f"Width Range: {np.min(widths)} - {np.max(widths)} pixels")
    print(f"Height Range: {np.min(heights)} - {np.max(heights)} pixels")
    print(f"Average Dimensions: {np.mean(widths):.0f} Γ {np.mean(heights):.0f}")

    # Aspect ratios rounded to 2 decimals so near-identical ratios group.
    aspect_ratios = [w/h for w, h in zip(widths, heights)]
    aspect_ratio_rounded = [round(ar, 2) for ar in aspect_ratios]
    common_ratios = Counter(aspect_ratio_rounded).most_common(5)

    print(f"\nπ COMMON ASPECT RATIOS:")
    for ratio, count in common_ratios:
        percentage = 100 * count / len(face_data)
        print(f" {ratio:.2f} : 1 β {count} images ({percentage:.1f}%)")

    # Color statistics from the per-image averages.
    brightnesses = [face['brightness'] for face in face_data]
    avg_colors = np.array([face['avg_color'] for face in face_data])

    print(f"\nπ¨ COLOR ANALYSIS:")
    print(f"Brightness Range: {np.min(brightnesses):.1f} - {np.max(brightnesses):.1f}")
    print(f"Average Brightness: {np.mean(brightnesses):.1f} Β± {np.std(brightnesses):.1f}")

    # Per-channel means/spreads (columns 0/1/2 = R/G/B).
    print(f"\nRGB Channel Averages:")
    print(f" Red Channel: {np.mean(avg_colors[:, 0]):.1f} Β± {np.std(avg_colors[:, 0]):.1f}")
    print(f" Green Channel: {np.mean(avg_colors[:, 1]):.1f} Β± {np.std(avg_colors[:, 1]):.1f}")
    print(f" Blue Channel: {np.mean(avg_colors[:, 2]):.1f} Β± {np.std(avg_colors[:, 2]):.1f}")

    # Normalized mean pairwise color distance in [0, 1].
    color_diversity = calculate_color_diversity(avg_colors)
    print(f"\nπ DIVERSITY METRICS:")
    print(f"Color Diversity Score: {color_diversity:.2f} (0=uniform, 1=maximum diversity)")

    # Final suitability ratings and grid-size recommendations.
    assess_mosaic_suitability(face_data)
|
|
|
|
|
def calculate_color_diversity(colors):
    """Calculate color diversity score.

    Computes the mean pairwise Euclidean distance between the given RGB
    colors and normalizes it by the maximum possible RGB distance
    (black vs. white), yielding a score in [0, 1].

    Args:
        colors: array-like of shape (n, 3) of RGB color triples.

    Returns:
        Score in [0, 1]; 0 when fewer than two colors are given.
    """
    pts = np.asarray(colors, dtype=float)
    n = len(pts)
    if n <= 1:
        return 0

    # Farthest-apart RGB colors: (0,0,0) vs (255,255,255).
    max_possible_distance = np.sqrt(3 * 255**2)

    # Vectorized pairwise distance matrix (replaces the original O(n^2)
    # pure-Python double loop with one NumPy broadcast).
    diffs = pts[:, None, :] - pts[None, :, :]
    dists = np.sqrt((diffs ** 2).sum(axis=-1))

    # Sum over the strict upper triangle = sum over unique pairs.
    total_distance = dists[np.triu_indices(n, k=1)].sum()

    avg_distance = total_distance / (n * (n - 1) / 2)
    return avg_distance / max_possible_distance
|
|
|
|
|
def assess_mosaic_suitability(face_data):
    """Assess how suitable the dataset is for mosaic generation.

    Prints a quantity rating (face count vs. rough thresholds), a
    brightness-diversity rating (std of per-image brightness), and the
    square grid sizes the current count can fully populate.

    Args:
        face_data: per-image stat dicts; only the 'brightness' key is read.
    """
    print(f"\nπ― MOSAIC SUITABILITY ASSESSMENT:")

    num_faces = len(face_data)
    brightnesses = [face['brightness'] for face in face_data]
    brightness_std = np.std(brightnesses)

    # Rate dataset size against rough tile-count thresholds.
    if num_faces < 50:
        quantity_rating = "Poor"
        quantity_advice = "Add more faces for better variety"
    elif num_faces < 200:
        quantity_rating = "Fair"
        quantity_advice = "Good start, more faces will improve results"
    elif num_faces < 500:
        quantity_rating = "Good"
        quantity_advice = "Excellent quantity for most mosaics"
    else:
        quantity_rating = "Excellent"
        quantity_advice = "Outstanding variety available"

    # Rate lighting diversity by the brightness standard deviation.
    if brightness_std < 20:
        diversity_rating = "Poor"
        diversity_advice = "Add faces with more varied lighting"
    elif brightness_std < 40:
        diversity_rating = "Fair"
        diversity_advice = "Decent range, could use more variety"
    elif brightness_std < 60:
        diversity_rating = "Good"
        diversity_advice = "Good brightness diversity"
    else:
        diversity_rating = "Excellent"
        diversity_advice = "Excellent brightness range"

    print(f" Quantity Rating: {quantity_rating} ({num_faces} faces)")
    print(f" Advice: {quantity_advice}")
    print(f" Diversity Rating: {diversity_rating} (Ο={brightness_std:.1f})")
    print(f" Advice: {diversity_advice}")

    # Square grids the dataset can tile without reusing a face.
    recommended_grids = []
    if num_faces >= 64:
        recommended_grids.append("8Γ8 (64 tiles)")
    if num_faces >= 256:
        recommended_grids.append("16Γ16 (256 tiles)")
    if num_faces >= 1024:
        recommended_grids.append("32Γ32 (1,024 tiles)")
    if num_faces >= 2500:
        recommended_grids.append("50Γ50+ (2,500+ tiles)")

    if recommended_grids:
        print(f"\nπ RECOMMENDED GRID SIZES:")
        for grid in recommended_grids:
            # BUG FIX: the original f-string contained a literal newline
            # (a syntax error from a mangled emoji); marker and grid name
            # are now printed on one line.
            print(f" β {grid}")
    else:
        print(f"\nβ οΈ Dataset too small for most grid sizes. Add more faces!")
|
|
|
|
|
def create_analysis_visualizations(face_data):
    """Create comprehensive visualizations.

    Renders a 2x3 matplotlib figure (brightness histogram, file-size
    histogram, dimension scatter, RGB channel histograms, dominant-color
    palette, grid-size suitability bars), saves it to
    'face_dataset_analysis.png', and shows it.

    Args:
        face_data: per-image stat dicts from analyze_face_dataset; reads
            'brightness', 'file_size_kb', 'size', 'avg_color' and
            'dominant_color'. Assumed non-empty (caller guards this).
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Face Dataset Analysis', fontsize=16, fontweight='bold')

    # Panel (0,0): distribution of per-image average brightness.
    brightnesses = [face['brightness'] for face in face_data]
    axes[0, 0].hist(brightnesses, bins=30, color='skyblue', alpha=0.7, edgecolor='black')
    axes[0, 0].set_title('Brightness Distribution')
    axes[0, 0].set_xlabel('Average Brightness')
    axes[0, 0].set_ylabel('Number of Faces')
    axes[0, 0].grid(True, alpha=0.3)

    # Panel (0,1): distribution of on-disk file sizes (KB).
    file_sizes = [face['file_size_kb'] for face in face_data]
    axes[0, 1].hist(file_sizes, bins=25, color='lightcoral', alpha=0.7, edgecolor='black')
    axes[0, 1].set_title('File Size Distribution')
    axes[0, 1].set_xlabel('File Size (KB)')
    axes[0, 1].set_ylabel('Number of Faces')
    axes[0, 1].grid(True, alpha=0.3)

    # Panel (0,2): width vs. height scatter of image dimensions.
    widths = [face['size'][0] for face in face_data]
    heights = [face['size'][1] for face in face_data]
    axes[0, 2].scatter(widths, heights, alpha=0.6, color='green')
    axes[0, 2].set_title('Image Dimensions')
    axes[0, 2].set_xlabel('Width (pixels)')
    axes[0, 2].set_ylabel('Height (pixels)')
    axes[0, 2].grid(True, alpha=0.3)

    # Panel (1,0): overlaid histograms of per-image average R/G/B values.
    avg_colors = np.array([face['avg_color'] for face in face_data])
    axes[1, 0].hist(avg_colors[:, 0], bins=20, alpha=0.7, color='red', label='Red')
    axes[1, 0].hist(avg_colors[:, 1], bins=20, alpha=0.7, color='green', label='Green')
    axes[1, 0].hist(avg_colors[:, 2], bins=20, alpha=0.7, color='blue', label='Blue')
    axes[1, 0].set_title('RGB Channel Distribution')
    axes[1, 0].set_xlabel('Color Value')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)

    # Panel (1,1): swatch grid of up to 100 randomly sampled dominant
    # colors, normalized to [0, 1] for imshow.
    # NOTE(review): np.random.choice is unseeded, so this panel differs
    # between runs — confirm whether reproducibility matters.
    dominant_colors = np.array([face['dominant_color'] for face in face_data])
    sample_indices = np.random.choice(len(dominant_colors), min(100, len(dominant_colors)), replace=False)
    sample_colors = dominant_colors[sample_indices] / 255.0

    # Arrange the sample into the largest possible square; any leftover
    # cells stay black (zeros).
    palette_size = int(np.sqrt(len(sample_colors)))
    palette = np.zeros((palette_size, palette_size, 3))
    for i in range(palette_size):
        for j in range(palette_size):
            idx = i * palette_size + j
            if idx < len(sample_colors):
                palette[i, j] = sample_colors[idx]

    axes[1, 1].imshow(palette)
    axes[1, 1].set_title('Sample Dominant Colors')
    axes[1, 1].set_xticks([])
    axes[1, 1].set_yticks([])

    # Panel (1,2): coverage ratio (faces available / tiles needed) for a
    # set of candidate NxN grid sizes, capped at 1.0.
    grid_sizes = [8, 16, 24, 32, 48, 64]
    suitability_scores = []

    for grid_size in grid_sizes:
        total_tiles = grid_size * grid_size
        if len(face_data) >= total_tiles:
            # Enough faces for every tile: ratio is capped at full coverage.
            score = min(1.0, len(face_data) / total_tiles)
        else:
            # Partial coverage: fraction of tiles that could be unique.
            score = len(face_data) / total_tiles
        suitability_scores.append(score)

    bars = axes[1, 2].bar(grid_sizes, suitability_scores, color='gold', alpha=0.7, edgecolor='black')
    axes[1, 2].set_title('Grid Size Suitability')
    axes[1, 2].set_xlabel('Grid Size (NΓN)')
    axes[1, 2].set_ylabel('Coverage Ratio')
    axes[1, 2].set_ylim(0, 1.1)
    axes[1, 2].grid(True, alpha=0.3)

    # Label each bar: "Full" in green at 100%, the ratio in orange/red below.
    for bar, score in zip(bars, suitability_scores):
        if score >= 1.0:
            label = "Full"
            color = 'darkgreen'
        elif score >= 0.5:
            label = f"{score:.1f}"
            color = 'darkorange'
        else:
            label = f"{score:.2f}"
            color = 'darkred'

        axes[1, 2].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
                        label, ha='center', va='bottom', fontweight='bold', color=color)

    plt.tight_layout()
    plt.savefig('face_dataset_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

    print(f"\nπ Visualizations saved as 'face_dataset_analysis.png'")
|
|
|
|
|
def recommend_optimizations(face_data):
    """Provide specific recommendations for improving the dataset.

    Inspects brightness coverage, color variation, and total face count,
    then prints targeted advice for growing the dataset. Reads the
    'brightness' and 'avg_color' keys of each entry.
    """
    print(f"\nπ‘ OPTIMIZATION RECOMMENDATIONS:")
    print("-" * 40)

    total_faces = len(face_data)
    brightness_list = [entry['brightness'] for entry in face_data]
    channel_means = np.array([entry['avg_color'] for entry in face_data])

    # Look for gaps in the brightness spectrum via a 10-bin histogram.
    hist_counts, _ = np.histogram(brightness_list, bins=10)
    gap_count = np.sum(hist_counts == 0)

    if gap_count > 2:
        print(f"π Add faces with different lighting conditions")
        print(f" Found {gap_count} gaps in brightness spectrum")

    # Low average per-channel spread means the face palette is too uniform.
    if np.std(channel_means, axis=0).mean() < 30:
        print(f"π Add more diverse face colors/ethnicities")
        print(f" Current color variation is limited")

    # Report how many extra faces each target square grid would need.
    for side in (32, 48, 64):
        required = side * side
        if total_faces < required:
            shortfall = required - total_faces
            print(f"π For {side}Γ{side} mosaics: Add {shortfall} more faces")

    print(f"\nπ― Priority Actions:")
    print(f" 1. Collect faces with varied lighting (bright, normal, dark)")
    print(f" 2. Include diverse ethnicities and skin tones")
    print(f" 3. Add faces with different backgrounds")
    print(f" 4. Maintain consistent image quality")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: analyze the default dataset and, on success,
    # print follow-up optimization recommendations.
    print("Starting face dataset analysis...")

    results = analyze_face_dataset("Real_Images/", sample_size=1000)

    if not results:
        print(f"β Analysis failed. Please check your Real_Images/ directory.")
    else:
        recommend_optimizations(results)

        print(f"\nπ Analysis Complete!")
        print(f"Check 'face_dataset_analysis.png' for visual analysis")
        print(f"Your dataset is ready for mosaic generation!")