# CBIR-System / prepare_images_for_deployment.py
# Uploaded by IT4CHI2311 - "Deploy CBIR system" (b332550)
"""
Script to prepare images for HuggingFace deployment.
Copies the indexed images into a per-category folder structure and writes the
updated paths to image_paths_deployment.pkl.
"""
import pickle
import shutil
from pathlib import Path
import os
from tqdm import tqdm
def prepare_images_for_deployment(
    image_paths_file='image_paths.pkl',
    output_dir='images',
    max_images_per_category=None,
    create_examples=True
):
    """
    Prepare images for deployment by organizing them in a clean structure.

    Every image listed in ``image_paths_file`` is copied to
    ``<output_dir>/<category>/<filename>``, where the category is the name of
    the image's parent directory. The rewritten path list is saved to
    ``image_paths_deployment.pkl`` in the current working directory.

    The returned/saved list always has exactly one entry per input path, in
    the same order, so that position ``i`` keeps referring to the same image
    as position ``i`` of the original list (and therefore of any index built
    from it). Images that are missing on disk, or that are left out because
    of ``max_images_per_category``, keep their original path in that slot.

    Args:
        image_paths_file: Path to your image_paths.pkl file
        output_dir: Directory where images will be copied
        max_images_per_category: Limit images per category (None = all)
        create_examples: Whether to create example images folder

    Returns:
        Tuple ``(new_paths, categories)``: the rewritten path list and a dict
        mapping each category name to the number of images kept for it.
    """
    print("Loading image paths...")
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only run this script on a pickle you produced yourself.
    with open(image_paths_file, 'rb') as f:
        image_paths = pickle.load(f)
    print(f"Total images in index: {len(image_paths)}")

    # parents=True so a nested output directory (e.g. "data/images") works.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    print("\nCopying images...")
    new_paths, categories, copied_count, missing_count, skipped_count = (
        _copy_images(image_paths, output_path, output_dir,
                     max_images_per_category)
    )
    print(f"\n✓ Copied {copied_count} images")
    print(f"✗ Missing {missing_count} images")
    if skipped_count:
        print(f"✗ Skipped {skipped_count} images (category limit)")
    print(f"📁 Categories: {len(categories)}")

    # Save updated paths
    new_pkl_file = 'image_paths_deployment.pkl'
    with open(new_pkl_file, 'wb') as f:
        pickle.dump(new_paths, f)
    print(f"\n✓ Saved updated paths to: {new_pkl_file}")

    if create_examples and copied_count > 0:
        examples_path = output_path / 'examples'
        example_count = _create_example_images(
            output_path, examples_path, categories
        )
        print(f"✓ Created {example_count} example images in {examples_path}")

    _print_deployment_instructions(output_dir)
    return new_paths, categories


def _copy_images(image_paths, output_path, output_dir,
                 max_images_per_category):
    """Copy images into per-category folders and build the new path list.

    Returns ``(new_paths, categories, copied, missing, skipped)``.
    ``new_paths`` has one entry per element of ``image_paths``.
    """
    new_paths = []
    categories = {}
    copied = missing = skipped = 0

    for source in tqdm(image_paths):
        source = Path(source)

        if not source.exists():
            print(f"Warning: Image not found: {source}")
            missing += 1
            new_paths.append(str(source))  # Keep original path
            continue

        category = source.parent.name
        filename = source.name
        categories.setdefault(category, 0)

        if max_images_per_category and categories[category] >= max_images_per_category:
            # Keep a slot for the skipped image: dropping it would shift all
            # following entries and break positional alignment with the index.
            new_paths.append(str(source))
            skipped += 1
            continue
        categories[category] += 1

        category_dir = output_path / category
        category_dir.mkdir(exist_ok=True)
        destination = category_dir / filename

        # An existing destination is left untouched, so re-runs are cheap.
        if not destination.exists():
            shutil.copy2(source, destination)

        new_paths.append(f"./{output_dir}/{category}/{filename}")
        copied += 1

    return new_paths, categories, copied, missing, skipped


def _create_example_images(output_path, examples_path, categories):
    """Copy one image from each of the first 5 categories into examples_path.

    Returns the number of example images created.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp')
    examples_path.mkdir(exist_ok=True)

    created = 0
    for category in list(categories)[:5]:
        category_dir = output_path / category
        if not category_dir.is_dir():
            continue
        # Sorted so the chosen example is the same on every run; suffix is
        # compared case-insensitively so ".JPG" or ".png" files also qualify.
        candidates = sorted(
            p for p in category_dir.iterdir()
            if p.is_file() and p.suffix.lower() in image_suffixes
        )
        if candidates:
            first = candidates[0]
            shutil.copy2(first, examples_path / f"{category}_{first.name}")
            created += 1
    return created


def _print_deployment_instructions(output_dir):
    """Print the deployment checklist and a size estimate of output_dir."""
    print("\n" + "="*60)
    print("DEPLOYMENT INSTRUCTIONS")
    print("="*60)
    print("\n1. Replace 'image_paths.pkl' with 'image_paths_deployment.pkl':")
    print(" > mv image_paths_deployment.pkl image_paths.pkl")
    print("\n2. Your directory structure should look like:")
    print(" ├── app.py")
    print(" ├── requirements.txt")
    print(" ├── README.md")
    print(" ├── faiss_index.bin")
    print(" ├── image_paths.pkl")
    print(" └── images/")
    print(" ├── examples/")
    print(" ├── category1/")
    print(" ├── category2/")
    print(" └── ...")
    print("\n3. Commit and push to HuggingFace:")
    print(" > git lfs track '*.bin' '*.pkl'")
    print(" > git add .")
    print(" > git commit -m 'Add images and deployment files'")
    print(" > git push")
    print("\n4. Total size estimate:")

    # Sum the size of every file below output_dir (examples included).
    total_size = 0
    for root, _dirs, files in os.walk(output_dir):
        for name in files:
            total_size += os.path.getsize(os.path.join(root, name))

    size_mb = total_size / (1024 * 1024)
    size_gb = size_mb / 1024
    if size_gb > 1:
        print(f" ~{size_gb:.2f} GB")
    else:
        print(f" ~{size_mb:.2f} MB")
    if size_gb > 10:
        print("\n⚠️ WARNING: Your dataset is larger than 10GB!")
        print(" Consider using HuggingFace Datasets or reducing images.")
if __name__ == "__main__":
    import argparse

    # Command-line front end: every flag maps onto one keyword argument of
    # prepare_images_for_deployment().
    cli = argparse.ArgumentParser(
        description='Prepare images for HuggingFace deployment'
    )

    # (flag, add_argument keyword options), registered in this order so the
    # --help listing keeps the same layout.
    option_specs = (
        ('--pkl-file', {
            'default': 'image_paths.pkl',
            'help': 'Path to image_paths.pkl file',
        }),
        ('--output-dir', {
            'default': 'images',
            'help': 'Output directory for images',
        }),
        ('--max-per-category', {
            'type': int,
            'default': None,
            'help': 'Maximum images per category (for size reduction)',
        }),
        ('--no-examples', {
            'action': 'store_true',
            'help': 'Skip creating example images',
        }),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args()

    # --no-examples is a negative switch, so invert it for create_examples.
    want_examples = not opts.no_examples
    prepare_images_for_deployment(
        image_paths_file=opts.pkl_file,
        output_dir=opts.output_dir,
        max_images_per_category=opts.max_per_category,
        create_examples=want_examples,
    )