| import os |
| import requests |
| from PIL import Image |
| import io |
| import json |
|
|
def download_coco_subset(num_images=100, output_dir="/home/ubuntu/data/coco/train2017"):
    """Download a small sample of COCO train2017 images via direct URLs.

    Only a fixed list of 20 sample URLs is built in, so at most 20 images are
    fetched no matter how large ``num_images`` is. Each download is
    best-effort: a failure is printed and counted, and the loop moves on to
    the next URL.

    Args:
        num_images: Maximum number of images to download. Values larger than
            the built-in list are capped to its length; zero or negative
            values download nothing.
        output_dir: Directory that receives the images. It is created
            (including missing parents) if it does not exist. Files are named
            ``coco_downloaded_<index>.jpg`` where ``<index>`` is the 8-digit
            zero-padded position of the URL in the built-in list.

    Returns:
        The number of images that were downloaded, verified and saved.
    """
    print("📥 Downloading COCO subset via direct URLs...")

    os.makedirs(output_dir, exist_ok=True)

    sample_urls = [
        "http://images.cocodataset.org/train2017/000000000009.jpg",
        "http://images.cocodataset.org/train2017/000000000025.jpg",
        "http://images.cocodataset.org/train2017/000000000030.jpg",
        "http://images.cocodataset.org/train2017/000000000034.jpg",
        "http://images.cocodataset.org/train2017/000000000036.jpg",
        "http://images.cocodataset.org/train2017/000000000042.jpg",
        "http://images.cocodataset.org/train2017/000000000051.jpg",
        "http://images.cocodataset.org/train2017/000000000052.jpg",
        "http://images.cocodataset.org/train2017/000000000061.jpg",
        "http://images.cocodataset.org/train2017/000000000064.jpg",
        "http://images.cocodataset.org/train2017/000000000072.jpg",
        "http://images.cocodataset.org/train2017/000000000074.jpg",
        "http://images.cocodataset.org/train2017/000000000085.jpg",
        "http://images.cocodataset.org/train2017/000000000094.jpg",
        "http://images.cocodataset.org/train2017/000000000097.jpg",
        "http://images.cocodataset.org/train2017/000000000104.jpg",
        "http://images.cocodataset.org/train2017/000000000106.jpg",
        "http://images.cocodataset.org/train2017/000000000110.jpg",
        "http://images.cocodataset.org/train2017/000000000113.jpg",
        "http://images.cocodataset.org/train2017/000000000119.jpg",
    ]

    # Clamp at zero: a negative slice bound would silently mean "all but the
    # last N" rather than "nothing".
    selected_urls = sample_urls[:max(num_images, 0)]
    total = len(selected_urls)

    saved_count = 0
    failed_count = 0

    for i, url in enumerate(selected_urls):
        print(f"📸 Downloading {i+1}/{total}: {url}")

        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            payload = response.content

            # Verify the bytes in memory BEFORE touching the disk, so a
            # truncated or non-image response never leaves a bad file in
            # output_dir. The context manager releases the image handle.
            with Image.open(io.BytesIO(payload)) as img:
                img.verify()

            image_path = os.path.join(output_dir, f"coco_downloaded_{i:08d}.jpg")
            with open(image_path, "wb") as f:
                f.write(payload)
        except Exception as e:
            # Deliberately broad: one bad URL, response or image must not
            # abort the remaining downloads.
            failed_count += 1
            print(f" ❌ Failed: {e}")
            continue

        saved_count += 1
        print(f" ✅ Saved: {image_path}")

    print(f"✅ Download complete! {saved_count} images saved, {failed_count} failed")
    return saved_count
|
|
# Script entry point: request 20 images, which matches the length of the
# built-in sample URL list, and save them to the default output directory.
if __name__ == "__main__":
    download_coco_subset(num_images=20)
|
|