Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| from PIL import Image | |
| from io import BytesIO | |
| import time | |
| from config import IMAGE_DIR, DATASET_SOURCES | |
class DataCollector:
    """Download sample images and expose the configured dataset sources.

    ``image_dir`` (from ``config.IMAGE_DIR``) is the directory downloaded
    images are written to; ``sources`` (from ``config.DATASET_SOURCES``) is
    returned unchanged by :meth:`get_dataset_info`.
    """

    # These are example URLs - in practice you'd scrape or use APIs.
    SAMPLE_URLS = (
        "https://c7.alamy.com/comp/3AJ86J0/gold-on-quartz-bradshaw-mountains-arizona-gold-on-quartz-from-the-bradshaw-mountains-arizona-is-a-classic-and-highly-sought-after-mineral-associa-3AJ86J0.jpg",
        "https://www.nuggetsbygrant.com/cdn/shop/products/243A0948.jpg?v=1670014792&width=1080",
        "https://news.rice.edu/sites/g/files/bxs2656/files/inline-images/BIF5-0524_540_1.jpeg",
        "https://c7.alamy.com/comp/2FNKTF3/copper-bearing-rock-against-a-gravel-ground-surface-2FNKTF3.jpg",
        "https://www.shutterstock.com/shutterstock/photos/2618131965/display_1500/stock-photo-close-up-of-a-rough-weathered-copper-ore-stone-with-natural-crystal-formations-2618131965.jpg",
        "https://geologyistheway.com/wp-content/uploads/2021/06/118-milky-quartz.jpg",
        "https://geologyistheway.com/wp-content/uploads/2021/06/201210-4-1024x726.jpg",
    )

    # Modes that can be written to JPEG as-is. Anything else (e.g. RGBA or
    # palette images) is converted to RGB before saving.
    _JPEG_MODES = ("L", "RGB", "CMYK")

    def __init__(self):
        self.image_dir = IMAGE_DIR
        self.sources = DATASET_SOURCES

    def collect_sample_images(self, urls=None) -> None:
        """Collect sample images from public sources.

        Each URL is downloaded and stored in ``self.image_dir`` as
        ``sample_core_<n>.jpg`` where ``n`` is the 1-based position of the
        URL. A failure on one URL is reported and does not stop the others.

        Args:
            urls: Optional iterable of image URLs. Defaults to
                ``SAMPLE_URLS`` when omitted or ``None``.
        """
        if urls is None:
            urls = self.SAMPLE_URLS

        # The target directory may not exist yet (e.g. on a fresh checkout);
        # without this every save would fail.
        os.makedirs(self.image_dir, exist_ok=True)

        print("Collecting sample drill core images...")
        saved = 0
        for position, url in enumerate(urls, start=1):
            filename = f"sample_core_{position}.jpg"
            try:
                self._download_as_jpeg(url, os.path.join(self.image_dir, filename))
            except Exception as e:
                # Deliberately broad: this is a best-effort batch, and network,
                # HTTP and image-decoding errors must not abort the remaining
                # downloads. The failure is reported, not swallowed.
                print(f"Failed to download {url}: {e}")
                continue
            saved += 1
            print(f"Downloaded: {filename}")
            time.sleep(0.5)  # Be respectful to servers

        # Report what this run actually saved rather than every entry that
        # happens to be in the directory (stale or unrelated files).
        print(f"Collected {saved} images")

    def _download_as_jpeg(self, url, dest_path) -> None:
        """Fetch ``url`` and write the decoded image to ``dest_path`` as JPEG."""
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as img:
            # The file is always named *.jpg, so make sure the pixel mode is
            # one the JPEG writer accepts before saving.
            if img.mode in self._JPEG_MODES:
                jpeg_ready = img
            else:
                jpeg_ready = img.convert("RGB")
            jpeg_ready.save(dest_path, format="JPEG")

    def get_dataset_info(self):
        """Return information about available datasets."""
        return self.sources
if __name__ == "__main__":
    # Script entry point: fetch the sample images first, then list every
    # dataset source known to the collector.
    data_collector = DataCollector()
    data_collector.collect_sample_images()

    print("\nAvailable geological datasets:")
    datasets = data_collector.get_dataset_info()
    for dataset in datasets:
        # Each entry is expected to provide 'name', 'description' and 'url'.
        print(f"- {dataset['name']}: {dataset['description']}")
        print(f" URL: {dataset['url']}\n")