Spaces:
Sleeping
Sleeping
| ''' | |
| File: get_data.py | |
| Project: PatternsRecognizer | |
| Author: Milko Videv (milko.videv@thalesgroup.com) | |
| ----- | |
| Last Modified: Friday, 1st March 2024 6:04:04 pm | |
| Modified By: Milko Videv (milko.videv@thalesgroup.com) | |
| ----- | |
| Copyright 2017 - 2024, Thales DIS, MCS SSH | |
| ----- | |
| HISTORY: | |
| Date By Comments | |
| ---------- --- --------------------------------------------------------- | |
| ''' | |
| from random import random | |
| from fastdownload import download_url | |
| from fastai.vision.all import * | |
| from time import sleep | |
| from fastbook import search_images_ddg | |
| from fastcore.all import * | |
def search_images_fastbook(term, max_images=30):
    """Log the query to stdout and run it through fastbook's ``search_images_ddg``.

    Args:
        term: Free-text search query.
        max_images: Upper bound on the number of results requested
            (forwarded unchanged as ``max_images``).

    Returns:
        Whatever ``search_images_ddg`` returns for the query; the caller in
        this file passes it on as the ``urls`` argument of a download call.
    """
    print(f"Searching for {max_images} {term}")
    results = search_images_ddg(term, max_images=max_images)
    return results
def get_images(target_path, count, searches):
    """Download, resize and verify images for each requested pattern kind.

    For every entry in ``searches`` a sub-folder of ``target_path`` is created,
    up to ``count`` results for the query "<entry> patterns images" are
    downloaded into it, and the folder's images are resized with
    ``max_size=400`` and written back to the same folder. After all entries
    have been processed, every image file found under ``target_path`` is
    verified and the ones reported as failed are deleted.

    Args:
        target_path: Root folder of the dataset; sub-folders are created as
            needed (including missing parents).
        count: Maximum number of images to request per search term.
        searches: Pattern kinds to search for. A single string is accepted
            and treated as one term.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one folder and one search per letter.
    if isinstance(searches, str):
        searches = [searches]

    path = Path(target_path)
    for kind in searches:
        dest = path/kind
        dest.mkdir(exist_ok=True, parents=True)
        urls = search_images_fastbook(f'{kind} patterns images', max_images=count)
        download_images(dest, urls=urls)
        # Pause between queries -- presumably to avoid being throttled by the
        # search backend; TODO confirm the required delay.
        sleep(10)
        resize_images(dest, max_size=400, dest=dest)

    print("Checking for bad images ...")
    failed = verify_images(get_image_files(path))
    failed.map(Path.unlink)
    print(f"Removed {len(failed)} bad images")
def parse_cli_args(argv):
    """Turn a raw ``sys.argv``-style list into arguments for ``get_images``.

    Args:
        argv: Full argument vector, program name first:
            ``[prog, <target path>, <images count>, <pattern kind>...]``.

    Returns:
        ``(target_path, count, searches)`` with ``count`` as an ``int`` and
        ``searches`` as a list of strings, or ``None`` when the target path
        or a numeric image count is missing (the caller then shows usage).
    """
    # isdecimal() rather than isdigit(): the latter also accepts characters
    # such as superscript digits, which int() refuses to convert.
    if len(argv) < 3 or not argv[2].isdecimal():
        return None
    # The fallback must be a list: a bare string would be iterated character
    # by character by the consumer of `searches`.
    searches = list(argv[3:]) or ["bulgarian"]
    return argv[1], int(argv[2]), searches


if __name__ == "__main__":
    parsed = parse_cli_args(sys.argv)
    if parsed is None:
        print("Use: python get_data.py <target path> <images count> <one or more pattern kinds>\n")
        print("Example: python get_data.py patterns 1 bulgarian indian japanese")
    else:
        target_path, count, searches = parsed
        get_images(target_path, count, searches)