Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import random | |
| import requests | |
| from PIL import Image | |
| from io import BytesIO | |
| from duckduckgo_search import DDGS | |
# Folder layout: "base" is where accepted images are saved; "temp" is a
# scratch sub-folder beneath it (declared here, not created by this block).
DIRS = dict(
    base="images",
    temp=os.path.join("images", "tmp"),
)

# Create the image folder under the current working directory. exist_ok
# makes this a no-op when the folder is already present.
path = os.path.join(os.getcwd(), "images")
os.makedirs(path, exist_ok=True)
def get_headers():
    """Return HTTP request headers with a randomly chosen User-Agent.

    One of three fixed browser identification strings is picked on every
    call so consecutive requests do not all present the same User-Agent.

    Returns:
        A dict with the single key ``'User-Agent'``.
    """
    agent_pool = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:90.0)",
    )
    chosen_agent = random.choice(agent_pool)
    return {'User-Agent': chosen_agent}
def is_valid_image(img, *, min_width=854, min_height=480,
                   target_ratio=16 / 9, tolerance=0.2):
    """Return True when an image is large enough and roughly widescreen.

    Args:
        img: Any object exposing ``size`` as a ``(width, height)`` pair.
        min_width: Smallest accepted width in pixels.
        min_height: Smallest accepted height in pixels.
        target_ratio: Desired width/height ratio (16:9 by default).
        tolerance: Largest accepted absolute deviation of the ratio
            (rounded to two decimals) from ``target_ratio``.

    Returns:
        True if both minimum dimensions are met and the aspect ratio is
        within ``tolerance`` of ``target_ratio``; False otherwise.
    """
    width, height = img.size
    # Reject on size before dividing: a zero-height image must yield False
    # rather than raise ZeroDivisionError in the ratio computation below.
    if height <= 0 or width < min_width or height < min_height:
        return False
    ratio = round(width / height, 2)
    return abs(ratio - target_ratio) <= tolerance  # relaxed ratio check
def validate_image(img_url, keyword):
    """Download an image and save it as ``images/<keyword>.jpg`` if it qualifies.

    The URL is fetched with the headers from ``get_headers`` and a
    10-second timeout, decoded with Pillow, converted to RGB and checked
    with ``is_valid_image``. Only images that pass the check are written.

    Args:
        img_url: URL of the candidate image.
        keyword: File-name stem for the saved JPEG; the caller is expected
            to pass a value that is safe to use in a file name.

    Returns:
        True if the image was downloaded, accepted and saved; False if it
        was rejected or any step failed. Failures are printed, not raised.
    """
    try:
        response = requests.get(img_url, headers=get_headers(), timeout=10)
        # Fail fast on 4xx/5xx so an HTML error page is reported as an HTTP
        # error instead of a confusing "cannot identify image" failure.
        response.raise_for_status()
        # Close the decoded source image once the RGB copy exists.
        with Image.open(BytesIO(response.content)) as source_img:
            img = source_img.convert("RGB")
        if not is_valid_image(img):
            print(f"Skipped (not valid): {img_url}")
            return False
        # Do not rely on the folder having been created at import time.
        os.makedirs("images", exist_ok=True)
        final_path = os.path.join("images", f"{keyword}.jpg")
        img.save(final_path)
        print(f"Saved: {final_path} | Size: {img.size}")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort step, and any network,
        # decode or disk failure just means "try the next candidate".
        print(f"Failed to validate {img_url}: {e}")
        return False
def fetch_images_from_prompts(prompt_list):
    """Search DuckDuckGo for each prompt and save the first acceptable image.

    For every keyword, up to 20 image results are requested and passed in
    order to ``validate_image`` until one is saved. Progress, errors and
    keywords without a usable image are printed.

    Args:
        prompt_list: Iterable of search phrases.

    Returns:
        None.
    """
    for keyword in prompt_list:
        # Build the file-name stem: lower-case, with every character that is
        # not alphanumeric, "-" or "_" replaced by "_". Replacing only spaces
        # let "/" and similar characters through, which broke the save path.
        safe_keyword = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in keyword.lower()
        ) or "image"
        print(f"\nSearching for: {keyword}")
        found = False
        try:
            with DDGS() as ddgs:
                results = ddgs.images(keyword, max_results=20)
                for result in results:
                    # Skip a result without an image URL instead of letting a
                    # KeyError abandon every remaining result for this keyword.
                    img_url = result.get('image')
                    if not img_url:
                        continue
                    if validate_image(img_url, safe_keyword):
                        found = True
                        break
                    time.sleep(random.uniform(1.5, 3.5))  # anti-bot delay
        except Exception as e:
            # Best-effort boundary: report the failure, back off, and move
            # on to the next keyword rather than stopping the whole run.
            print(f"Rate limit or error: {e}. Sleeping 10 seconds...")
            time.sleep(10)  # longer wait on rate limit
        if not found:
            print(f"No suitable image found for: {keyword}")