sample / image_fetcher.py
sreepathi-ravikumar's picture
Update image_fetcher.py
5d8e1af verified
raw
history blame
2.32 kB
import os
import time
import random
import requests
from PIL import Image
from io import BytesIO
from duckduckgo_search import DDGS
# Folder layout used by this module; both entries are relative paths.
DIRS = dict(
    base="images",
    temp=os.path.join("images", "tmp"),
)

# Create the base image folder under the current working directory at import
# time. Note that DIRS["temp"] is only declared above; it is not created here.
path = os.path.join(os.getcwd(), DIRS["base"])
os.makedirs(path, exist_ok=True)
# Helper: request headers with a randomly chosen User-Agent
def get_headers():
    """Build HTTP request headers carrying a randomly selected User-Agent.

    Returns:
        A one-entry dict whose 'User-Agent' value is picked with
        random.choice from a fixed pool of three browser identifiers.
    """
    agent_pool = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:90.0)",
    )
    chosen_agent = random.choice(agent_pool)
    return {"User-Agent": chosen_agent}
# Check if image meets requirements
def is_valid_image(img, *, min_width=854, min_height=480,
                   target_ratio=16 / 9, ratio_tolerance=0.2):
    """Return True when an image is large enough and roughly the target shape.

    Args:
        img: Any object exposing a ``size`` attribute that unpacks to
            ``(width, height)``.
        min_width: Smallest acceptable width.
        min_height: Smallest acceptable height.
        target_ratio: Desired width / height ratio (16:9 by default).
        ratio_tolerance: Maximum allowed absolute deviation of the rounded
            aspect ratio from ``target_ratio``.

    Returns:
        bool: True if both minimum dimensions are met and the aspect ratio
        is within tolerance; False otherwise, including for images with a
        zero (or negative) dimension.
    """
    width, height = img.size
    # A degenerate image has no meaningful aspect ratio; rejecting it here
    # also avoids dividing by a zero height below.
    if width <= 0 or height <= 0:
        return False
    if width < min_width or height < min_height:
        return False
    # The tolerance is applied to the ratio rounded to two decimals.
    ratio = round(width / height, 2)
    return abs(ratio - target_ratio) <= ratio_tolerance
# Try to download and validate image
def validate_image(img_url, keyword):
    """Download an image and save it as ``images/<keyword>.jpg`` if it is valid.

    The image is fetched with a 10 second timeout, decoded, converted to RGB
    and checked with ``is_valid_image``. Any failure (network error, HTTP
    error status, undecodable data, save error) is reported on stdout and
    treated as "not valid" rather than raised.

    Args:
        img_url: URL of the candidate image.
        keyword: Base name (without extension) for the saved file.

    Returns:
        bool: True if the image was downloaded, passed validation and was
        saved; False otherwise.
    """
    try:
        response = requests.get(img_url, headers=get_headers(), timeout=10)
        # Fail fast on 4xx/5xx instead of handing an error page to the decoder.
        response.raise_for_status()
        # Close the decoder's handle as soon as the RGB copy has been made.
        with Image.open(BytesIO(response.content)) as raw_img:
            img = raw_img.convert("RGB")
        if not is_valid_image(img):
            print(f"Skipped (not valid): {img_url}")
            return False
        # A path separator inside the keyword would redirect the save into a
        # different (possibly missing) directory, so neutralise it.
        file_stem = str(keyword)
        for separator in (os.sep, os.altsep):
            if separator:
                file_stem = file_stem.replace(separator, "_")
        final_path = os.path.join("images", f"{file_stem}.jpg")
        # The working directory may differ from the one used at import time.
        os.makedirs("images", exist_ok=True)
        img.save(final_path)
        print(f"Saved: {final_path} | Size: {img.size}")
        return True
    except Exception as e:
        # Deliberately broad: one bad candidate must never abort the search.
        print(f"Failed to validate {img_url}: {e}")
        return False
# Search and fetch images from DuckDuckGo
def fetch_images_from_prompts(prompt_list, *, max_results=20,
                              delay_range=(1.5, 3.5), error_delay=10):
    """Search DuckDuckGo for each prompt and save the first valid image found.

    For every keyword, image search results are tried in order with
    ``validate_image`` until one is saved. The file name passed on is the
    keyword lower-cased with spaces replaced by underscores. Progress and
    failures are printed to stdout.

    Args:
        prompt_list: Iterable of search keyword strings.
        max_results: Maximum number of search results requested per keyword.
        delay_range: ``(low, high)`` bounds in seconds for the random pause
            after each rejected candidate.
        error_delay: Seconds to sleep after a search error before moving on
            to the next keyword.

    Returns:
        None.
    """
    for keyword in prompt_list:
        safe_keyword = keyword.replace(" ", "_").lower()
        print(f"\nSearching for: {keyword}")
        found = False
        try:
            with DDGS() as ddgs:
                results = ddgs.images(keyword, max_results=max_results)
                for result in results or ():
                    # Skip malformed entries instead of letting a KeyError
                    # abandon every remaining candidate for this keyword.
                    img_url = result.get("image")
                    if not img_url:
                        continue
                    if validate_image(img_url, safe_keyword):
                        found = True
                        break
                    time.sleep(random.uniform(*delay_range))  # anti-bot delay
        except Exception as e:
            # Best effort: report, back off, then continue with the next keyword.
            print(f"Rate limit or error: {e}. Sleeping {error_delay} seconds...")
            time.sleep(error_delay)  # longer wait on rate limit
        if not found:
            print(f"No suitable image found for: {keyword}")