Spaces:

sreepathi-ravikumar
/

sample

Sleeping

App Files Files Community

sample / image_fetcher.py

sreepathi-ravikumar

Update image_fetcher.py

5d8e1af verified 12 months ago

raw

history blame

2.32 kB


	import os
	import time
	import random
	import requests
	from PIL import Image
	from io import BytesIO
	from duckduckgo_search import DDGS

	# Folder layout used by the fetcher; both entries are relative paths.
	DIRS = dict(
	    base="images",
	    temp=os.path.join("images", "tmp"),
	)

	# Create the output folder under the current working directory at import
	# time. Only this base folder is created here; DIRS["temp"] is not.
	path = os.path.join(os.getcwd(), "images")
	os.makedirs(path, exist_ok=True)

	# Helper: request headers with a randomly chosen User-Agent
	def get_headers():
	    """Return a headers dict whose only key is a random ``User-Agent``.

	    The value is picked with ``random.choice`` from a fixed pool of three
	    browser identification strings. No delay is performed here.
	    """
	    agent_pool = (
	        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
	        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
	        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:90.0)",
	    )
	    chosen_agent = random.choice(agent_pool)
	    return {"User-Agent": chosen_agent}

	# Check if image meets the minimum size and aspect-ratio requirements
	def is_valid_image(img, min_width=854, min_height=480,
	                   target_ratio=16 / 9, tolerance=0.2):
	    """Return True when *img* is large enough and near the target aspect ratio.

	    Args:
	        img: Any object exposing ``size`` as a ``(width, height)`` pair
	            (``validate_image`` passes a PIL image).
	        min_width: Smallest accepted width in pixels.
	        min_height: Smallest accepted height in pixels.
	        target_ratio: Desired width/height ratio (16:9 by default).
	        tolerance: Largest accepted absolute deviation from ``target_ratio``.

	    Returns:
	        bool: True if the image passes every check, otherwise False.
	    """
	    width, height = img.size
	    # Run the size gate before dividing: a zero-height image must be
	    # rejected rather than raise ZeroDivisionError.
	    if height <= 0 or width < min_width or height < min_height:
	        return False
	    # The ratio is rounded to two decimals before comparison, so values
	    # right at the tolerance edge are judged on the rounded figure.
	    ratio = round(width / height, 2)
	    return abs(ratio - target_ratio) <= tolerance  # relaxed ratio check

	# Try to download and validate image
	def validate_image(img_url, keyword):
	    """Download *img_url* and save it as ``images/<keyword>.jpg`` if it is valid.

	    The response body is decoded as an image, converted to RGB, and checked
	    with ``is_valid_image``. Only images that pass are written to disk.

	    Args:
	        img_url: URL of the candidate image.
	        keyword: Base name (without extension) for the saved file.

	    Returns:
	        bool: True if the image was saved; False if it was rejected or if
	        any step (request, decode, save) raised an exception.
	    """
	    try:
	        response = requests.get(img_url, headers=get_headers(), timeout=10)
	        # Treat HTTP error statuses as failures instead of handing an
	        # error page to the image decoder.
	        response.raise_for_status()
	        # Close the decoder's handle once the RGB copy exists.
	        with Image.open(BytesIO(response.content)) as raw_img:
	            img = raw_img.convert("RGB")
	        if not is_valid_image(img):
	            print(f"Skipped (not valid): {img_url}")
	            return False
	        # Path separators in the keyword would point outside (or into a
	        # missing subfolder of) the images directory; flatten them.
	        file_stem = keyword.replace("/", "_").replace("\\", "_")
	        # The working directory may differ from the one used at import.
	        os.makedirs("images", exist_ok=True)
	        final_path = os.path.join("images", f"{file_stem}.jpg")
	        img.save(final_path)
	        print(f"Saved: {final_path} | Size: {img.size}")
	        return True
	    except Exception as e:
	        # Deliberately best-effort: one bad candidate is reported and
	        # skipped so the caller can move on to the next result.
	        print(f"Failed to validate {img_url}: {e}")
	        return False

	# Search and fetch images from DuckDuckGo
	def fetch_images_from_prompts(prompt_list, max_results=20):
	    """Search DuckDuckGo for each prompt and save the first valid image.

	    For every keyword, candidate images are tried in order through
	    ``validate_image`` until one is accepted. Progress and failures are
	    reported with ``print``.

	    Args:
	        prompt_list: Iterable of search strings.
	        max_results: Maximum number of search results requested per
	            keyword.

	    Returns:
	        None.
	    """
	    for keyword in prompt_list:
	        # File-name form of the keyword: spaces become underscores, lowercased.
	        safe_keyword = keyword.replace(" ", "_").lower()
	        print(f"\nSearching for: {keyword}")
	        found = False

	        try:
	            with DDGS() as ddgs:
	                results = ddgs.images(keyword, max_results=max_results)
	                for result in results or ():
	                    # Skip entries without an image URL instead of letting a
	                    # KeyError abort the whole keyword as a "rate limit".
	                    # Presumably each result is a dict; verify against the
	                    # installed duckduckgo_search version.
	                    img_url = result.get("image")
	                    if not img_url:
	                        continue
	                    if validate_image(img_url, safe_keyword):
	                        found = True
	                        break
	                    # NOTE(review): assumed to run after each failed
	                    # candidate, not once per keyword; confirm intent.
	                    time.sleep(random.uniform(1.5, 3.5))  # anti-bot delay

	        except Exception as e:
	            # Best-effort: report, back off, and continue with the next keyword.
	            print(f"Rate limit or error: {e}. Sleeping 10 seconds...")
	            time.sleep(10)  # longer wait on rate limit

	        if not found:
	            print(f"No suitable image found for: {keyword}")