sreepathi-ravikumar committed on
Commit
4c88222
·
verified ·
1 Parent(s): 077403a

Create image_fetcher.py

Browse files
Files changed (1) hide show
  1. image_fetcher.py +75 -0
image_fetcher.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import random
4
+ import requests
5
+ from PIL import Image
6
+ from io import BytesIO
7
+ from duckduckgo_search import DDGS
8
+
9
# Ensure the output directory and its temporary sub-directory exist
# before any image is written (runs once, at import time).
for _folder in ("images", "images/tmp"):
    os.makedirs(_folder, exist_ok=True)
12
+
13
+ # Helper: request headers with a randomly chosen User-Agent
14
def get_headers():
    """Build HTTP request headers carrying a randomly selected User-Agent.

    Returns:
        dict: A single-entry mapping ``{'User-Agent': <string>}`` whose value
        is picked with ``random.choice`` from three hard-coded browser
        identification strings.
    """
    candidates = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:90.0)",
    )
    chosen_agent = random.choice(candidates)
    return {'User-Agent': chosen_agent}
21
+
22
+ # Check if image meets requirements
23
def is_valid_image(img, *, min_width=854, min_height=480,
                   target_ratio=16 / 9, tolerance=0.2):
    """Return True when *img* is large enough and roughly widescreen.

    Args:
        img: Any object whose ``size`` attribute unpacks to
            ``(width, height)``, e.g. a ``PIL.Image.Image``.
        min_width: Smallest acceptable width (same units as ``img.size``).
        min_height: Smallest acceptable height (same units as ``img.size``).
        target_ratio: Desired width/height ratio; defaults to 16:9.
        tolerance: Maximum allowed absolute deviation of the rounded
            width/height ratio from ``target_ratio`` (deliberately relaxed).

    Returns:
        bool: True if both dimensions meet their minimum and the aspect
        ratio, rounded to two decimals, is within ``tolerance`` of
        ``target_ratio``; False otherwise.
    """
    width, height = img.size
    # Check the dimensions before dividing: a zero-height image must be
    # rejected, not crash with ZeroDivisionError.
    if height <= 0 or width < min_width or height < min_height:
        return False
    ratio = round(width / height, 2)
    return abs(ratio - target_ratio) <= tolerance
27
+
28
+ # Try to download and validate image
29
def validate_image(img_url, keyword):
    """Download *img_url* and save it if it passes ``is_valid_image``.

    Args:
        img_url: URL of the candidate image.
        keyword: File stem; an accepted image is written to
            ``images/<keyword>.jpg``.

    Returns:
        bool: True when the image was downloaded, accepted and saved.
        False when it was rejected or when any error occurred; errors are
        printed and never propagated to the caller.
    """
    try:
        response = requests.get(img_url, headers=get_headers(), timeout=10)
        # Fail fast on HTTP errors: a body delivered with a 4xx/5xx status
        # (an error page, or a substitute picture) must not be accepted as
        # the requested image.
        response.raise_for_status()
        # Normalise to RGB so the image can always be written as .jpg.
        img = Image.open(BytesIO(response.content)).convert("RGB")
        if not is_valid_image(img):
            print(f"Skipped (not valid): {img_url}")
            return False
        final_path = os.path.join("images", f"{keyword}.jpg")
        img.save(final_path)
        print(f"Saved: {final_path} | Size: {img.size}")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort probe of one URL, and a
        # single bad candidate must not interrupt the surrounding search.
        print(f"Failed to validate {img_url}: {e}")
        return False
43
+
44
+ # Search and fetch images from DuckDuckGo
45
def fetch_images_from_prompts(prompt_list):
    """Search DuckDuckGo for each prompt and save the first acceptable image.

    For every prompt, up to 20 image results are requested and tried in
    order with ``validate_image`` until one is saved.

    Args:
        prompt_list: Iterable of search strings. Each one is also turned
            into the file stem of the saved image (spaces and path
            separators become ``_``, letters are lower-cased).

    Returns:
        list: Paths (``images/<stem>.jpg``) of the images that were saved,
        in prompt order. Prompts with no suitable image are omitted.
    """
    saved_images = []

    for keyword in prompt_list:
        safe_keyword = keyword.replace(" ", "_").lower()
        # A path separator inside a prompt (e.g. "AC/DC") would make the
        # target path point into a non-existent sub-directory, so every
        # save would fail. Neutralise separators for the current platform.
        for sep in (os.sep, os.altsep):
            if sep:
                safe_keyword = safe_keyword.replace(sep, "_")
        print(f"\nSearching for: {keyword}")
        found = False

        try:
            with DDGS() as ddgs:
                results = ddgs.images(keyword, max_results=20)
                for result in results:
                    # Skip malformed entries instead of letting a KeyError
                    # abandon the remaining results for this prompt.
                    img_url = result.get('image')
                    if not img_url:
                        continue
                    if validate_image(img_url, safe_keyword):
                        saved_path = os.path.join("images", f"{safe_keyword}.jpg")
                        saved_images.append(saved_path)
                        found = True
                        break
                    # NOTE(review): indentation was lost in the recovered
                    # source; the delay is assumed to separate successive
                    # candidate downloads -- confirm against the original.
                    time.sleep(random.uniform(1.5, 3.5))  # anti-bot delay

        except Exception as e:
            # Best-effort: report the failure, back off, move to next prompt.
            print(f"Rate limit or error: {e}. Sleeping 10 seconds...")
            time.sleep(10)

        if not found:
            print(f"No suitable image found for: {keyword}")

    return saved_images
72
+
73
+ # Main callable function
74
def get_images(prompt_list):
    """Public entry point: fetch images for the given prompts.

    Delegates to ``fetch_images_from_prompts`` and returns its result
    unchanged.
    """
    fetched_paths = fetch_images_from_prompts(prompt_list)
    return fetched_paths