Dedeep Vasireddy committed on
Commit
7251cc2
·
verified ·
1 Parent(s): c2061f4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -0
app.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import undetected_chromedriver as uc
3
+ from selenium.webdriver.common.by import By
4
+ from selenium.webdriver.support.ui import WebDriverWait
5
+ from selenium.webdriver.support import expected_conditions as EC
6
+ from selenium.common.exceptions import WebDriverException, TimeoutException
7
+ from PIL import Image
8
+ from io import BytesIO
9
+ import time
10
+ import random
11
+ import os
12
+
13
def create_stealth_driver(headless=False, mobile_view=False):
    """Create an undetected Chrome driver.

    Args:
        headless: When true, Chrome is started with ``--headless=new``.
        mobile_view: When true, Chrome's ``mobileEmulation`` option is set to
            a 375x812 viewport with an iPhone user agent; otherwise the
            window is resized to 1920x1080 after start-up.

    Returns:
        The started ``uc.Chrome`` driver, or ``None`` if creating or
        configuring it failed (the error is printed, not raised).
    """
    driver = None
    try:
        # Configure undetected-chromedriver options
        options = uc.ChromeOptions()

        # Basic options
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-gpu")
        options.add_argument("--disable-software-rasterizer")

        # Performance optimizations
        options.add_argument("--disable-extensions")
        options.add_argument("--disable-plugins")
        # NOTE(review): if Chrome honours this flag, screenshots will be
        # missing images -- confirm that is intended for a screenshot tool.
        options.add_argument("--disable-images")  # Faster loading

        # Mobile view configuration
        if mobile_view:
            mobile_emulation = {
                "deviceMetrics": {"width": 375, "height": 812, "pixelRatio": 3.0},
                "userAgent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
            }
            options.add_experimental_option("mobileEmulation", mobile_emulation)

        # Headless mode if requested
        if headless:
            options.add_argument("--headless=new")

        # Create undetected Chrome driver
        driver = uc.Chrome(
            options=options,
            version_main=None,  # Auto-detect Chrome version
            driver_executable_path=None,  # Auto-download chromedriver
            browser_executable_path=None,  # Use system Chrome
            use_subprocess=True,
            debug=False
        )

        # Set window size
        if not mobile_view:
            driver.set_window_size(1920, 1080)

        return driver

    except Exception as e:
        print(f"Failed to create undetected Chrome driver: {e}")
        if driver is not None:
            # The browser started but post-launch setup failed; shut it down
            # so the Chrome process is not leaked when we return None.
            try:
                driver.quit()
            except Exception as quit_error:
                print(f"Failed to close Chrome driver after setup error: {quit_error}")
        return None
61
+
62
def wait_for_cloudflare(driver, timeout=30):
    """Wait for Cloudflare challenge to complete.

    First waits up to ``timeout`` seconds for ``document.readyState`` to
    become ``"complete"``. Then polls the lower-cased page source every
    2 seconds, for up to a further ``timeout`` seconds, until none of the
    challenge phrases appear in it.

    Args:
        driver: WebDriver instance that has already been navigated to the
            target page.
        timeout: Seconds allowed for each of the two waiting phases.

    Returns:
        True as soon as the page source contains no challenge phrase;
        False if the page never finished loading or the phrases were still
        present when the polling window ran out.
    """
    # Common Cloudflare challenge indicators (matched against lower-cased
    # page source). Defined once because they do not change between polls.
    # NOTE(review): 'cloudflare' and 'please wait' are broad; any page whose
    # HTML merely mentions them (e.g. a script URL) is treated as a challenge
    # page -- consider narrowing these markers.
    cf_indicators = (
        'checking your browser',
        'cloudflare',
        'please wait',
        'ddos protection',
        'checking if the site connection is secure',
    )

    try:
        # Wait for page to be ready
        WebDriverWait(driver, timeout).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )

        # Monotonic clock: elapsed time is unaffected by system clock changes.
        start_time = time.monotonic()
        while time.monotonic() - start_time < timeout:
            # Check if we're still on a challenge page
            page_source = driver.page_source.lower()

            if not any(indicator in page_source for indicator in cf_indicators):
                print("βœ… Cloudflare challenge passed or not present")
                return True

            print(f"⏳ Waiting for Cloudflare challenge... ({int(time.monotonic() - start_time)}s)")
            time.sleep(2)

        print("⚠️ Cloudflare challenge timeout")
        return False

    except TimeoutException:
        # Raised by WebDriverWait when readyState never reached "complete".
        print("⚠️ Page load timeout")
        return False
106
+
107
def take_screenshot(url, wait_time=10, use_mobile=False, headless_mode=True, retry_count=2):
    """Take a screenshot with undetected Chrome.

    Args:
        url: Page address. Surrounding whitespace is removed and
            ``https://`` is prepended when no http(s) scheme is present.
        wait_time: Seconds to sleep after the challenge check before
            capturing, to let the page finish loading.
        use_mobile: Forwarded to ``create_stealth_driver`` as ``mobile_view``.
        headless_mode: Forwarded to ``create_stealth_driver`` as ``headless``.
        retry_count: Number of retries after the first attempt, so up to
            ``retry_count + 1`` attempts are made.

    Returns:
        A PIL image of the captured page, or ``None`` if the URL is empty or
        every attempt failed.
    """
    # Normalise first so " example.com" does not become "https:// example.com".
    url = (url or "").strip()
    if not url:
        print("❌ All attempts failed")
        return None
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    for attempt in range(retry_count + 1):
        driver = None
        try:
            print(f"πŸ“Έ Attempt {attempt + 1} to screenshot: {url}")

            # Create undetected Chrome driver
            driver = create_stealth_driver(headless=headless_mode, mobile_view=use_mobile)
            if not driver:
                continue

            # Navigate with human-like delay
            time.sleep(random.uniform(2, 4))
            print("🌐 Navigating to URL...")
            driver.get(url)

            # Wait for Cloudflare if present
            if not wait_for_cloudflare(driver):
                if attempt < retry_count:
                    print("πŸ”„ Retrying due to protection challenge...")
                    continue
                # Last attempt: fall through and capture whatever is shown.
                print("❌ Failed to bypass protection after all attempts")

            # Additional wait for page to fully load
            print(f"⏳ Waiting {wait_time}s for page to load...")
            time.sleep(wait_time)

            # Human-like behavior: scroll to trigger lazy loading
            try:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3);")
                time.sleep(1)
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
                time.sleep(1)
                driver.execute_script("window.scrollTo(0, 0);")
                time.sleep(2)
            except Exception as e:
                # Scrolling is best-effort; a failure must not abort the capture.
                print(f"Scroll behavior failed: {e}")

            # Take screenshot
            print("πŸ“· Capturing screenshot...")
            screenshot = driver.get_screenshot_as_png()

            # A PNG under 1000 bytes is treated as a failed capture.
            if len(screenshot) < 1000:
                print("⚠️ Screenshot too small, likely error page")
                if attempt < retry_count:
                    continue
                return None

            print("βœ… Screenshot captured successfully!")
            return Image.open(BytesIO(screenshot))

        except WebDriverException as e:
            print(f"❌ WebDriver error (attempt {attempt + 1}): {e}")
            if attempt < retry_count:
                # Back off briefly before the next attempt.
                time.sleep(random.uniform(3, 6))

        except Exception as e:
            print(f"❌ Unexpected error (attempt {attempt + 1}): {e}")

        finally:
            # Runs on every exit path (return, continue, exception) so each
            # attempt's browser is closed before the next one starts.
            if driver:
                try:
                    driver.quit()
                except Exception as quit_error:
                    # Cleanup is best-effort; report but never raise from here.
                    print(f"Failed to close Chrome driver: {quit_error}")

    print("❌ All attempts failed")
    return None
183
+
184
+ # Gradio interface with enhanced features
185
def screenshot_interface(url, wait_time, use_mobile_view, headless_mode):
    """Gradio interface function.

    Args:
        url: Address typed by the user; may be empty or ``None``.
        wait_time: Seconds to wait for the page to load; a falsy value
            falls back to 10.
        use_mobile_view: Whether to capture with mobile emulation.
        headless_mode: Whether to run the browser headless.

    Returns:
        A ``(image, status_message)`` tuple. ``image`` is ``None`` whenever
        the URL is blank, the capture failed, or an error was raised.
    """
    # Validate AND forward the stripped URL, so stray whitespace from the
    # textbox never reaches the browser.
    url = (url or "").strip()
    if not url:
        return None, "Please enter a valid URL"

    # Adjust wait time based on user input
    wait_seconds = int(wait_time) if wait_time else 10

    try:
        result = take_screenshot(url, wait_seconds, use_mobile_view, headless_mode)
    except Exception as e:
        return None, f"❌ Error: {str(e)}"

    # take_screenshot signals failure with None; test identity rather than
    # relying on the truthiness of an image object.
    if result is None:
        return None, "❌ Failed to capture screenshot. The site may have very strong protection."
    return result, "βœ… Screenshot captured successfully with undetected Chrome!"
201
+
202
+ # Create Gradio interface
203
# UI wiring: four inputs (URL, wait time, mobile toggle, headless toggle) are
# passed positionally to screenshot_interface, whose (image, status) result
# fills the two outputs in the same order.
iface = gr.Interface(
    title="πŸ” Undetected Chrome Screenshot Tool",
    description="Advanced screenshot tool using undetected-chromedriver to bypass Cloudflare and other bot protections.",
    theme="default",
    fn=screenshot_interface,
    inputs=[
        # Target address; a scheme is optional.
        gr.Textbox(label="Website URL", placeholder="https://example.com or example.com", value="https://github.com"),
        # Post-navigation wait, 5-30 seconds in 1-second steps.
        gr.Slider(label="Wait Time (seconds)", minimum=5, maximum=30, step=1, value=10),
        gr.Checkbox(label="Mobile View", value=False),
        gr.Checkbox(label="Headless Mode (Hidden Browser)", value=True),
    ],
    outputs=[
        gr.Image(label="Screenshot", type="pil"),
        gr.Textbox(label="Status", lines=2),
    ],
)
235
+
236
if __name__ == "__main__":
    # Start the Gradio app only when this file is run as a script.
    launch_options = {
        "share": False,
        "inbrowser": True,
        "show_error": True,
    }
    iface.launch(**launch_options)