npv2k1 committed on
Commit
1caefe4
·
1 Parent(s): 6e30df1

Enhance capture_page function with improved error handling and page load management

Browse files
Files changed (1) hide show
  1. src/modules/apps/__init__.py +63 -71
src/modules/apps/__init__.py CHANGED
@@ -1,13 +1,16 @@
1
  import gradio as gr
2
  import tempfile
3
  import os
 
 
4
  from selenium import webdriver
5
  from selenium.webdriver.chrome.options import Options
6
  from selenium.webdriver.chrome.service import Service
7
  from selenium.webdriver.support.ui import WebDriverWait
8
  from selenium.webdriver.support import expected_conditions as EC
 
9
  from subprocess import PIPE, STDOUT
10
- import traceback
11
 
12
  print("Gradio app loaded.")
13
 
@@ -20,67 +23,65 @@ def capture_page(url: str, output_file: str = "screenshot.png"):
20
  """
21
  options = Options()
22
 
23
- # Basic options
24
- options.add_argument('--headless=new') # New headless mode
25
- options.add_argument('--no-sandbox') # Required in Docker
26
- options.add_argument('--disable-dev-shm-usage') # Required in Docker
27
-
28
- # Performance and stability options
29
- options.add_argument('--disable-gpu') # Required in Docker
30
  options.add_argument('--disable-software-rasterizer')
31
  options.add_argument('--disable-extensions')
32
  options.add_argument('--disable-infobars')
33
-
34
- # Resource configuration
35
  options.add_argument('--window-size=1920,1080')
36
  options.add_argument('--disable-features=NetworkService,NetworkServiceInProcess')
37
  options.add_argument('--disable-features=site-per-process')
38
-
39
- # Memory and process settings
40
- options.add_argument('--single-process') # Run in single process mode
41
  options.add_argument('--memory-pressure-off')
42
  options.add_argument('--disable-crash-reporter')
43
- options.add_argument('--disable-breakpad') # Disable crash reporting
44
-
45
- # Additional stability options
46
  options.add_argument('--ignore-certificate-errors')
47
  options.add_argument('--disable-setuid-sandbox')
48
  options.add_argument('--disable-web-security')
49
-
50
- # Set specific shared memory /dev/shm size (if needed)
51
- options.add_argument('--disable-dev-shm-usage')
52
  options.add_argument('--shm-size=2g')
53
 
54
- # Set up Chrome service with explicit path to chromedriver and logging
 
 
 
55
  service = Service(
56
- # executable_path='/usr/local/bin/chromedriver',
57
- log_output=PIPE, # Redirect logs to pipe
58
- service_args=['--verbose'] # Enable verbose logging
59
  )
60
 
 
61
  try:
62
  print("Initializing Chrome...")
63
- driver = webdriver.Chrome(
64
- service=service,
65
- options=options
66
- )
67
-
68
  print("Chrome initialized successfully")
 
69
 
70
  try:
71
- print(f"Navigating to URL: {url}")
72
- driver.get(url)
 
 
 
 
 
73
 
74
- # Wait for page load
75
- print("Waiting for page to load...")
76
- driver.implicitly_wait(10) # Increased wait time
 
 
 
 
 
77
 
78
- # Additional wait for dynamic content
79
- from selenium.webdriver.support.ui import WebDriverWait
80
- from selenium.webdriver.support import expected_conditions as EC
81
- # WebDriverWait(driver, 10).until(
82
- # lambda d: d.execute_script('return document.readyState') == 'complete'
83
- # )
84
 
85
  print("Taking screenshot...")
86
  driver.save_screenshot(output_file)
@@ -92,18 +93,22 @@ def capture_page(url: str, output_file: str = "screenshot.png"):
92
  raise
93
  finally:
94
  print("Closing Chrome...")
95
- try:
96
- driver.close() # Close current window
97
- driver.quit() # Quit browser completely
98
- import psutil # For process cleanup
99
- current_pid = os.getpid()
100
- current_process = psutil.Process(current_pid)
101
- children = current_process.children(recursive=True)
102
- for child in children:
103
- if 'chrome' in child.name().lower():
104
- child.terminate()
105
- except Exception as cleanup_error:
106
- print(f"Error during cleanup: {cleanup_error}")
 
 
 
 
107
 
108
  except Exception as e:
109
  print(f"Error initializing Chrome: {str(e)}")
@@ -112,33 +117,24 @@ def capture_page(url: str, output_file: str = "screenshot.png"):
112
  def capture_and_show(url: str):
113
  """Capture webpage and return the image"""
114
  try:
115
- # Get the temporary directory path (defaulting to /tmp if TMPDIR is not set)
116
  temp_dir = os.getenv('TMPDIR', '/tmp')
117
-
118
  try:
119
- # Ensure temp directory exists and has correct permissions
120
  os.makedirs(temp_dir, mode=0o777, exist_ok=True)
121
  print(f"Using temp directory: {temp_dir}")
122
-
123
- # Verify directory is writable
124
  if not os.access(temp_dir, os.W_OK):
125
  print(f"Warning: Temp directory {temp_dir} is not writable")
126
- # Try to create a user-specific temp directory instead
127
  temp_dir = os.path.join('/tmp', f'chrome_screenshots_{os.getuid()}')
128
  os.makedirs(temp_dir, mode=0o777, exist_ok=True)
129
  print(f"Created user-specific temp directory: {temp_dir}")
130
 
131
- # Create temporary file in the specified directory
132
  temp_path = os.path.join(temp_dir, f"screenshot_{os.urandom(8).hex()}.png")
133
  print(f"Temp file path: {temp_path}")
134
 
135
- # Capture the webpage
136
  success = capture_page(url, temp_path)
137
  if not success:
138
  print("Screenshot capture returned False")
139
  return None
140
-
141
- # Verify file was created
142
  if not os.path.exists(temp_path):
143
  print("Screenshot file was not created")
144
  return None
@@ -183,7 +179,6 @@ def create_gradio_app():
183
 
184
  def capture_with_error(url):
185
  try:
186
- # Basic URL validation
187
  if not url:
188
  return None, gr.update(visible=True, value="Please enter a URL")
189
  if not url.startswith(('http://', 'https://')):
@@ -196,7 +191,6 @@ def create_gradio_app():
196
  except Exception as e:
197
  return None, gr.update(visible=True, value=f"Error: {str(e)}")
198
 
199
- # Connect the components
200
  capture_btn.click(
201
  fn=capture_with_error,
202
  inputs=[url_input],
@@ -207,21 +201,19 @@ def create_gradio_app():
207
 
208
  app = create_gradio_app()
209
 
210
- # Configure server settings for Docker deployment
211
- server_port = 7860 # Standard Gradio port
212
- server_name = "0.0.0.0" # Allow external connections
213
 
214
  def main():
215
- """Launch the Gradio application"""
216
  print("Starting Gradio server...")
217
  app.launch(
218
  server_name=server_name,
219
  server_port=server_port,
220
- share=False, # Disable sharing as we're running in Docker
221
- auth=None, # Can be configured if authentication is needed
222
- ssl_verify=False, # Disable SSL verification for internal Docker network
223
  show_error=True,
224
  favicon_path=None
225
  )
226
 
227
- main()
 
1
  import gradio as gr
2
  import tempfile
3
  import os
4
+ import time
5
+ import traceback
6
  from selenium import webdriver
7
  from selenium.webdriver.chrome.options import Options
8
  from selenium.webdriver.chrome.service import Service
9
  from selenium.webdriver.support.ui import WebDriverWait
10
  from selenium.webdriver.support import expected_conditions as EC
11
+ from selenium.common.exceptions import TimeoutException
12
  from subprocess import PIPE, STDOUT
13
+ import psutil
14
 
15
  print("Gradio app loaded.")
16
 
 
23
  """
24
  options = Options()
25
 
26
+ # Use new headless mode and basic options for Docker
27
+ options.add_argument('--headless=new')
28
+ options.add_argument('--no-sandbox')
29
+ options.add_argument('--disable-dev-shm-usage')
30
+ options.add_argument('--disable-gpu')
 
 
31
  options.add_argument('--disable-software-rasterizer')
32
  options.add_argument('--disable-extensions')
33
  options.add_argument('--disable-infobars')
 
 
34
  options.add_argument('--window-size=1920,1080')
35
  options.add_argument('--disable-features=NetworkService,NetworkServiceInProcess')
36
  options.add_argument('--disable-features=site-per-process')
37
+ options.add_argument('--single-process')
 
 
38
  options.add_argument('--memory-pressure-off')
39
  options.add_argument('--disable-crash-reporter')
40
+ options.add_argument('--disable-breakpad')
 
 
41
  options.add_argument('--ignore-certificate-errors')
42
  options.add_argument('--disable-setuid-sandbox')
43
  options.add_argument('--disable-web-security')
 
 
 
44
  options.add_argument('--shm-size=2g')
45
 
46
+ # Set page load strategy to 'none' to avoid waiting indefinitely
47
+ options.page_load_strategy = "none"
48
+
49
+ # Set up Chrome service (ensure chromedriver is in your PATH)
50
  service = Service(
51
+ log_output=PIPE,
52
+ service_args=['--verbose']
 
53
  )
54
 
55
+ driver = None
56
  try:
57
  print("Initializing Chrome...")
58
+ driver = webdriver.Chrome(service=service, options=options)
59
+ if not driver:
60
+ raise Exception("Failed to initialize Chrome driver")
61
+
 
62
  print("Chrome initialized successfully")
63
+ driver.implicitly_wait(5)
64
 
65
  try:
66
+ # Set a 30-second timeout for page load
67
+ driver.set_page_load_timeout(30)
68
+ try:
69
+ print(f"Navigating to URL: {url}")
70
+ driver.get(url)
71
+ except TimeoutException:
72
+ print("Page load timed out. Proceeding with screenshot capture...")
73
 
74
+ # Wait for the document ready state to be 'interactive' or 'complete'
75
+ try:
76
+ print("Waiting for document ready state...")
77
+ WebDriverWait(driver, 30).until(
78
+ lambda d: d.execute_script('return document.readyState') in ["interactive", "complete"]
79
+ )
80
+ except TimeoutException:
81
+ print("Document did not reach ready state within timeout, proceeding anyway.")
82
 
83
+ # Additional short delay to allow dynamic content to settle
84
+ time.sleep(2)
 
 
 
 
85
 
86
  print("Taking screenshot...")
87
  driver.save_screenshot(output_file)
 
93
  raise
94
  finally:
95
  print("Closing Chrome...")
96
+ # Wrap cleanup in a try/except to prevent errors if the session is already closed
97
+ if driver:
98
+ try:
99
+ driver.quit()
100
+ except Exception as cleanup_error:
101
+ print(f"Error during driver.quit(): {cleanup_error}")
102
+
103
+ # Optionally clean up any lingering Chrome processes
104
+ try:
105
+ current_pid = os.getpid()
106
+ current_process = psutil.Process(current_pid)
107
+ for child in current_process.children(recursive=True):
108
+ if 'chrome' in child.name().lower():
109
+ child.terminate()
110
+ except Exception as psutil_error:
111
+ print(f"Error during process cleanup: {psutil_error}")
112
 
113
  except Exception as e:
114
  print(f"Error initializing Chrome: {str(e)}")
 
117
  def capture_and_show(url: str):
118
  """Capture webpage and return the image"""
119
  try:
 
120
  temp_dir = os.getenv('TMPDIR', '/tmp')
 
121
  try:
 
122
  os.makedirs(temp_dir, mode=0o777, exist_ok=True)
123
  print(f"Using temp directory: {temp_dir}")
 
 
124
  if not os.access(temp_dir, os.W_OK):
125
  print(f"Warning: Temp directory {temp_dir} is not writable")
 
126
  temp_dir = os.path.join('/tmp', f'chrome_screenshots_{os.getuid()}')
127
  os.makedirs(temp_dir, mode=0o777, exist_ok=True)
128
  print(f"Created user-specific temp directory: {temp_dir}")
129
 
 
130
  temp_path = os.path.join(temp_dir, f"screenshot_{os.urandom(8).hex()}.png")
131
  print(f"Temp file path: {temp_path}")
132
 
 
133
  success = capture_page(url, temp_path)
134
  if not success:
135
  print("Screenshot capture returned False")
136
  return None
137
+
 
138
  if not os.path.exists(temp_path):
139
  print("Screenshot file was not created")
140
  return None
 
179
 
180
  def capture_with_error(url):
181
  try:
 
182
  if not url:
183
  return None, gr.update(visible=True, value="Please enter a URL")
184
  if not url.startswith(('http://', 'https://')):
 
191
  except Exception as e:
192
  return None, gr.update(visible=True, value=f"Error: {str(e)}")
193
 
 
194
  capture_btn.click(
195
  fn=capture_with_error,
196
  inputs=[url_input],
 
201
 
202
  app = create_gradio_app()
203
 
204
+ server_port = 7860
205
+ server_name = "0.0.0.0"
 
206
 
207
  def main():
 
208
  print("Starting Gradio server...")
209
  app.launch(
210
  server_name=server_name,
211
  server_port=server_port,
212
+ share=False,
213
+ auth=None,
214
+ ssl_verify=False,
215
  show_error=True,
216
  favicon_path=None
217
  )
218
 
219
+ main()