File size: 8,219 Bytes
3b7ec04
 
 
1caefe4
 
3b7ec04
 
256f37c
f8136bb
 
1caefe4
5e4ff04
1caefe4
f8136bb
3b7ec04
256f37c
3b7ec04
 
 
 
 
 
 
 
0c01d1e
1caefe4
 
 
 
 
5e4ff04
 
 
f8136bb
0c01d1e
 
1caefe4
0c01d1e
 
1caefe4
f8136bb
 
 
0c01d1e
 
1caefe4
 
 
 
5e4ff04
1caefe4
 
5e4ff04
3b7ec04
1caefe4
3b7ec04
f8136bb
1caefe4
 
 
 
f8136bb
1caefe4
f8136bb
 
1caefe4
 
 
 
 
 
 
f8136bb
1caefe4
 
 
 
 
 
 
 
f8136bb
1caefe4
 
f8136bb
 
 
 
 
 
 
 
 
 
 
1caefe4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8136bb
 
 
 
3b7ec04
 
 
 
256f37c
f8136bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1caefe4
f8136bb
 
 
 
 
 
 
 
 
 
 
 
3b7ec04
f8136bb
 
 
3b7ec04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b68547c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b7ec04
b68547c
3b7ec04
b68547c
3b7ec04
 
 
 
 
 
1caefe4
 
3b7ec04
 
 
 
 
 
1caefe4
 
 
3b7ec04
 
 
 
1caefe4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import gradio as gr
import tempfile
import os
import time
import traceback
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from subprocess import PIPE, STDOUT
import psutil

# Startup marker written to stdout while the module body executes.
# Note that it runs before the Gradio UI is built further down the file.
print("Gradio app loaded.")

def _build_chrome_options():
    """Return the headless-Chrome options used for every capture."""
    options = Options()

    # Use new headless mode and basic options for Docker
    # NOTE(review): '--disable-features' is passed twice; Chrome may honour
    # only the last occurrence -- confirm whether the two should be merged.
    # NOTE(review): '--shm-size' looks like a `docker run` option rather than
    # a Chrome switch -- confirm it has any effect here.
    chrome_flags = (
        '--headless=new',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-software-rasterizer',
        '--disable-extensions',
        '--disable-infobars',
        '--window-size=1920,1080',
        '--disable-features=NetworkService,NetworkServiceInProcess',
        '--disable-features=site-per-process',
        '--single-process',
        '--memory-pressure-off',
        '--disable-crash-reporter',
        '--disable-breakpad',
        '--ignore-certificate-errors',
        '--disable-setuid-sandbox',
        '--disable-web-security',
        '--shm-size=2g',
    )
    for flag in chrome_flags:
        options.add_argument(flag)

    # Page load strategy 'none' makes driver.get() return without waiting for
    # the load event; readiness is polled explicitly by the caller instead.
    options.page_load_strategy = "none"
    return options


def _terminate_lingering_chrome():
    """Best-effort termination of Chrome processes still parented to this process."""
    try:
        children = psutil.Process(os.getpid()).children(recursive=True)
    except Exception as psutil_error:
        print(f"Error during process cleanup: {psutil_error}")
        return

    # NOTE(review): this matches every descendant whose name contains
    # 'chrome', so it would also hit browsers owned by a capture running
    # concurrently in the same process -- confirm captures are serialized.
    for child in children:
        try:
            if 'chrome' in child.name().lower():
                child.terminate()
        except psutil.NoSuchProcess:
            # The process exited between listing and inspection; nothing to do.
            continue
        except Exception as psutil_error:
            # Keep going so one stubborn process does not block the rest.
            print(f"Error during process cleanup: {psutil_error}")


def capture_page(url: str, output_file: str = "screenshot.png"):
    """
    Captures a screenshot of the given webpage with headless Chrome.

    :param url: The URL of the webpage to capture.
    :param output_file: The filename to save the screenshot.
    :return: True if the screenshot was written, False if the driver
        reported that the file could not be saved.
    :raises Exception: If Chrome cannot be started ("Failed to initialize
        Chrome: ...") or if navigation/capture fails ("Failed to capture
        page: ..."). The original error is chained as the cause.
    """
    options = _build_chrome_options()

    # Set up Chrome service (ensure chromedriver is in your PATH).
    # The verbose driver log goes to this process's stdout: handing the
    # driver a PIPE that is never read lets it block once the pipe fills.
    service = Service(
        log_output=STDOUT,
        service_args=['--verbose']
    )

    print("Initializing Chrome...")
    try:
        driver = webdriver.Chrome(service=service, options=options)
    except Exception as e:
        print(f"Error initializing Chrome: {str(e)}")
        raise Exception(f"Failed to initialize Chrome: {str(e)}") from e
    print("Chrome initialized successfully")

    # From here on the driver exists, so every exit path must quit it.
    try:
        driver.implicitly_wait(5)

        # Set a 30-second timeout for page load
        driver.set_page_load_timeout(30)
        try:
            print(f"Navigating to URL: {url}")
            driver.get(url)
        except TimeoutException:
            print("Page load timed out. Proceeding with screenshot capture...")

        # Wait for the document ready state to be 'interactive' or 'complete'
        try:
            print("Waiting for document ready state...")
            WebDriverWait(driver, 30).until(
                lambda d: d.execute_script('return document.readyState') in ("interactive", "complete")
            )
        except TimeoutException:
            print("Document did not reach ready state within timeout, proceeding anyway.")

        # Additional short delay to allow dynamic content to settle
        time.sleep(2)

        print("Taking screenshot...")
        # save_screenshot reports a failed write via its return value rather
        # than raising, so it must be checked before claiming success.
        if not driver.save_screenshot(output_file):
            print(f"Screenshot could not be written: {output_file}")
            return False
        print(f"Screenshot saved: {output_file}")
        return True

    except Exception as e:
        print(f"Error during page capture: {str(e)}")
        raise Exception(f"Failed to capture page: {str(e)}") from e
    finally:
        print("Closing Chrome...")
        # Wrap cleanup in a try/except to prevent errors if the session is already closed
        try:
            driver.quit()
        except Exception as cleanup_error:
            print(f"Error during driver.quit(): {cleanup_error}")

        # Clean up any lingering Chrome processes
        _terminate_lingering_chrome()

def capture_and_show(url: str):
    """Screenshot *url* into a temp directory and return the image path.

    The directory comes from the TMPDIR environment variable (default
    '/tmp'); if it is not writable, a per-user directory under '/tmp' is
    used instead. Returns the path of the PNG on success and None on any
    failure, after printing the error and its stack trace.
    """
    try:
        shot_dir = os.getenv('TMPDIR', '/tmp')
        os.makedirs(shot_dir, mode=0o777, exist_ok=True)
        print(f"Using temp directory: {shot_dir}")

        writable = os.access(shot_dir, os.W_OK)
        if not writable:
            # Fall back to a directory named after the current user id.
            print(f"Warning: Temp directory {shot_dir} is not writable")
            shot_dir = os.path.join('/tmp', f'chrome_screenshots_{os.getuid()}')
            os.makedirs(shot_dir, mode=0o777, exist_ok=True)
            print(f"Created user-specific temp directory: {shot_dir}")

        # Random suffix keeps each capture in its own file.
        shot_name = f"screenshot_{os.urandom(8).hex()}.png"
        shot_path = os.path.join(shot_dir, shot_name)
        print(f"Temp file path: {shot_path}")

        if not capture_page(url, shot_path):
            print("Screenshot capture returned False")
            return None

        if not os.path.exists(shot_path):
            print("Screenshot file was not created")
            return None

        print("Screenshot captured successfully")
        return shot_path

    except OSError as os_error:
        # Filesystem problems get their own message prefix.
        print(f"OS Error: {str(os_error)}")
        print(f"Stack trace: {traceback.format_exc()}")
        return None
    except Exception as error:
        print(f"Error in capture_and_show: {str(error)}")
        print(f"Stack trace: {traceback.format_exc()}")
        return None
    
def create_gradio_app():
    """Build and return the Gradio Blocks UI for capturing screenshots.

    The layout is a URL textbox with a capture button, an image showing the
    result, and an initially hidden textbox that is revealed with a message
    whenever validation or capture fails.
    """

    def failure(message):
        # No image, and the error textbox made visible with the message.
        return None, gr.update(visible=True, value=message)

    def capture_with_error(url):
        try:
            if not url:
                return failure("Please enter a URL")

            has_scheme = url.startswith(('http://', 'https://'))
            if not has_scheme:
                return failure("Please enter a valid URL starting with http:// or https://")

            screenshot_path = capture_and_show(url)
            if screenshot_path is None:
                return failure("Failed to capture screenshot. Please check the URL and try again.")

            # Success: show the image and hide any earlier error message.
            return screenshot_path, gr.update(visible=False, value="")
        except Exception as e:
            return failure(f"Error: {str(e)}")

    with gr.Blocks() as app:
        gr.Markdown("# Webpage Screenshot Capture")

        with gr.Row():
            url_box = gr.Textbox(
                label="Website URL",
                placeholder="Enter website URL (e.g., https://www.example.com)",
                scale=4
            )
            capture_button = gr.Button("Capture", scale=1)

        with gr.Row():
            screenshot_view = gr.Image(
                label="Captured Screenshot",
                type="filepath"
            )

        error_box = gr.Textbox(
            label="Error Message",
            visible=False,
            interactive=False
        )

        capture_button.click(
            fn=capture_with_error,
            inputs=[url_box],
            outputs=[screenshot_view, error_box]
        )

    return app

# Build the UI once while the module body executes; main() launches this
# module-level instance.
app = create_gradio_app()

# Port and host passed to app.launch() in main().
server_port = 7860
server_name = "0.0.0.0"

def main():
    """Launch the module-level Gradio app on the configured host and port."""
    launch_settings = {
        "server_name": server_name,
        "server_port": server_port,
        "share": False,
        "auth": None,
        "ssl_verify": False,
        "show_error": True,
        "favicon_path": None,
    }
    print("Starting Gradio server...")
    app.launch(**launch_settings)

# Start the server as soon as the module body finishes executing.
# NOTE(review): this call is not guarded by `if __name__ == "__main__":`, so
# importing this module also launches the server -- confirm that is intended.
main()