rajthakkar123 committed on
Commit
6e7062c
·
verified ·
1 Parent(s): 80c4cbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -16
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import os
2
  import tempfile
 
 
 
3
  from playwright.sync_api import sync_playwright
4
  import gradio as gr
5
 
@@ -7,33 +10,134 @@ BROWSER_ARGS = [
7
  "--no-sandbox",
8
  "--disable-setuid-sandbox",
9
  "--disable-dev-shm-usage",
 
 
 
10
  ]
11
 
12
  def open_and_screenshot(url="https://example.com"):
13
- with sync_playwright() as pw:
14
- executable_path = pw.chromium.executable_path
15
- print(f"Chromium executable path: {executable_path}")
16
-
17
- browser = pw.chromium.launch(headless=True, args=BROWSER_ARGS)
18
- page = browser.new_page()
19
- page.goto(url, wait_until="domcontentloaded", timeout=30000)
20
- title = page.title()
21
-
22
- # take screenshot
23
- tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
24
- screenshot_path = tmp.name
25
- page.screenshot(path=screenshot_path, full_page=True)
26
- browser.close()
27
- return title, screenshot_path
28
 
 
 
 
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  with gr.Blocks(title="Chromium-launch Space (Playwright)") as demo:
31
  url_input = gr.Textbox(value="https://example.com", label="URL")
32
  run_btn = gr.Button("Open URL in Chromium")
33
  output_title = gr.Textbox(label="Page title")
34
  output_img = gr.Image(label="Screenshot")
 
35
 
36
- run_btn.click(fn=open_and_screenshot, inputs=url_input, outputs=[output_title, output_img])
37
 
38
  if __name__ == "__main__":
39
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
1
  import os
2
  import tempfile
3
+ import socket
4
+ import traceback
5
+ from urllib.parse import urlparse
6
  from playwright.sync_api import sync_playwright
7
  import gradio as gr
8
 
 
10
  "--no-sandbox",
11
  "--disable-setuid-sandbox",
12
  "--disable-dev-shm-usage",
13
+ "--disable-gpu",
14
+ "--single-process",
15
+ "--no-zygote",
16
  ]
17
 
18
def _extract_host(url):
    """Return the bare hostname of *url*, or "" when none can be found.

    Uses the parsed ``hostname`` attribute so that credentials
    (``user:pw@host``), ports and IPv6 brackets are stripped. Input without a
    scheme (``example.com/page``) is re-parsed as a network-path reference so
    the host is still recovered.

    Raises:
        ValueError: if ``urlparse`` rejects the URL (e.g. malformed IPv6).
    """
    host = urlparse(url).hostname
    if not host:
        # Without "//" urlparse puts "example.com/page" into .path (or treats
        # "example.com:8080" as scheme + path), so retry with the prefix.
        host = urlparse(f"//{url}").hostname
    return host or ""


def _alternate_scheme_url(url):
    """Return *url* with http/https swapped, or None for any other scheme.

    Only the leading scheme is rewritten; the rest of the URL is kept as-is.
    """
    lowered = url.lower()
    if lowered.startswith("https://"):
        return "http://" + url[len("https://"):]
    if lowered.startswith("http://"):
        return "https://" + url[len("http://"):]
    return None


def _navigate_and_capture(page, target_url, timeout_ms):
    """Navigate *page* to *target_url* and save a full-page PNG screenshot.

    Returns:
        ``(page_title, screenshot_path)``; the path is only returned once the
        screenshot has actually been written.

    Raises:
        Whatever Playwright raises for a failed navigation or screenshot.
    """
    page.goto(target_url, wait_until="domcontentloaded", timeout=timeout_ms)
    page_title = page.title()

    # Only the unique name is needed; close the handle right away so no file
    # descriptor is leaked while Playwright writes to the path.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        image_path = tmp.name
    try:
        page.screenshot(path=image_path, full_page=True)
    except Exception:
        # Do not leave an empty placeholder file behind on failure.
        try:
            os.remove(image_path)
        except OSError:
            pass  # best-effort cleanup; the screenshot error is what matters
        raise
    return page_title, image_path


def open_and_screenshot(url="https://example.com"):
    """Open *url* in headless Chromium and collect network diagnostics.

    Steps, each of which is best-effort and records its outcome in the
    diagnostics instead of raising:

    1. Record proxy-related environment variables.
    2. Extract the hostname from *url* and resolve it with
       ``socket.gethostbyname``.
    3. If ``requests`` is importable, issue a GET with a 6 s timeout.
    4. Launch Chromium through Playwright and navigate to *url* (60 s
       timeout). If that fails, try ``http://<resolved ip>/`` and then the
       same URL with http/https swapped (15 s each), stopping at the first
       attempt that yields a screenshot.

    Args:
        url: Address to open.

    Returns:
        ``(title, screenshot_path, diag_text)`` where ``title`` is the page
        title ("" if no navigation succeeded), ``screenshot_path`` is the path
        of a PNG temp file (None if no screenshot was taken) and ``diag_text``
        is one ``key: value`` entry per diagnostic, joined by newlines.
    """
    diag = {}
    screenshot_path = None
    title = ""

    # quick env/proxy info (upper-case name first, then lower-case variant)
    for var_name in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"):
        diag[var_name] = os.environ.get(var_name) or os.environ.get(var_name.lower())

    # parse host for DNS test
    try:
        host = _extract_host(url)
        diag['url_host'] = host
    except (TypeError, ValueError) as exc:
        host = ""
        diag['url_host_parse_error'] = str(exc)

    # DNS resolution
    resolved_ip = None
    if host:
        try:
            resolved_ip = socket.gethostbyname(host)
            diag['resolved_ip'] = resolved_ip
        except Exception as exc:  # diagnostic boundary: report, never raise
            diag['resolved_ip_error'] = repr(exc)

    # optional pre-check with requests; only a missing package counts as
    # "not installed", request failures are reported separately
    try:
        import requests
    except ImportError:
        diag['requests_not_installed'] = True
    else:
        try:
            resp = requests.get(url, timeout=6)
            diag['requests_status_code'] = resp.status_code
        except Exception as exc:
            diag['requests_error'] = repr(exc)

    # Playwright diagnostics + navigation attempts
    try:
        with sync_playwright() as pw:
            # executable path (property, not callable)
            diag['playwright_chromium_executable'] = getattr(
                pw.chromium, "executable_path", None
            )

            browser = pw.chromium.launch(headless=True, args=BROWSER_ARGS)
            try:
                ctx = browser.new_context()
                page = ctx.new_page()

                # increase navigation timeout to 60s (60000 ms)
                page.set_default_navigation_timeout(60000)

                try:
                    title, screenshot_path = _navigate_and_capture(page, url, 60000)
                    diag['playwright_result'] = "success"
                    diag['navigator_user_agent'] = page.evaluate("navigator.userAgent")
                except Exception:
                    diag['playwright_goto_trace'] = traceback.format_exc()

                if screenshot_path is None:
                    fallbacks = []
                    if resolved_ip:
                        # plain http to the resolved IP bypasses DNS
                        # (some hosts block https by IP)
                        fallbacks.append(
                            ('playwright_fallback_ip', f"http://{resolved_ip}/")
                        )
                    swapped_url = _alternate_scheme_url(url)
                    if swapped_url:
                        fallbacks.append(('playwright_fallback_proto', swapped_url))

                    # Stop at the first fallback that works so a later attempt
                    # cannot overwrite an earlier successful capture.
                    for diag_key, alt_url in fallbacks:
                        try:
                            title, screenshot_path = _navigate_and_capture(
                                page, alt_url, 15000
                            )
                        except Exception:
                            diag[f"{diag_key}_trace"] = traceback.format_exc()
                        else:
                            diag[diag_key] = f"success via {alt_url}"
                            break
            finally:
                # Always attempt to close the browser, and surface a failure
                # in the diagnostics rather than hiding it.
                try:
                    browser.close()
                except Exception as exc:
                    diag['playwright_browser_close_error'] = repr(exc)
    except Exception:
        diag['playwright_launch_trace'] = traceback.format_exc()

    # flatten diag for return
    diag_text = "\n".join(f"{key}: {value}" for key, value in diag.items())

    return title, screenshot_path, diag_text
131
+
132
# Gradio front end: one URL box in; page title, screenshot and diagnostics out.
with gr.Blocks(title="Chromium-launch Space (Playwright)") as demo:
    url_input = gr.Textbox(label="URL", value="https://example.com")
    run_btn = gr.Button("Open URL in Chromium")
    output_title = gr.Textbox(label="Page title")
    output_img = gr.Image(label="Screenshot")
    output_diag = gr.Textbox(lines=12, label="Diagnostics")

    # Output order matches the handler's return tuple.
    result_components = [output_title, output_img, output_diag]
    run_btn.click(
        fn=open_and_screenshot,
        inputs=url_input,
        outputs=result_components,
    )

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)