tomo2chin2 committed on
Commit
e6532da
·
verified ·
1 Parent(s): da755ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -80
app.py CHANGED
@@ -1,101 +1,92 @@
1
- import os, tempfile, time
2
  import gradio as gr
3
  from selenium import webdriver
4
  from selenium.webdriver.chrome.options import Options
 
 
 
5
  from PIL import Image
6
  from io import BytesIO
 
 
 
7
 
8
def html_to_screenshot(html_code: str) -> Image.Image:
    """Render HTML in headless Chrome and return a stitched full-page image.

    The HTML is written to a temporary file, opened through a ``file://`` URL,
    and captured one viewport at a time while scrolling. The captures are then
    pasted vertically into a single image.

    Args:
        html_code: HTML source to render.

    Returns:
        An RGB ``PIL.Image.Image`` holding the stitched screenshots.
    """
    # Configure Selenium to use headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")

    # Launch headless Chrome
    driver = webdriver.Chrome(options=chrome_options)
    tmp_path = None
    try:
        # Write HTML code to a temporary file; delete=False keeps it on disk
        # after closing so Chrome can open it, and it is removed in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(html_code.encode('utf-8'))
        driver.get(f"file://{tmp_path}")
        time.sleep(1)  # allow any dynamic content to load if needed

        # Get total page dimensions
        total_width = driver.execute_script(
            "return Math.max(document.body.scrollWidth, document.documentElement.scrollWidth);")
        total_height = driver.execute_script(
            "return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);")
        # Set the browser window to full content width and a fixed viewport height
        viewport_height = 1000  # px
        driver.set_window_size(total_width, viewport_height)

        # Mark all fixed or sticky elements
        driver.execute_script(
            "document.querySelectorAll('*').forEach(el => {"
            "  const pos = window.getComputedStyle(el).position;"
            "  if(pos === 'fixed' || pos === 'sticky') { el.setAttribute('data-fixed', 'true'); }"
            "});")
        # Screenshot the top of the page (with sticky elements visible)
        screenshots = []
        png_data = driver.get_screenshot_as_png()
        screenshots.append(Image.open(BytesIO(png_data)))

        # Hide sticky/fixed elements before taking further screenshots so they
        # are not repeated in every captured viewport.
        driver.execute_script(
            "document.querySelectorAll('[data-fixed=\"true\"]').forEach(el => el.style.visibility='hidden');")

        # Scroll and capture screenshots until reaching the bottom
        pixels_scrolled = viewport_height
        while True:
            if pixels_scrolled >= total_height:
                break  # done if we've covered the whole height
            driver.execute_script(f"window.scrollTo(0, {pixels_scrolled});")
            time.sleep(0.2)
            # Check actual scroll position in case we hit the bottom
            current_offset = driver.execute_script("return window.pageYOffset;")
            if current_offset < pixels_scrolled:
                # Adjust if we couldn't scroll the full amount (at bottom of page)
                current_offset = total_height - viewport_height
                driver.execute_script(f"window.scrollTo(0, {current_offset});")
                time.sleep(0.1)
            # Capture screenshot at the current offset
            png_data = driver.get_screenshot_as_png()
            screenshots.append(Image.open(BytesIO(png_data)))
            # Prepare for next iteration
            pixels_scrolled = current_offset + viewport_height

        # Stitch screenshots into one tall image
        # Compute overlap if the last screenshot went beyond the content bottom
        remainder = total_height % viewport_height
        overlap = viewport_height - remainder if remainder != 0 else 0
        if overlap and len(screenshots) > 1:
            # Crop the overlapping top part from the last image
            last_img = screenshots[-1]
            screenshots[-1] = last_img.crop((0, overlap, last_img.width, last_img.height))
        # Combine images vertically
        total_combined_height = sum(img.height for img in screenshots)
        combined_img = Image.new("RGB", (total_width, total_combined_height))
        y = 0
        for img in screenshots:
            combined_img.paste(img, (0, y))
            y += img.height

        return combined_img
    finally:
        try:
            driver.quit()
        finally:
            # Remove the temporary HTML file even if quitting the browser fails.
            if tmp_path is not None and os.path.exists(tmp_path):
                os.remove(tmp_path)
 
 
 
 
 
90
 
91
# Gradio UI: one multi-line textbox for the HTML source, one image output
# showing the PIL image returned by html_to_screenshot.
interface = gr.Interface(
    title="HTML Full-Page Screenshot",
    description="Enter HTML code and generate a full-page screenshot image.",
    fn=html_to_screenshot,
    inputs=gr.Textbox(lines=15, label="HTML Code"),
    outputs=gr.Image(type="pil"),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()
 
 
1
  import gradio as gr
2
  from selenium import webdriver
3
  from selenium.webdriver.chrome.options import Options
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.support.ui import WebDriverWait
6
+ from selenium.webdriver.support import expected_conditions as EC
7
  from PIL import Image
8
  from io import BytesIO
9
+ import tempfile
10
+ import time
11
+ import os
12
 
13
def render_fullpage_screenshot(html_code):
    """Render HTML in headless Chrome and return a full-page screenshot.

    The HTML is written to a temporary file, loaded through a ``file://`` URL,
    and the browser window is resized to the page's full scroll width and
    height so that a single screenshot covers the whole page.

    Args:
        html_code: HTML source to render.

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG screenshot. If anything
        fails while writing the file, launching the browser, or rendering,
        the error is logged and a 1x1 black RGB image is returned instead.
    """
    # Local import so this block does not depend on a file-level import.
    import logging

    # 1) Create the temporary HTML file. delete=False keeps it on disk after
    #    closing so Chrome can open it; it is removed in `finally` below.
    tmp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
    tmp_path = tmp_file.name

    # 2) Launch options for headless Chrome (Chromium)
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--force-device-scale-factor=1")

    # Bound before `try` so `finally` can tell whether the browser started.
    driver = None
    try:
        with tmp_file:
            tmp_file.write(html_code.encode('utf-8'))

        driver = webdriver.Chrome(options=options)

        # 3) Open the page with a reasonable initial window size
        driver.set_window_size(1200, 800)
        driver.get("file://" + tmp_path)

        # 4) Wait for the page to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        # Extra wait to let external resources settle
        time.sleep(2)

        # 5) Hide scrollbars via CSS so they do not appear in the screenshot
        driver.execute_script(
            "document.documentElement.style.overflow = 'hidden';"
            "document.body.style.overflow = 'hidden';"
        )

        # 6) Measure the full page width and height, taking the larger of the
        #    body and documentElement values
        scroll_width = driver.execute_script(
            "return Math.max("
            "document.body.scrollWidth, document.documentElement.scrollWidth)"
        )
        scroll_height = driver.execute_script(
            "return Math.max("
            "document.body.scrollHeight, document.documentElement.scrollHeight)"
        )

        # 7) Resize the window to the whole page
        #    (adding a small margin such as +50 would also be fine)
        driver.set_window_size(scroll_width, scroll_height)
        time.sleep(2)  # the layout changes after resizing, so wait a little

        # Scroll to the very top, just in case
        driver.execute_script("window.scrollTo(0, 0)")
        time.sleep(1)

        # 8) Take the screenshot
        png = driver.get_screenshot_as_png()

    except Exception:
        # Best-effort contract: record the failure, then return a 1x1 black
        # placeholder image instead of raising.
        logging.getLogger(__name__).exception(
            "Full-page screenshot rendering failed"
        )
        return Image.new('RGB', (1, 1), color=(0, 0, 0))

    finally:
        try:
            # The browser may never have started if webdriver.Chrome() raised.
            if driver is not None:
                driver.quit()
        finally:
            # Remove the temporary file even if quitting the browser fails.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # 9) Convert the PNG bytes to a PIL.Image and return it
    return Image.open(BytesIO(png))
81
 
82
# Gradio interface: a multi-line textbox for the HTML source and an image
# output showing the PIL image returned by render_fullpage_screenshot.
iface = gr.Interface(
    title="Full Page Screenshot App",
    description="HTMLをヘッドレスブラウザでレンダリングし、ページ全体を1枚の画像として取得します。",
    fn=render_fullpage_screenshot,
    inputs=gr.Textbox(label="HTMLコード入力", lines=15),
    outputs=gr.Image(label="ページ全体のスクリーンショット", type="pil"),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()