Spaces:

tomo2chin2
/

HTMLviewer2

Paused

App Files Files Community

tomo2chin2 committed on Mar 24, 2025

Commit

e6532da

verified ·

1 Parent(s): da755ee

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -80

app.py CHANGED Viewed

@@ -1,101 +1,92 @@
-import os, tempfile, time
 import gradio as gr
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from PIL import Image
 from io import BytesIO
-def html_to_screenshot(html_code: str) -> Image.Image:
-    # Configure Selenium to use headless Chrome
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")
-    chrome_options.add_argument("--no-sandbox")
-    chrome_options.add_argument("--disable-dev-shm-usage")
-    chrome_options.add_argument("--disable-gpu")
-    # Launch headless Chrome
-    driver = webdriver.Chrome(options=chrome_options)
     try:
-        # Write HTML code to a temporary file
-        tmp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
-        tmp_path = tmp_file.name
-        tmp_file.write(html_code.encode('utf-8'))
-        tmp_file.close()
-        driver.get(f"file://{tmp_path}")
-        time.sleep(1)  # allow any dynamic content to load if needed
-        # Get total page dimensions
-        total_width = driver.execute_script(
-            "return Math.max(document.body.scrollWidth, document.documentElement.scrollWidth);")
-        total_height = driver.execute_script(
-            "return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);")
-        # Set the browser window to full content width and a fixed viewport height
-        viewport_height = 1000  # px
-        driver.set_window_size(total_width, viewport_height)
-        # Mark all fixed or sticky elements
-        driver.execute_script(
-            "document.querySelectorAll('*').forEach(el => {"
-            "  const pos = window.getComputedStyle(el).position;"
-            "  if(pos === 'fixed' || pos === 'sticky') { el.setAttribute('data-fixed', 'true'); }"
-            "});")
-        # Screenshot the top of the page (with sticky elements visible)
-        screenshots = []
-        png_data = driver.get_screenshot_as_png()
-        screenshots.append(Image.open(BytesIO(png_data)))
-        # Hide sticky/fixed elements before taking further screenshots
         driver.execute_script(
-            "document.querySelectorAll('[data-fixed=\"true\"]').forEach(el => el.style.visibility='hidden');")
-        # Scroll and capture screenshots until reaching the bottom
-        pixels_scrolled = viewport_height
-        while True:
-            if pixels_scrolled >= total_height:
-                break  # done if we've covered the whole height
-            driver.execute_script(f"window.scrollTo(0, {pixels_scrolled});")
-            time.sleep(0.2)
-            # Check actual scroll position in case we hit the bottom
-            current_offset = driver.execute_script("return window.pageYOffset;")
-            if current_offset < pixels_scrolled:
-                # Adjust if we couldn't scroll the full amount (at bottom of page)
-                current_offset = total_height - viewport_height
-                driver.execute_script(f"window.scrollTo(0, {current_offset});")
-                time.sleep(0.1)
-            # Capture screenshot at the current offset
-            png_data = driver.get_screenshot_as_png()
-            screenshots.append(Image.open(BytesIO(png_data)))
-            # Prepare for next iteration
-            pixels_scrolled = current_offset + viewport_height
-        # Stitch screenshots into one tall image
-        # Compute overlap if the last screenshot went beyond the content bottom
-        remainder = total_height % viewport_height
-        overlap = viewport_height - remainder if remainder != 0 else 0
-        if overlap and len(screenshots) > 1:
-            # Crop the overlapping top part from the last image
-            last_img = screenshots[-1]
-            screenshots[-1] = last_img.crop((0, overlap, last_img.width, last_img.height))
-        # Combine images vertically
-        total_combined_height = sum(img.height for img in screenshots)
-        combined_img = Image.new("RGB", (total_width, total_combined_height))
-        y = 0
-        for img in screenshots:
-            combined_img.paste(img, (0, y))
-            y += img.height
-        return combined_img
     finally:
         driver.quit()
-# Set up Gradio interface
-interface = gr.Interface(
-    fn=html_to_screenshot,
-    inputs=gr.Textbox(label="HTML Code", lines=15),
-    outputs=gr.Image(type="pil"),
-    title="HTML Full-Page Screenshot",
-    description="Enter HTML code and generate a full-page screenshot image."
 )
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
 from PIL import Image
 from io import BytesIO
+import tempfile
+import time
+import os
+def render_fullpage_screenshot(html_code):
+    # 1) HTMLコードを一時ファイルに保存
+    tmp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
+    tmp_path = tmp_file.name
+    tmp_file.write(html_code.encode('utf-8'))
+    tmp_file.close()
+    # 2) ヘッドレスChrome(Chromium)起動オプション
+    options = Options()
+    options.add_argument("--headless")
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument("--force-device-scale-factor=1")
     try:
+        driver = webdriver.Chrome(options=options)
+        # 3) まずはある程度のウィンドウサイズでページを開く
+        driver.set_window_size(1200, 800)
+        driver.get("file://" + tmp_path)
+        # 4) ページのロードを待機
+        WebDriverWait(driver, 10).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+        # 外部リソース読み込み等の安定化のため余分に待機
+        time.sleep(2)
+        # 5) スクロールバーを写したくない場合はCSSで非表示にする
         driver.execute_script(
+            "document.documentElement.style.overflow = 'hidden';"
+            "document.body.style.overflow = 'hidden';"
+        )
+        # 6) ページ全体の幅・高さを正確に取得
+        #    bodyとdocumentElementの値を比較し、より大きい方を使用
+        scroll_width = driver.execute_script(
+            "return Math.max("
+            "document.body.scrollWidth, document.documentElement.scrollWidth)"
+        )
+        scroll_height = driver.execute_script(
+            "return Math.max("
+            "document.body.scrollHeight, document.documentElement.scrollHeight)"
+        )
+        # 7) ウィンドウサイズをページ全体に合わせる
+        #    少し余裕をもたせるため+50などしてもOK
+        driver.set_window_size(scroll_width, scroll_height)
+        time.sleep(2)  # レイアウトが変わるので少し待つ
+        # 念のためページ最上部にスクロール
+        driver.execute_script("window.scrollTo(0, 0)")
+        time.sleep(1)
+        # 8) スクリーンショットを取得
+        png = driver.get_screenshot_as_png()
+    except Exception as e:
+        # 失敗時は1x1の黒画像を返す
+        return Image.new('RGB', (1, 1), color=(0, 0, 0))
     finally:
         driver.quit()
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+    # 9) PNGバイナリをPIL.Imageに変換して返す
+    return Image.open(BytesIO(png))
+# Gradio interface: wires render_fullpage_screenshot to a 15-line textbox
+# for the HTML input and a PIL image component for the resulting screenshot.
+iface = gr.Interface(
+    fn=render_fullpage_screenshot,
+    inputs=gr.Textbox(lines=15, label="HTMLコード入力"),
+    outputs=gr.Image(type="pil", label="ページ全体のスクリーンショット"),
+    title="Full Page Screenshot App",
+    description="HTMLをヘッドレスブラウザでレンダリングし、ページ全体を1枚の画像として取得します。"
 )
 if __name__ == "__main__":
+    # Launch the interface only when this file is executed as a script.
+    iface.launch()