HTMLviewer3_API_DATASET_URL

Paused

File size: 4,367 Bytes

b26e489
 
e0538f9
 
 
 
b26e489
 
e0538f9
 
 
b26e489
e170610
600cc03
e170610
 
600cc03
 
e170610
b26e489
 
 
 
e0538f9
e170610
e0538f9
e170610
 
 
 
e0538f9
b26e489
 
e170610
fbe2dc1
b26e489
e0538f9
e170610
e0538f9
 
 
e170610
 
 
 
 
 
 
 
 
600cc03
 
e170610
 
 
ee6f0ce
e170610
 
 
ee6f0ce
e170610
 
 
 
 
600cc03
e170610
 
 
fbe2dc1
e170610
 
 
 
 
e0538f9
e170610
 
 
 
ee6f0ce
e170610
 
 
 
e0538f9
 
e170610
e0538f9
 
b26e489
 
 
 
e0538f9
e170610
b26e489
fbe2dc1
b26e489
fbe2dc1
b26e489
e170610
 
 
 
 
 
b26e489

import logging
import os
import tempfile
import time
from io import BytesIO
from pathlib import Path

import gradio as gr
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def render_fullpage_screenshot(html_code):
    """Render HTML in headless Chrome and return one stitched full-page image.

    The HTML is written to a temporary file, opened in headless Chrome, and
    captured one viewport at a time while scrolling down. The captures are
    pasted top-to-bottom into a single tall image. When the final scroll is
    clamped by the browser (page height not a multiple of the viewport
    height), the part of the last capture that was already captured is
    cropped away so no content appears twice.

    Args:
        html_code: HTML source to render. ``None`` is treated as an empty
            document.

    Returns:
        An RGB ``PIL.Image.Image`` holding the stitched page. If anything
        fails (browser start-up, page load, capture), the error is logged
        and a 1x1 black image is returned instead of raising.
    """
    # 1) Save the HTML to a temporary file the browser can open.
    with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
        tmp_file.write((html_code or "").encode("utf-8"))
        tmp_path = tmp_file.name

    # Stays None if the browser never starts, so cleanup can skip quit().
    driver = None
    try:
        # 2) Headless Chrome (Chromium) launch options.
        options = Options()
        options.add_argument("--headless")               # headless mode
        options.add_argument("--no-sandbox")             # no sandbox (containers)
        options.add_argument("--disable-dev-shm-usage")  # do not use /dev/shm
        options.add_argument("--force-device-scale-factor=1")  # fixed DPI/scale

        driver = webdriver.Chrome(options=options)
        # Arbitrary initial window size; it defines the capture tile size.
        driver.set_window_size(1200, 800)
        # as_uri() builds a valid file:// URI on every platform.
        driver.get(Path(tmp_path).as_uri())

        # 3) Wait until the page has a <body>.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        # Give fonts and external resources a little more time to load.
        time.sleep(2)

        # Hide scrollbars so they do not show up in the captures.
        # (This may change the layout of some pages.)
        driver.execute_script(
            "document.documentElement.style.overflow = 'hidden';"
            "document.body.style.overflow = 'hidden';"
        )
        time.sleep(1)

        # 4) Viewport height and full page height, in CSS pixels.
        viewport_height = driver.execute_script("return window.innerHeight")
        # Take the larger of body/documentElement so the page height is not
        # under-reported when one of the two excludes margins.
        scroll_height = driver.execute_script(
            "return Math.max(document.body.scrollHeight,"
            " document.documentElement.scrollHeight)"
        ) or 0
        if not viewport_height:
            # A zero-height viewport would never advance the scroll loop.
            raise RuntimeError("browser reported an empty viewport")

        # Scroll and capture repeatedly.
        images = []
        scroll_y = 0         # actual scroll offset of the current capture
        captured_bottom = 0  # page Y up to which content is already captured

        while True:
            img = Image.open(BytesIO(driver.get_screenshot_as_png()))

            # If the browser clamped the scroll, the top of this capture
            # repeats content from the previous one; trim that part.
            overlap = captured_bottom - scroll_y
            if overlap > 0:
                # Screenshot pixels per CSS pixel (1 with the fixed scale
                # factor, but computed in case the browser disagrees).
                scale = img.height / viewport_height
                trim = min(round(overlap * scale), img.height - 1)
                img = img.crop((0, trim, img.width, img.height))
            images.append(img)

            captured_bottom = scroll_y + viewport_height
            if captured_bottom >= scroll_height:
                break

            # Move to the next tile and wait for it to be painted.
            driver.execute_script(f"window.scrollTo(0, {captured_bottom})")
            time.sleep(1)

            new_y = driver.execute_script("return window.pageYOffset") or 0
            if new_y <= scroll_y:
                # The page did not scroll any further: nothing new to capture.
                break
            scroll_y = new_y

        # 5) Stitch the captures vertically.
        total_width = max(img.width for img in images)
        total_height = sum(img.height for img in images)
        full_screenshot = Image.new('RGB', (total_width, total_height))

        current_y = 0
        for img in images:
            full_screenshot.paste(img, (0, current_y))
            current_y += img.height

    except Exception:
        # Best-effort contract: log the failure and hand back a 1x1 black
        # image so the caller always receives an image.
        logging.getLogger(__name__).exception(
            "Full-page screenshot rendering failed"
        )
        return Image.new('RGB', (1, 1), color=(0, 0, 0))

    finally:
        try:
            if driver is not None:
                driver.quit()
        finally:
            # Remove the temp file even if quitting the browser fails.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    return full_screenshot

# Gradio UI: a multi-line HTML text box in, the stitched screenshot out.
iface = gr.Interface(
    fn=render_fullpage_screenshot,
    title="Full Page Screenshot with Scrolling",
    description=(
        "ページを少しずつスクロールしながら複数回キャプチャを撮影し、"
        "最終的に縦に結合して1枚の長い画像を生成します。"
    ),
    inputs=gr.Textbox(label="HTMLコード入力", lines=15),
    outputs=gr.Image(label="フルページスクリーンショット", type="pil"),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()