HTMLviewer2_API

Paused

App Files Files Community

tomo2chin2 committed on Mar 28, 2025

Commit

1281d69

verified ·

1 Parent(s): fc646af

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -38

app.py CHANGED Viewed

@@ -27,8 +27,15 @@ logger = logging.getLogger(__name__)
 class GeminiRequest(BaseModel):
     """Geminiへのリクエストデータモデル"""
     text: str
-    extension_percentage: float = 6.0  # ①デフォルト値を6%に変更
-    temperature: float = 1.0  # ④デフォルト値1.0の温度パラメータを追加
 def generate_html_from_text(text, temperature=1.0):
     """テキストからHTMLを生成する"""
@@ -158,7 +165,7 @@ def generate_html_from_text(text, temperature=1.0):
         # 生成設定
         generation_config = {
-            "temperature": temperature,  # ④パラメータとして受け取った温度を設定
             "top_p": 0.95,
             "top_k": 64,
             "max_output_tokens": 8192,
@@ -194,8 +201,63 @@ def generate_html_from_text(text, temperature=1.0):
         logger.error(f"HTML生成中にエラーが発生: {e}", exc_info=True)
         raise Exception(f"Gemini APIでのHTML生成に失敗しました: {e}")
 # --- Core Screenshot Logic ---
-def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_gemini_content: bool = False) -> Image.Image:
     """
     Renders HTML code to a full-page screenshot using Selenium.
@@ -203,6 +265,7 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
         html_code: The HTML source code string.
         extension_percentage: Percentage of extra space to add vertically (e.g., 4 means 4% total).
         is_gemini_content: True if the HTML was generated by Gemini API (requires special handling).
     Returns:
         A PIL Image object of the screenshot. Returns a 1x1 black image on error.
@@ -212,8 +275,8 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
     # Gemini生成コンテンツの場合、拡張率を調整
     if is_gemini_content:
-        # ②最低でも5%の拡張を確保（20%から5%に変更）
-        extension_percentage = max(extension_percentage, 1.0)
         logger.info(f"Gemini生成コンテンツ用に拡張率を調整: {extension_percentage}%")
     # 1) Save HTML code to a temporary file
@@ -235,18 +298,15 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
     # Font Awesomeが読み込まれない場合があるため、読み込み待機時間を長く設定
     options.add_argument("--disable-features=NetworkService")
     options.add_argument("--dns-prefetch-disable")
-    # Increase logging verbosity for debugging if needed
-    # options.add_argument("--enable-logging")
-    # options.add_argument("--v=1")
     try:
         logger.info("Initializing WebDriver...")
         driver = webdriver.Chrome(options=options)
         logger.info("WebDriver initialized.")
-        # 3) Load page with initial large window size (Geminiコンテンツ用に高さを増やす)
-        initial_width = 1800
-        initial_height = 2000 if is_gemini_content else 1200
         driver.set_window_size(initial_width, initial_height)
         file_url = "file://" + tmp_path
         logger.info(f"Navigating to {file_url}")
@@ -254,7 +314,7 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
         # 4) Wait for page load with extended timeout
         logger.info("Waiting for body element...")
-        WebDriverWait(driver, 15).until( # タイムアウトを少し延長
             EC.presence_of_element_located((By.TAG_NAME, "body"))
         )
         logger.info("Body element found. Waiting for potential resource loading...")
@@ -293,14 +353,19 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
                     document.body ? document.body.scrollHeight : 0,
                     document.body ? document.body.offsetHeight : 0,
                     document.body ? document.body.clientHeight : 0
                 )
             };
             """
             dimensions = driver.execute_script(dimensions_script)
             scroll_width = dimensions['width']
             scroll_height = dimensions['height']
-            logger.info(f"Detected dimensions: width={scroll_width}, height={scroll_height}")
             # スクロールして確認する追加の検証
             driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
@@ -314,25 +379,29 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
             logger.info(f"After scroll check, height={scroll_height}")
-            # Ensure minimum dimensions to avoid errors
-            scroll_width = max(scroll_width, 100) # 最小幅を設定
-            scroll_height = max(scroll_height, 100) # 最小高さを設定
         except Exception as e:
             logger.error(f"Error getting page dimensions: {e}")
             # フォールバックとしてデフォルト値を設定
-            scroll_width = 1800
-            scroll_height = 2000 if is_gemini_content else 1200
             logger.warning(f"Falling back to dimensions: width={scroll_width}, height={scroll_height}")
         # 7) Calculate adjusted height with user-specified margin
         adjusted_height = int(scroll_height * (1 + extension_percentage / 100.0))
         # Ensure adjusted height is not excessively large or small
-        adjusted_height = max(adjusted_height, scroll_height, 100) # 最小高さを確保
         logger.info(f"Adjusted height calculated: {adjusted_height} (extension: {extension_percentage}%)")
         # 8) Set window size to full page dimensions (width) and adjusted height
-        adjusted_width = max(scroll_width, initial_width)
         logger.info(f"Resizing window to: width={adjusted_width}, height={adjusted_height}")
         driver.set_window_size(adjusted_width, adjusted_height)
         logger.info("Waiting for layout stabilization after resize...")
@@ -378,6 +447,12 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
         # 画像サイズの確認とログ
         logger.info(f"Screenshot dimensions: {img.width}x{img.height}")
         return img
     except Exception as e:
@@ -399,14 +474,15 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
                 logger.error(f"Error removing temporary file {tmp_path}: {e}")
 # --- Geminiを使った新しい関数 ---
-def text_to_screenshot(text: str, extension_percentage: float, temperature: float = 1.0) -> Image.Image:
     """テキストをGemini APIでHTMLに変換し、スクリーンショットを生成する統合関数"""
     try:
         # 1. テキストからHTMLを生成（温度パラメータも渡す）
         html_code = generate_html_from_text(text, temperature)
         # 2. HTMLからスクリーンショットを生成（Gemini生成コンテンツとしてフラグをオン）
-        return render_fullpage_screenshot(html_code, extension_percentage, is_gemini_content=True)
     except Exception as e:
         logger.error(f"テキストからスクリーンショット生成中にエラーが発生: {e}", exc_info=True)
         return Image.new('RGB', (1, 1), color=(0, 0, 0))  # エラー時は黒画像
@@ -453,11 +529,6 @@ if os.path.exists(cdn_dir):
     logger.info(f"Mounting cdn directory: {cdn_dir}")
     app.mount("/cdn", StaticFiles(directory=cdn_dir), name="cdn")
-# Pydantic model for API request body validation
-class ScreenshotRequest(BaseModel):
-    html_code: str
-    extension_percentage: float = 6.0 # ①デフォルト値を6%に変更
 # API Endpoint for screenshot generation
 @app.post("/api/screenshot",
           response_class=StreamingResponse,
@@ -473,7 +544,8 @@ async def api_render_screenshot(request: ScreenshotRequest):
         # Run the blocking Selenium code in a separate thread (FastAPI handles this)
         pil_image = render_fullpage_screenshot(
             request.html_code,
-            request.extension_percentage
         )
         if pil_image.size == (1, 1):
@@ -510,7 +582,8 @@ async def api_text_to_screenshot(request: GeminiRequest):
         pil_image = text_to_screenshot(
             request.text,
             request.extension_percentage,
-            request.temperature
         )
         if pil_image.size == (1, 1):
@@ -531,14 +604,14 @@ async def api_text_to_screenshot(request: GeminiRequest):
 # --- Gradio Interface Definition ---
 # 入力モードの選択用Radioコンポーネント
-def process_input(input_mode, input_text, extension_percentage, temperature):
     """入力モードに応じて適切な処理を行う"""
     if input_mode == "HTML入力":
         # HTMLモードの場合は既存の処理
-        return render_fullpage_screenshot(input_text, extension_percentage)
     else:
         # テキスト入力モードの場合はGemini APIを使用
-        return text_to_screenshot(input_text, extension_percentage, temperature)
 # Gradio UIの定義
 with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr.themes.Base()) as iface:
@@ -552,7 +625,7 @@ with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr
             value="HTML入力"
         )
-    # ③共用のテキストボックス（タブ無し）
     input_text = gr.Textbox(
         lines=15,
         label="入力",
@@ -564,11 +637,11 @@ with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr
             minimum=0,
             maximum=30,
             step=1.0,
-            value=6,    # ①デフォルト値を6%に変更
             label="上下高さ拡張率（%）"
         )
-        # ④温度調整スライダー（テキストモード時のみ表示）
         temperature = gr.Slider(
             minimum=0.0,
             maximum=1.4,
@@ -578,12 +651,19 @@ with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr
             visible=False  # 最初は非表示
         )
     submit_btn = gr.Button("生成")
     output_image = gr.Image(type="pil", label="ページ全体のスクリーンショット")
     # 入力モード変更時のイベント処理（テキストモード時のみ温度スライダーを表示）
     def update_temperature_visibility(mode):
-        # Gradio 4.x用のアップデート方法に修正
         return {"visible": mode == "テキスト入力", "__type__": "update"}
     input_mode.change(
@@ -595,7 +675,7 @@ with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr
     # 生成ボタンクリック時のイベント処理
     submit_btn.click(
         fn=process_input,
-        inputs=[input_mode, input_text, extension_percentage, temperature],
         outputs=output_image
     )

 class GeminiRequest(BaseModel):
     """Request body model for the Gemini text-to-screenshot endpoint."""
     text: str  # source text that is turned into HTML by Gemini
+    extension_percentage: float = 6.0  # extra vertical space in percent (default 6%)
+    temperature: float = 1.0  # generation temperature forwarded to Gemini (default 1.0)
+    trim_whitespace: bool = True  # whether to trim surplus blank margins (enabled by default)
+class ScreenshotRequest(BaseModel):
+    """Request body model for the HTML screenshot endpoint."""
+    html_code: str  # HTML source to render
+    extension_percentage: float = 6.0  # extra vertical space in percent (default 6%)
+    trim_whitespace: bool = True  # whether to trim surplus blank margins (enabled by default)
 def generate_html_from_text(text, temperature=1.0):
     """テキストからHTMLを生成する"""
         # 生成設定
         generation_config = {
+            "temperature": temperature,  # パラメータとして受け取った温度を設定
             "top_p": 0.95,
             "top_k": 64,
             "max_output_tokens": 8192,
         logger.error(f"HTML生成中にエラーが発生: {e}", exc_info=True)
         raise Exception(f"Gemini APIでのHTML生成に失敗しました: {e}")
+# 画像から余分な空白領域をトリミングする関数
+def trim_image_whitespace(image, threshold=250, padding=10):
+    """
+    画像から余分な白い空白をトリミングする
+    Args:
+        image: PIL.Image - 入力画像
+        threshold: int - どの明るさ以上を空白と判断するか (0-255)
+        padding: int - トリミング後に残す余白のピクセル数
+    Returns:
+        トリミングされたPIL.Image
+    """
+    # グレースケールに変換
+    gray = image.convert('L')
+    # ピクセルデータを配列として取得
+    data = gray.getdata()
+    width, height = gray.size
+    # 有効範囲を見つける
+    min_x, min_y = width, height
+    max_x = max_y = 0
+    # ピクセルデータを2次元配列に変換して処理
+    pixels = list(data)
+    pixels = [pixels[i * width:(i + 1) * width] for i in range(height)]
+    # 各行をスキャンして非空白ピクセルを見つける
+    for y in range(height):
+        for x in range(width):
+            if pixels[y][x] < threshold:  # 非空白ピクセル
+                min_x = min(min_x, x)
+                min_y = min(min_y, y)
+                max_x = max(max_x, x)
+                max_y = max(max_y, y)
+    # 境界外のトリミングの場合はエラー
+    if min_x > max_x or min_y > max_y:
+        logger.warning("トリミング領域が見つかりません。元の画像を返します。")
+        return image
+    # パディングを追加
+    min_x = max(0, min_x - padding)
+    min_y = max(0, min_y - padding)
+    max_x = min(width - 1, max_x + padding)
+    max_y = min(height - 1, max_y + padding)
+    # 画像をトリミング
+    trimmed = image.crop((min_x, min_y, max_x + 1, max_y + 1))
+    logger.info(f"画像をトリミングしました: 元サイズ {width}x{height} → トリミング後 {trimmed.width}x{trimmed.height}")
+    return trimmed
 # --- Core Screenshot Logic ---
+def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0,
+                              is_gemini_content: bool = False, trim_whitespace: bool = True) -> Image.Image:
     """
     Renders HTML code to a full-page screenshot using Selenium.
         html_code: The HTML source code string.
         extension_percentage: Percentage of extra space to add vertically (e.g., 4 means 4% total).
         is_gemini_content: True if the HTML was generated by Gemini API (requires special handling).
+        trim_whitespace: Whether to trim excess whitespace from the image.
     Returns:
         A PIL Image object of the screenshot. Returns a 1x1 black image on error.
     # Gemini生成コンテンツの場合、拡張率を調整
     if is_gemini_content:
+        # 最低でも5%の拡張を確保
+        extension_percentage = max(extension_percentage, 5.0)
         logger.info(f"Gemini生成コンテンツ用に拡張率を調整: {extension_percentage}%")
     # 1) Save HTML code to a temporary file
     # Font Awesomeが読み込まれない場合があるため、読み込み待機時間を長く設定
     options.add_argument("--disable-features=NetworkService")
     options.add_argument("--dns-prefetch-disable")
     try:
         logger.info("Initializing WebDriver...")
         driver = webdriver.Chrome(options=options)
         logger.info("WebDriver initialized.")
+        # 3) 初期ウィンドウサイズを調整（小さなコンテンツのために減らす）
+        initial_width = 1200  # 1800 -> 1200 に変更
+        initial_height = 1000 if is_gemini_content else 800  # 2000/1200 -> 1000/800 に変更
         driver.set_window_size(initial_width, initial_height)
         file_url = "file://" + tmp_path
         logger.info(f"Navigating to {file_url}")
         # 4) Wait for page load with extended timeout
         logger.info("Waiting for body element...")
+        WebDriverWait(driver, 15).until(
             EC.presence_of_element_located((By.TAG_NAME, "body"))
         )
         logger.info("Body element found. Waiting for potential resource loading...")
                     document.body ? document.body.scrollHeight : 0,
                     document.body ? document.body.offsetHeight : 0,
                     document.body ? document.body.clientHeight : 0
+                ),
+                visibleHeight: Math.max(
+                    document.documentElement.clientHeight,
+                    document.body ? document.body.clientHeight : 0
                 )
             };
             """
             dimensions = driver.execute_script(dimensions_script)
             scroll_width = dimensions['width']
             scroll_height = dimensions['height']
+            visible_height = dimensions.get('visibleHeight', 0)
+            logger.info(f"Detected dimensions: width={scroll_width}, height={scroll_height}, visibleHeight={visible_height}")
             # スクロールして確認する追加の検証
             driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
             logger.info(f"After scroll check, height={scroll_height}")
+            # 最小値の調整 - 小さなコンテンツの場合でも適切に表示できるよう調整
+            scroll_width = max(scroll_width, 100)  # 最小幅を設定
+            scroll_height = max(scroll_height, 100)  # 最小高さを設定
+            # 最大値も設定（過度に大きな画像を防ぐため）
+            scroll_width = min(scroll_width, 2000)  # 最大幅
+            scroll_height = min(scroll_height, 4000)  # 最大高さ
         except Exception as e:
             logger.error(f"Error getting page dimensions: {e}")
             # フォールバックとしてデフォルト値を設定
+            scroll_width = 1200
+            scroll_height = 800
             logger.warning(f"Falling back to dimensions: width={scroll_width}, height={scroll_height}")
         # 7) Calculate adjusted height with user-specified margin
         adjusted_height = int(scroll_height * (1 + extension_percentage / 100.0))
         # Enforce a lower bound only: never below the measured content height or 100px
+        adjusted_height = max(adjusted_height, scroll_height, 100)  # 最小高さを確保
         logger.info(f"Adjusted height calculated: {adjusted_height} (extension: {extension_percentage}%)")
         # 8) Set window size to full page dimensions (width) and adjusted height
+        adjusted_width = scroll_width  # 初期幅との比較を削除
         logger.info(f"Resizing window to: width={adjusted_width}, height={adjusted_height}")
         driver.set_window_size(adjusted_width, adjusted_height)
         logger.info("Waiting for layout stabilization after resize...")
         # 画像サイズの確認とログ
         logger.info(f"Screenshot dimensions: {img.width}x{img.height}")
+        # 余白トリミングが有効な場合
+        if trim_whitespace:
+            # 余分な空白をトリミング
+            img = trim_image_whitespace(img, threshold=248, padding=20)
+            logger.info(f"Trimmed dimensions: {img.width}x{img.height}")
         return img
     except Exception as e:
                 logger.error(f"Error removing temporary file {tmp_path}: {e}")
 # --- Geminiを使った新しい関数 ---
+def text_to_screenshot(text: str, extension_percentage: float, temperature: float = 1.0, trim_whitespace: bool = True) -> Image.Image:
     """Convert text to HTML with the Gemini API and render it to a screenshot.
     Args:
         text: Source text handed to generate_html_from_text.
         extension_percentage: Extra vertical space (percent) forwarded to the renderer.
         temperature: Generation temperature forwarded to generate_html_from_text.
         trim_whitespace: Whether the renderer should trim surplus blank margins.
     Returns:
         The rendered PIL image, or a 1x1 black image if any step raises.
     """
     try:
         # 1. Generate HTML from the text (the temperature parameter is passed along)
         html_code = generate_html_from_text(text, temperature)
         # 2. Render the HTML to a screenshot, flagged as Gemini-generated content
+        return render_fullpage_screenshot(html_code, extension_percentage,
+                                         is_gemini_content=True, trim_whitespace=trim_whitespace)
     except Exception as e:
         logger.error(f"テキストからスクリーンショット生成中にエラーが発生: {e}", exc_info=True)
         return Image.new('RGB', (1, 1), color=(0, 0, 0))  # black image on error
     logger.info(f"Mounting cdn directory: {cdn_dir}")
     app.mount("/cdn", StaticFiles(directory=cdn_dir), name="cdn")
 # API Endpoint for screenshot generation
 @app.post("/api/screenshot",
           response_class=StreamingResponse,
         # NOTE: this blocking Selenium call is made directly from the async endpoint;
         # FastAPI only moves plain "def" endpoints to its threadpool, not calls inside "async def"
         pil_image = render_fullpage_screenshot(
             request.html_code,
+            request.extension_percentage,
+            trim_whitespace=request.trim_whitespace
         )
         if pil_image.size == (1, 1):
         pil_image = text_to_screenshot(
             request.text,
             request.extension_percentage,
+            request.temperature,
+            request.trim_whitespace
         )
         if pil_image.size == (1, 1):
 # --- Gradio Interface Definition ---
 # 入力モードの選択用Radioコンポーネント
+def process_input(input_mode, input_text, extension_percentage, temperature, trim_whitespace):
     """Run the processing path that matches the selected input mode."""
     if input_mode == "HTML入力":
         # HTML mode: use the existing path and render the given HTML directly
+        return render_fullpage_screenshot(input_text, extension_percentage, trim_whitespace=trim_whitespace)
     else:
         # Text input mode: go through the Gemini API
+        return text_to_screenshot(input_text, extension_percentage, temperature, trim_whitespace)
 # Gradio UIの定義
 with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr.themes.Base()) as iface:
             value="HTML入力"
         )
+    # 共用のテキストボックス
     input_text = gr.Textbox(
         lines=15,
         label="入力",
             minimum=0,
             maximum=30,
             step=1.0,
+            value=6,    # デフォルト値6%
             label="上下高さ拡張率（%）"
         )
+        # 温度調整スライダー（テキストモード時のみ表示）
         temperature = gr.Slider(
             minimum=0.0,
             maximum=1.4,
             visible=False  # 最初は非表示
         )
+    # 余白トリミングオプション
+    trim_whitespace = gr.Checkbox(
+        label="余白を自動トリミング",
+        value=True,
+        info="生成される画像から余分な空白領域を自動的に削除します"
+    )
     submit_btn = gr.Button("生成")
     output_image = gr.Image(type="pil", label="ページ全体のスクリーンショット")
     # 入力モード変更時のイベント処理（テキストモード時のみ温度スライダーを表示）
     def update_temperature_visibility(mode):
         """Return an update dict whose 'visible' flag is True only when mode is 'テキスト入力'."""
+        # Update payload in the form used with Gradio 4.x
         return {"visible": mode == "テキスト入力", "__type__": "update"}
     input_mode.change(
     # 生成ボタンクリック時のイベント処理
     submit_btn.click(
         fn=process_input,
+        inputs=[input_mode, input_text, extension_percentage, temperature, trim_whitespace],
         outputs=output_image
     )