#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Full-page HTML renderer and Gemini text-to-infographic generator.

The module exposes the same functionality three ways:

* ``POST /api/screenshot``          - render caller-supplied HTML to a PNG.
* ``POST /api/text-to-screenshot``  - ask Gemini for HTML, then render it.
* a Gradio UI mounted at ``/`` that drives both code paths.
"""

# ───────────────────────────
# Standard library / third-party imports
# ───────────────────────────
import logging
import os
import tempfile
import time
from io import BytesIO
from pathlib import Path
from typing import List, Optional

import gradio as gr
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PIL import Image
from huggingface_hub import hf_hub_download

# New Google Gen AI SDK
from google import genai
from google.genai import types

# ───────────────────────────
# Logging
# ───────────────────────────
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ───────────────────────────
# Shared constants
# ───────────────────────────
# Design styles for which a "<style>/prompt.txt" system prompt is looked up.
SUPPORTED_STYLES = ("standard", "cute", "resort", "cool", "dental")
DEFAULT_STYLE = "standard"

# Model used when the GEMINI_MODEL environment variable is not set.
DEFAULT_MODEL = "gemini-1.5-pro"
# The only model for which thinking_budget=0 is attached to the request.
THINKING_OFF_MODEL = "gemini-2.5-flash-preview-04-17"

# Values of the Gradio input-mode radio button (user-facing strings).
MODE_HTML = "HTML入力"
MODE_TEXT = "テキスト入力"

# Clamp limits applied to the browser window before the final capture.
_MIN_DIMENSION = 100
_MAX_HEIGHT = 4000
_MAX_WIDTH = 2000

_PAGE_HEIGHT_JS = (
    "return Math.max(document.body.scrollHeight, "
    "document.documentElement.scrollHeight)"
)
_PAGE_WIDTH_JS = (
    "return Math.max(document.body.scrollWidth, "
    "document.documentElement.scrollWidth)"
)

_HTML_FENCE = "```html"

# NOTE(review): the original stylesheet text could not be recovered (the
# markup inside this literal was stripped from the copy this file was restored
# from). The rules below are a conservative stand-in that only normalises icon
# alignment; replace them with the project's real CSS if it is available.
_FA_LAYOUT_CSS = """
<style>
[class*="fa-"] {
    display: inline-block;
    vertical-align: middle;
}
</style>
"""


# ───────────────────────────
# Pydantic request models
# ───────────────────────────
class GeminiRequest(BaseModel):
    """Request body of ``POST /api/text-to-screenshot``."""

    text: str
    extension_percentage: float = 10.0
    temperature: float = 0.5
    trim_whitespace: bool = True
    style: str = "standard"


class ScreenshotRequest(BaseModel):
    """Request body of ``POST /api/screenshot``."""

    html_code: str
    extension_percentage: float = 10.0
    trim_whitespace: bool = True
    # Accepted for parity with GeminiRequest; the screenshot route ignores it.
    style: str = "standard"


# ───────────────────────────
# Font Awesome layout correction
# ───────────────────────────
def enhance_font_awesome_layout(html_code: str) -> str:
    """Inject the Font Awesome layout stylesheet into *html_code*.

    The stylesheet is placed, in order of preference:

    1. immediately before ``</head>`` when the document has a ``<head>``;
    2. in a new ``<head>`` inserted before ``<body`` when the document has an
       ``<html`` tag but no head;
    3. otherwise the input is treated as a fragment and wrapped in a complete
       ``<html><head>...</head>...</html>`` document.

    Args:
        html_code: HTML document or fragment.

    Returns:
        The HTML with the stylesheet added.
    """
    fa_css = _FA_LAYOUT_CSS
    if "<head>" in html_code:
        # count=1: a literal "</head>" inside a script or comment must not
        # receive a second copy of the stylesheet.
        return html_code.replace("</head>", f"{fa_css}</head>", 1)
    if "<html" in html_code:
        head_end = html_code.find("</head>")
        if head_end > 0:
            return html_code[:head_end] + fa_css + html_code[head_end:]
        body_start = html_code.find("<body")
        if body_start > 0:
            return (
                html_code[:body_start]
                + f"<head>{fa_css}</head>"
                + html_code[body_start:]
            )
    return f"<html><head>{fa_css}</head>{html_code}</html>"


# ───────────────────────────
# prompt.txt loading
# ───────────────────────────
def load_system_instruction(style: str = "standard") -> str:
    """Return the system prompt text for *style*.

    A ``<style>/prompt.txt`` file next to this module is preferred; when it
    does not exist the file is downloaded from the Hugging Face Hub dataset.
    Unknown styles fall back to ``"standard"`` with a warning.

    Args:
        style: One of ``SUPPORTED_STYLES``.

    Returns:
        The contents of the prompt file.
    """
    if style not in SUPPORTED_STYLES:
        logger.warning("未知の style '%s' → 'standard' に変更", style)
        style = DEFAULT_STYLE

    local = Path(os.path.dirname(__file__)) / style / "prompt.txt"
    if local.exists():
        return local.read_text(encoding="utf-8")

    # HF Hub fallback
    file_path = hf_hub_download(
        repo_id="tomo2chin2/GURAREKOstlyle",
        filename=f"{style}/prompt.txt",
        repo_type="dataset",
    )
    with open(file_path, encoding="utf-8") as f:
        return f.read()


# ───────────────────────────
# White-margin trimming
# ───────────────────────────
def trim_image_whitespace(img: Image.Image, threshold: int = 250,
                          padding: int = 10) -> Image.Image:
    """Crop near-white borders off *img*.

    A pixel counts as content when its grayscale value is below *threshold*.
    The image is cropped to the bounding box of all content pixels, grown by
    *padding* pixels on every side and clipped to the image bounds.

    Args:
        img: Image to trim.
        threshold: Grayscale value (0-255) at or above which a pixel is
            considered background.
        padding: Margin, in pixels, kept around the content.

    Returns:
        The cropped image, or *img* unchanged when it has no content pixels.
    """
    gray = img.convert("L")
    w, h = gray.size
    # Map content pixels to 255 and background to 0, then let Pillow compute
    # the bounding box of the non-zero region. This replaces a per-pixel
    # Python loop that also seeded the top bound with the image *width*.
    mask = gray.point(lambda value: 255 if value < threshold else 0)
    bbox = mask.getbbox()
    if bbox is None:
        return img
    left, upper, right, lower = bbox  # right/lower are exclusive
    return img.crop((
        max(0, left - padding),
        max(0, upper - padding),
        min(w, right + padding),
        min(h, lower + padding),
    ))


# ───────────────────────────
# Selenium full-page screenshot
# ───────────────────────────
def render_fullpage_screenshot(html_code: str,
                               extension_percentage: float = 6.0,
                               trim_whitespace=True) -> Image.Image:
    """Render *html_code* in headless Chrome and capture the whole page.

    The HTML is written to a temporary file, loaded in Chrome, scrolled from
    top to bottom once, and then the window is resized to the page size so a
    single screenshot covers everything.

    Args:
        html_code: Complete HTML document to render.
        extension_percentage: Extra height, as a percentage of the measured
            page height, added to the window before capturing.
        trim_whitespace: When true, near-white borders are cropped from the
            result.

    Returns:
        The screenshot. Any failure is logged and a 1x1 RGB placeholder image
        is returned instead of raising.
    """
    driver: Optional[webdriver.Chrome] = None
    tmp_path: Optional[str] = None
    try:
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".html", encoding="utf-8"
        ) as f:
            tmp_path = f.name
            f.write(html_code)

        opts = Options()
        opts.add_argument("--headless")
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")

        driver = webdriver.Chrome(options=opts)
        driver.set_window_size(1200, 1000)
        # as_uri() yields a correctly escaped file:// URL on every platform.
        driver.get(Path(tmp_path).as_uri())
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        time.sleep(3)

        total = driver.execute_script(_PAGE_HEIGHT_JS)
        # Fall back to 1 so a zero/None viewport cannot break the step count.
        vp = driver.execute_script("return window.innerHeight") or 1
        # Walk down the page in overlapping steps, then return to the top;
        # the page height is re-measured afterwards.
        for i in range(max(1, int(total / vp)) + 1):
            driver.execute_script(f"window.scrollTo(0, {i * (vp - 200)})")
            time.sleep(0.2)
        driver.execute_script("window.scrollTo(0,0)")
        time.sleep(0.5)

        total = driver.execute_script(_PAGE_HEIGHT_JS)
        h = int(total * (1 + extension_percentage / 100))
        w = driver.execute_script(_PAGE_WIDTH_JS)
        h = min(max(h, _MIN_DIMENSION), _MAX_HEIGHT)
        w = min(max(w, _MIN_DIMENSION), _MAX_WIDTH)
        driver.set_window_size(w, h)
        time.sleep(0.5)

        img = Image.open(BytesIO(driver.get_screenshot_as_png()))
        return trim_image_whitespace(img, 248, 20) if trim_whitespace else img
    except Exception as e:
        logger.error("Screenshot error: %s", e, exc_info=True)
        return Image.new("RGB", (1, 1))
    finally:
        # Cleanup is best-effort: a failure here must not replace the result.
        if driver:
            try:
                driver.quit()
            except Exception:
                logger.debug("driver.quit() failed", exc_info=True)
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                logger.warning("Could not remove temp file %s", tmp_path)


# ───────────────────────────
# Default SafetySetting list
# ───────────────────────────
def _default_safety() -> List[types.SafetySetting]:
    """Return BLOCK_MEDIUM_AND_ABOVE settings for the four harm categories."""
    categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    return [
        types.SafetySetting(category=category,
                            threshold="BLOCK_MEDIUM_AND_ABOVE")
        for category in categories
    ]


# ───────────────────────────
# Gemini → HTML
# ───────────────────────────
def _extract_html_block(raw: str) -> Optional[str]:
    """Return the text between the first ```html fence and the last ``` fence.

    Returns ``None`` when no such fenced block is present.
    """
    start, end = raw.find(_HTML_FENCE), raw.rfind("```")
    if 0 <= start < end:
        return raw[start + len(_HTML_FENCE):end].strip()
    return None


def generate_html_from_text(text: str, temperature: float = 0.3,
                            style: str = "standard") -> str:
    """Ask Gemini to turn *text* into an HTML infographic.

    The model name is read from ``GEMINI_MODEL`` (default ``DEFAULT_MODEL``)
    and the API key from ``GEMINI_API_KEY``.

    Args:
        text: Source text sent as the request contents.
        temperature: Sampling temperature.
        style: Design style selecting the system prompt.

    Returns:
        The HTML from the response's ```html block with the Font Awesome
        stylesheet injected, or the raw response text (unmodified) when no
        such block is found.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is not set.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY is not set")

    model_name = os.getenv("GEMINI_MODEL", DEFAULT_MODEL)
    client = genai.Client(api_key=api_key)

    cfg_kwargs = dict(
        system_instruction=load_system_instruction(style),
        temperature=temperature,
        top_p=0.7,
        top_k=20,
        max_output_tokens=8192,
        candidate_count=1,
        safety_settings=_default_safety(),
    )
    # thinking_budget=0 is attached only for the 2.5 Flash preview model.
    if model_name == THINKING_OFF_MODEL:
        cfg_kwargs["thinking_config"] = types.ThinkingConfig(thinking_budget=0)

    resp = client.models.generate_content(
        model=model_name,
        contents=text,
        config=types.GenerateContentConfig(**cfg_kwargs),
    )
    raw = resp.text or ""

    html = _extract_html_block(raw)
    if html is not None:
        return enhance_font_awesome_layout(html)

    logger.warning("```html``` ブロック未検出 — 生レスポンス返却")
    return raw


# ───────────────────────────
# Text → screenshot pipeline
# ───────────────────────────
def text_to_screenshot(text: str, extension_percentage: float,
                       temperature=0.3, trim_whitespace=True,
                       style="standard"):
    """Generate HTML from *text* with Gemini and render it to an image.

    Returns:
        The rendered screenshot, or a 1x1 RGB placeholder image when HTML
        generation fails (the error is logged, not raised).
    """
    try:
        html = generate_html_from_text(text, temperature, style)
        return render_fullpage_screenshot(html, extension_percentage,
                                          trim_whitespace)
    except Exception as e:
        logger.error("%s", e, exc_info=True)
        return Image.new("RGB", (1, 1))


# ───────────────────────────
# FastAPI setup
# ───────────────────────────
app = FastAPI()
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive; confirm whether credentialed cross-origin calls are required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount Gradio's bundled static files (only the directories that exist).
gr_dir = os.path.dirname(gr.__file__)
for name, sub in [
    ("static", "templates/frontend/static"),
    ("_app", "templates/frontend/_app"),
    ("assets", "templates/frontend/assets"),
    ("cdn", "templates/cdn"),
]:
    p = os.path.join(gr_dir, sub)
    if os.path.exists(p):
        app.mount(f"/{name}", StaticFiles(directory=p), name=name)


# ───────────────────────────
# API routes
# ───────────────────────────
def _png_response(img: Image.Image) -> StreamingResponse:
    """Encode *img* as PNG and wrap it in a streaming ``image/png`` response."""
    buf = BytesIO()
    img.save(buf, format="PNG")
    buf.seek(0)
    return StreamingResponse(buf, media_type="image/png")


# The handlers are plain ``def`` on purpose: rendering blocks on Selenium,
# network calls and time.sleep, so FastAPI must run them in its threadpool
# rather than on the event loop.
@app.post("/api/screenshot", response_class=StreamingResponse,
          tags=["Screenshot"])
def api_screenshot(req: ScreenshotRequest):
    """Render the posted HTML and return the screenshot as a PNG."""
    img = render_fullpage_screenshot(req.html_code, req.extension_percentage,
                                     req.trim_whitespace)
    return _png_response(img)


@app.post("/api/text-to-screenshot", response_class=StreamingResponse,
          tags=["Gemini", "Screenshot"])
def api_text_to_ss(req: GeminiRequest):
    """Generate an infographic from the posted text and return it as a PNG."""
    img = text_to_screenshot(req.text, req.extension_percentage,
                             req.temperature, req.trim_whitespace, req.style)
    return _png_response(img)


# ───────────────────────────
# Gradio UI
# ───────────────────────────
def process_input(mode, inp, ext, temp, trim, style):
    """Dispatch the UI input to the HTML renderer or the Gemini pipeline."""
    if mode == MODE_HTML:
        return render_fullpage_screenshot(inp, ext, trim)
    return text_to_screenshot(inp, ext, temp, trim, style)


def _toggle_text_controls(mode):
    """Show the temperature and style controls only in text-input mode."""
    return [{"visible": mode == MODE_TEXT, "__type__": "update"}] * 2


with gr.Blocks(title="Full Page Screenshot & Gemini 2.5 Flash") as iface:
    gr.Markdown("## HTML ビューア & テキスト→インフォグラフィック")
    mode_r = gr.Radio([MODE_HTML, MODE_TEXT], value=MODE_HTML,
                      label="入力モード")
    inp_tb = gr.Textbox(lines=15, label="入力")
    # NOTE(review): the original nesting under gr.Row() was lost together with
    # the file's indentation; confirm which controls belong in the row.
    with gr.Row():
        style_dd = gr.Dropdown(list(SUPPORTED_STYLES), value="standard",
                               label="デザインスタイル", visible=False)
        ext_sl = gr.Slider(0, 30, 10, label="上下高さ拡張率(%)")
        temp_sl = gr.Slider(0.0, 1.0, 0.5, step=0.1, label="生成温度",
                            visible=False)
        trim_cb = gr.Checkbox(True, label="余白自動トリミング")
    btn = gr.Button("生成")
    out_img = gr.Image(type="pil", label="スクリーンショット")

    mode_r.change(_toggle_text_controls, mode_r, [temp_sl, style_dd])
    btn.click(process_input,
              [mode_r, inp_tb, ext_sl, temp_sl, trim_cb, style_dd],
              out_img)

    gr.Markdown(f"*使用モデル*: `{os.getenv('GEMINI_MODEL', DEFAULT_MODEL)}` "
                "(gemini‑2.5‑flash‑preview‑04‑17 では thinking_budget=0 を自動付与)")

app = gr.mount_gradio_app(app, iface, path="/")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)