HTMLviewer2_API / app.py
tomo2chin2's picture
Update app.py
ce62e68 verified
raw
history blame
13.6 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Full-page HTML renderer & Gemini 2.5 Flash text-to-infographic generator
"""
# ---------------------------
# Standard / third-party library imports
# ---------------------------
import os, time, tempfile, logging
from io import BytesIO
from typing import List, Optional
import gradio as gr
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PIL import Image
from huggingface_hub import hf_hub_download
# New Google Gen AI SDK
from google import genai
from google.genai import types
# ---------------------------
# Logging
# ---------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ---------------------------
# Pydantic input models
# ---------------------------
# Request body accepted by the text-to-screenshot endpoint.
class GeminiRequest(BaseModel):
    # Source text sent to the Gemini model as the generation input.
    text: str
    # Extra page height, in percent, added on top of the measured document height.
    extension_percentage: float = 10.0
    # Sampling temperature forwarded to the model.
    temperature: float = 0.5
    # When true, near-white margins are cropped from the rendered screenshot.
    trim_whitespace: bool = True
    # Name of the prompt style whose prompt.txt is used as the system instruction.
    style: str = "standard"
# Request body accepted by the raw HTML screenshot endpoint.
class ScreenshotRequest(BaseModel):
    # HTML document (or fragment) to render.
    html_code: str
    # Extra page height, in percent, added on top of the measured document height.
    extension_percentage: float = 10.0
    # When true, near-white margins are cropped from the rendered screenshot.
    trim_whitespace: bool = True
    # NOTE(review): not read by the screenshot endpoint visible in this file — confirm whether it is still needed.
    style: str = "standard"
# ---------------------------
# Font Awesome layout correction
# ---------------------------
def enhance_font_awesome_layout(html_code: str) -> str:
    """Inject a CSS block that normalizes Font Awesome icon spacing.

    The stylesheet is inserted exactly once, at the first location that
    applies:

    1. immediately before the first ``</head>``;
    2. immediately after a ``<head>`` tag that is never closed;
    3. as a new ``<head>`` element in front of ``<body`` when the document
       has an ``<html`` tag but no head;
    4. otherwise the whole input is wrapped in ``<html><head>...</head>``.

    Args:
        html_code: HTML document or fragment.

    Returns:
        The HTML with the stylesheet added.
    """
    fa_css = """
<style>
[class*="fa-"]{display:inline-block!important;vertical-align:middle!important;margin-right:8px!important;}
h1 [class*="fa-"],h2 [class*="fa-"],h3 [class*="fa-"],h4 [class*="fa-"],h5 [class*="fa-"],h6 [class*="fa-"]{margin-right:10px!important;}
.fa+span,.fas+span,.far+span,.fab+span,span+.fa,span+.fas,span+.far,span+.fab{display:inline-block!important;margin-left:5px!important;}
li [class*="fa-"],p [class*="fa-"]{margin-right:10px!important;}
.inline-icon{display:inline-flex!important;align-items:center!important;}
[class*="fa-"]+span{display:inline-block!important;vertical-align:middle!important;}
</style>"""

    # Insert at the first closing head tag only. A blanket str.replace would
    # also rewrite any later "</head>" text, e.g. inside a script string.
    head_close = html_code.find("</head>")
    if head_close != -1:
        return html_code[:head_close] + fa_css + html_code[head_close:]

    # A head that was opened but never closed still needs the stylesheet.
    head_open = html_code.find("<head>")
    if head_open != -1:
        insert_at = head_open + len("<head>")
        return html_code[:insert_at] + fa_css + html_code[insert_at:]

    if "<html" in html_code:
        body_start = html_code.find("<body")
        # find() returns -1 when absent; index 0 is a valid match.
        if body_start != -1:
            return html_code[:body_start] + f"<head>{fa_css}</head>" + html_code[body_start:]

    return f"<html><head>{fa_css}</head>{html_code}</html>"
# ---------------------------
# prompt.txt loading
# ---------------------------
def load_system_instruction(style: str = "standard") -> str:
    """Return the system-instruction prompt text for a design style.

    An unrecognized style is logged and replaced with ``"standard"``. The
    prompt is read from ``<module dir>/<style>/prompt.txt`` when that path
    exists; otherwise the file is fetched through ``hf_hub_download`` from
    the dataset repository and read from the returned path.

    Args:
        style: Requested style name.

    Returns:
        Contents of the selected ``prompt.txt`` decoded as UTF-8.
    """
    known_styles = ("standard", "cute", "resort", "cool", "dental")
    if style not in known_styles:
        logger.warning(f"ๆœช็Ÿฅใฎ style '{style}' โ†’ 'standard' ใซๅค‰ๆ›ด")
        style = "standard"

    module_dir = os.path.dirname(__file__)
    prompt_path = os.path.join(module_dir, style, "prompt.txt")
    if not os.path.exists(prompt_path):
        # HF Hub fallback
        prompt_path = hf_hub_download(
            repo_id="tomo2chin2/GURAREKOstlyle",
            filename=f"{style}/prompt.txt",
            repo_type="dataset",
        )

    with open(prompt_path, encoding="utf-8") as prompt_file:
        return prompt_file.read()
# ---------------------------
# White margin trimming
# ---------------------------
def trim_image_whitespace(img: Image.Image, threshold: int = 250, padding: int = 10) -> Image.Image:
    """Crop near-white margins from an image.

    A pixel counts as content when its grayscale value is strictly below
    ``threshold``. The crop box is the bounding box of all content pixels,
    grown by ``padding`` pixels on every side and clamped to the image.

    Args:
        img: Image to trim.
        threshold: Grayscale cut-off; values below it are content.
        padding: Margin, in pixels, kept around the content.

    Returns:
        The cropped image, or ``img`` itself when no content pixel exists.
    """
    gray = img.convert("L")
    width, height = gray.size

    # Map content to 255 and background to 0 so getbbox() (which bounds the
    # non-zero region) yields the content extent. This replaces a per-pixel
    # Python scan that seeded the top edge with the image width instead of
    # its height, giving a wrong crop on tall images.
    content_mask = gray.point(lambda value: 255 if value < threshold else 0)
    bbox = content_mask.getbbox()
    if bbox is None:
        return img

    # getbbox() returns (left, upper, right, lower) with right/lower exclusive,
    # which is the same convention crop() expects.
    left, upper, right, lower = bbox
    return img.crop((
        max(0, left - padding),
        max(0, upper - padding),
        min(width, right + padding),
        min(height, lower + padding),
    ))
# ---------------------------
# Selenium full-page screenshot
# ---------------------------
def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0, trim_whitespace=True) -> Image.Image:
    """Render HTML in headless Chrome and return a full-page screenshot.

    The HTML is written to a temporary file, loaded in Chrome, scrolled from
    top to bottom, and then captured after resizing the window to the
    measured document size (height clamped to 100-4000 px, width to
    100-2000 px).

    Args:
        html_code: HTML document to render.
        extension_percentage: Extra height, in percent of the measured
            document height, added before the capture.
        trim_whitespace: When true, the result is passed through
            ``trim_image_whitespace``.

    Returns:
        The screenshot, or a 1x1 RGB placeholder image if anything fails.
        Failures are logged, never raised.
    """
    driver: Optional[webdriver.Chrome] = None
    tmp_path: Optional[str] = None
    page_height_js = "return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight)"
    try:
        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html", encoding="utf-8") as f:
            tmp_path = f.name
            f.write(html_code)

        opts = Options()
        opts.add_argument("--headless")
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        driver = webdriver.Chrome(options=opts)
        driver.set_window_size(1200, 1000)
        driver.get("file://" + tmp_path)
        WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        time.sleep(3)

        total = int(driver.execute_script(page_height_js) or 0)
        vp = int(driver.execute_script("return window.innerHeight") or 0)
        # Walk the page in overlapping steps so every region scrolls into
        # view once. The range is driven by the step actually used, so long
        # pages reach the bottom; the floor keeps the step positive even when
        # the viewport height is reported as tiny or zero.
        step = max(vp - 200, vp // 2, 100)
        for offset in range(0, max(total, 1), step):
            driver.execute_script(f"window.scrollTo(0, {offset})")
            time.sleep(0.2)
        driver.execute_script("window.scrollTo(0,0)")
        time.sleep(0.5)

        # Measure again: the height may have changed while scrolling.
        total = driver.execute_script(page_height_js)
        h = int(total * (1 + extension_percentage / 100))
        w = driver.execute_script("return Math.max(document.body.scrollWidth, document.documentElement.scrollWidth)")
        h, w = min(max(h, 100), 4000), min(max(w, 100), 2000)
        driver.set_window_size(w, h)
        time.sleep(0.5)

        img = Image.open(BytesIO(driver.get_screenshot_as_png()))
        return trim_image_whitespace(img, 248, 20) if trim_whitespace else img
    except Exception as e:
        logger.error(f"Screenshot error: {e}", exc_info=True)
        return Image.new("RGB", (1, 1))
    finally:
        # Cleanup is best-effort: report problems but never let them replace
        # the value being returned.
        if driver:
            try:
                driver.quit()
            except Exception as quit_err:
                logger.warning("Failed to quit WebDriver: %s", quit_err)
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError as rm_err:
                logger.warning("Failed to remove temp file %s: %s", tmp_path, rm_err)
# ---------------------------
# Default SafetySetting
# ---------------------------
def _default_safety() -> List[types.SafetySetting]:
    """Build the safety settings sent with every generation request.

    Returns:
        One ``SafetySetting`` per harm category, each using the
        ``BLOCK_MEDIUM_AND_ABOVE`` threshold, in a fixed order.
    """
    harm_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    return [
        types.SafetySetting(category=harm, threshold="BLOCK_MEDIUM_AND_ABOVE")
        for harm in harm_categories
    ]
# ---------------------------
# Gemini -> HTML
# ---------------------------
def generate_html_from_text(text: str, temperature: float = 0.3, style: str = "standard") -> str:
    """Ask Gemini to turn text into an HTML document.

    The model name comes from the ``GEMINI_MODEL`` environment variable
    (default ``gemini-1.5-pro``) and the API key from ``GEMINI_API_KEY``.
    The system instruction is loaded for ``style``.

    Args:
        text: Source text sent as the request contents.
        temperature: Sampling temperature.
        style: Prompt style name passed to ``load_system_instruction``.

    Returns:
        The contents of the fenced ``html`` block in the response, passed
        through ``enhance_font_awesome_layout``. If the response has no such
        block, the raw response text is returned unchanged.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is not set.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY is not set")
    model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-pro")
    client = genai.Client(api_key=api_key)

    # Attach thinking_budget=0 only for the 2.5 Flash preview model.
    think_cfg = types.ThinkingConfig(thinking_budget=0) if model_name == "gemini-2.5-flash-preview-04-17" else None
    cfg_kwargs = dict(
        system_instruction=load_system_instruction(style),
        temperature=temperature,
        top_p=0.7,
        top_k=20,
        max_output_tokens=8192,
        candidate_count=1,
        safety_settings=_default_safety(),
    )
    if think_cfg:
        cfg_kwargs["thinking_config"] = think_cfg

    resp = client.models.generate_content(
        model=model_name,
        contents=text,
        config=types.GenerateContentConfig(**cfg_kwargs),
    )
    raw = resp.text or ""

    opening_fence = "```html"
    start = raw.find(opening_fence)
    if start >= 0:
        content_start = start + len(opening_fence)
        end = raw.rfind("```")
        if end > start:
            html = raw[content_start:end]
        else:
            # rfind landed on the opening fence itself, so the block was never
            # closed (e.g. the output was cut off). Keep everything after the
            # fence rather than returning text that still contains the marker.
            logger.warning("Unterminated ```html block; using all text after the opening fence")
            html = raw[content_start:]
        return enhance_font_awesome_layout(html.strip())

    logger.warning("```html``` ใƒ–ใƒญใƒƒใ‚ฏๆœชๆคœๅ‡บ โ€” ็”Ÿใƒฌใ‚นใƒใƒณใ‚น่ฟ”ๅด")
    return raw
# ---------------------------
# Text -> screenshot pipeline
# ---------------------------
def text_to_screenshot(text: str, extension_percentage: float, temperature=0.3, trim_whitespace=True, style="standard"):
    """Generate HTML from text and render it to a screenshot.

    Args:
        text: Source text for HTML generation.
        extension_percentage: Extra page height, in percent, for the capture.
        temperature: Sampling temperature for generation.
        trim_whitespace: Whether the screenshot margins are trimmed.
        style: Prompt style name.

    Returns:
        The rendered image, or a 1x1 RGB placeholder if generation or
        rendering raises. The exception is logged, not propagated.
    """
    try:
        generated_html = generate_html_from_text(text, temperature, style)
        screenshot = render_fullpage_screenshot(generated_html, extension_percentage, trim_whitespace)
    except Exception as exc:
        logger.error(exc, exc_info=True)
        return Image.new("RGB", (1, 1))
    return screenshot
# ---------------------------
# FastAPI setup
# ---------------------------
# ASGI application that hosts the JSON API routes and, later, the Gradio UI.
app = FastAPI()
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
)
# Mount Gradio's bundled static files
gr_dir = os.path.dirname(gr.__file__)
for name, sub in [("static", "templates/frontend/static"), ("_app", "templates/frontend/_app"),
                  ("assets", "templates/frontend/assets"), ("cdn", "templates/cdn")]:
    p = os.path.join(gr_dir, sub)
    # Skip directories that are absent from the installed Gradio package.
    if os.path.exists(p):
        app.mount(f"/{name}", StaticFiles(directory=p), name=name)
# ---------------------------
# API routes
# ---------------------------
@app.post("/api/screenshot", response_class=StreamingResponse, tags=["Screenshot"])
async def api_screenshot(req: ScreenshotRequest):
    """Render the posted HTML and return the screenshot as a PNG stream.

    Rendering drives a browser and sleeps for several seconds, so it runs in
    the threadpool instead of on the event loop; other requests stay
    responsive while it works.
    """
    # Function-scope import: fastapi is already a dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    img = await run_in_threadpool(
        render_fullpage_screenshot, req.html_code, req.extension_percentage, req.trim_whitespace
    )
    buf = BytesIO()
    img.save(buf, format="PNG")
    buf.seek(0)
    return StreamingResponse(buf, media_type="image/png")
@app.post("/api/text-to-screenshot", response_class=StreamingResponse, tags=["Gemini", "Screenshot"])
async def api_text_to_ss(req: GeminiRequest):
    """Generate HTML from the posted text, render it, and return a PNG stream.

    Generation and rendering are both blocking calls, so the pipeline runs in
    the threadpool instead of on the event loop.
    """
    # Function-scope import: fastapi is already a dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    img = await run_in_threadpool(
        text_to_screenshot, req.text, req.extension_percentage, req.temperature, req.trim_whitespace, req.style
    )
    buf = BytesIO()
    img.save(buf, "PNG")
    buf.seek(0)
    return StreamingResponse(buf, media_type="image/png")
# ---------------------------
# Gradio UI
# ---------------------------
def process_input(mode, inp, ext, temp, trim, style):
    """Dispatch the UI submission to the matching renderer.

    When ``mode`` equals the HTML-input radio choice, ``inp`` is rendered
    directly; for any other mode it is treated as text and sent through
    ``text_to_screenshot``.

    Args:
        mode: Selected input-mode label.
        inp: Contents of the input textbox.
        ext: Height extension percentage.
        temp: Generation temperature (text mode only).
        trim: Whether margins are trimmed.
        style: Design style name (text mode only).

    Returns:
        The image produced by the selected renderer.
    """
    if mode == "HTMLๅ…ฅๅŠ›":
        return render_fullpage_screenshot(inp, ext, trim)
    return text_to_screenshot(inp, ext, temp, trim, style)
# Gradio front end: one textbox, a mode switch, and the rendering options.
# NOTE(review): the nesting under gr.Row() was reconstructed from a copy with
# no indentation — confirm which controls belong inside the row.
with gr.Blocks(title="Full Page Screenshot & Gemini 2.5ย Flash") as iface:
    gr.Markdown("## HTML ใƒ“ใƒฅใƒผใ‚ข & ใƒ†ใ‚ญใ‚นใƒˆโ†’ใ‚คใƒณใƒ•ใ‚ฉใ‚ฐใƒฉใƒ•ใ‚ฃใƒƒใ‚ฏ")
    # Input mode selector; the first choice is the default.
    mode_r = gr.Radio(["HTMLๅ…ฅๅŠ›", "ใƒ†ใ‚ญใ‚นใƒˆๅ…ฅๅŠ›"], value="HTMLๅ…ฅๅŠ›", label="ๅ…ฅๅŠ›ใƒขใƒผใƒ‰")
    inp_tb = gr.Textbox(lines=15, label="ๅ…ฅๅŠ›")
    with gr.Row():
        # Style and temperature start hidden; they are shown by the change handler below.
        style_dd = gr.Dropdown(["standard", "cute", "resort", "cool", "dental"], value="standard",
                               label="ใƒ‡ใ‚ถใ‚คใƒณใ‚นใ‚ฟใ‚คใƒซ", visible=False)
        ext_sl = gr.Slider(0, 30, 10, label="ไธŠไธ‹้ซ˜ใ•ๆ‹กๅผต็އ(%)")
        temp_sl = gr.Slider(0.0, 1.0, 0.5, step=0.1, label="็”Ÿๆˆๆธฉๅบฆ", visible=False)
    trim_cb = gr.Checkbox(True, label="ไฝ™็™ฝ่‡ชๅ‹•ใƒˆใƒชใƒŸใƒณใ‚ฐ")
    btn = gr.Button("็”Ÿๆˆ")
    out_img = gr.Image(type="pil", label="ใ‚นใ‚ฏใƒชใƒผใƒณใ‚ทใƒงใƒƒใƒˆ")
    # Show the temperature slider and style dropdown only when the second
    # (text) mode is selected; the same update dict is sent to both outputs.
    mode_r.change(lambda m: [{"visible": m == "ใƒ†ใ‚ญใ‚นใƒˆๅ…ฅๅŠ›", "__type__": "update"}] * 2,
                  mode_r, [temp_sl, style_dd])
    # The argument order here must match the process_input signature.
    btn.click(process_input, [mode_r, inp_tb, ext_sl, temp_sl, trim_cb, style_dd], out_img)
    # The model name is read once, when the UI is built.
    gr.Markdown(f"*ไฝฟ็”จใƒขใƒ‡ใƒซ*: `{os.getenv('GEMINI_MODEL', 'gemini-1.5-pro')}` "
                "(geminiโ€‘2.5โ€‘flashโ€‘previewโ€‘04โ€‘17 ใงใฏ thinking_budget=0 ใ‚’่‡ชๅ‹•ไป˜ไธŽ)")
# Serve the Gradio UI from the root path of the FastAPI application.
app = gr.mount_gradio_app(app, iface, path="/")
# Run a development server when the module is executed as a script.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed for direct execution.
    import uvicorn
    # Listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)