tomo2chin2 committed on
Commit
ce62e68
·
verified ·
1 Parent(s): c6f4adf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -340
app.py CHANGED
@@ -1,26 +1,16 @@
1
-
 
2
  """
3
- Full‑page HTML renderer & Gemini 2.5 Flash text‑to‑infographic generator
4
- ────────────────────────────────────────────────────────────────────────
5
- * Gradio 5.x UI + FastAPI backend
6
- * Selenium‑Chrome headless full‑page screenshot
7
- * Gemini 1.5‑pro 〜 2.5‑flash‑preview‑04‑17 対応
8
- ‑ 2.5‑flash のときだけ thinking_budget=0 を自動付与
9
  """
10
 
11
- # ────────────────────────────────────
12
- # 基本ライブラリ
13
- # ────────────────────────────────────
14
- import os
15
- import time
16
- import tempfile
17
- import logging
18
  from io import BytesIO
19
  from typing import List, Optional
20
 
21
- # ────────────────────────────────────
22
- # サードパーティ
23
- # ────────────────────────────────────
24
  import gradio as gr
25
  from fastapi import FastAPI, HTTPException
26
  from fastapi.responses import StreamingResponse
@@ -35,19 +25,19 @@ from selenium.webdriver.support import expected_conditions as EC
35
  from PIL import Image
36
  from huggingface_hub import hf_hub_download
37
 
38
- # ▶ 新しい Gemini SDK
39
- from google import genai # google‑genai ≥1.11.0
40
- from google.genai import types # 型オブジェクト
41
 
42
- # ────────────────────────────────────
43
- # ロギング設定
44
- # ────────────────────────────────────
45
  logging.basicConfig(level=logging.INFO)
46
  logger = logging.getLogger(__name__)
47
 
48
- # ────────────────────────────────────
49
- # Pydantic モデル
50
- # ────────────────────────────────────
51
  class GeminiRequest(BaseModel):
52
  text: str
53
  extension_percentage: float = 10.0
@@ -63,160 +53,112 @@ class ScreenshotRequest(BaseModel):
63
  style: str = "standard"
64
 
65
 
66
- # ────────────────────────────────────
67
- # Font Awesome レイアウト調整
68
- # ────────────────────────────────────
69
  def enhance_font_awesome_layout(html_code: str) -> str:
70
- """Font Awesome アイコンの表示ズレを修正する追加 CSS を挿入"""
71
- fa_fix_css = """
72
  <style>
73
- /* Font Awesome icon tweaks */
74
- [class*="fa-"]{
75
- display:inline-block!important;
76
- vertical-align:middle!important;
77
- margin-right:8px!important;
78
- }
79
- h1 [class*="fa-"],h2 [class*="fa-"],h3 [class*="fa-"],
80
- h4 [class*="fa-"],h5 [class*="fa-"],h6 [class*="fa-"]{
81
- margin-right:10px!important;
82
- }
83
- .fa+span,.fas+span,.far+span,.fab+span,
84
- span+.fa,span+.fas,span+.far,span+.fab{
85
- display:inline-block!important;margin-left:5px!important;
86
- }
87
  li [class*="fa-"],p [class*="fa-"]{margin-right:10px!important;}
88
  .inline-icon{display:inline-flex!important;align-items:center!important;}
89
  [class*="fa-"]+span{display:inline-block!important;vertical-align:middle!important;}
90
- </style>
91
- """
92
  if "<head>" in html_code:
93
- return html_code.replace("</head>", f"{fa_fix_css}</head>")
94
- elif "<html" in html_code:
95
  head_end = html_code.find("</head>")
96
  if head_end > 0:
97
- return html_code[:head_end] + fa_fix_css + html_code[head_end:]
98
  body_start = html_code.find("<body")
99
  if body_start > 0:
100
- return html_code[:body_start] + f"<head>{fa_fix_css}</head>" + html_code[body_start:]
101
- return f"<html><head>{fa_fix_css}</head>{html_code}</html>"
102
 
103
 
104
- # ────────────────────────────────────
105
- # システムプロンプト読み込み
106
- # ────────────────────────────────────
107
  def load_system_instruction(style: str = "standard") -> str:
108
- """style ごとの prompt.txt をローカル or HF Hub から取得"""
109
- styles = ["standard", "cute", "resort", "cool", "dental"]
110
- if style not in styles:
111
- logger.warning(f"無効なスタイル '{style}' → 'standard' を使用")
112
  style = "standard"
113
-
114
- # ローカル first
115
- local_path = os.path.join(os.path.dirname(__file__), style, "prompt.txt")
116
- if os.path.exists(local_path):
117
- with open(local_path, encoding="utf-8") as f:
118
  return f.read()
119
-
120
  # HF Hub fallback
121
- try:
122
- file_path = hf_hub_download(
123
- repo_id="tomo2chin2/GURAREKOstlyle",
124
- filename=f"{style}/prompt.txt",
125
- repo_type="dataset",
126
- )
127
- with open(file_path, encoding="utf-8") as f:
128
- return f.read()
129
- except Exception as e:
130
- logger.error(f"prompt.txt 取得失敗: {e}")
131
- raise
132
-
133
-
134
- # ────────────────────────────────────
135
- # ③ 画像の空白トリミング
136
- # ────────────────────────────────────
137
- def trim_image_whitespace(
138
- image: Image.Image, threshold: int = 250, padding: int = 10
139
- ) -> Image.Image:
140
- """白余白を検出しパディングを残して切り詰める"""
141
- gray = image.convert("L")
142
- data = gray.getdata()
143
  w, h = gray.size
144
- min_x, min_y, max_x, max_y = w, h, 0, 0
145
- pixels = list(data)
146
- pixels = [pixels[i * w : (i + 1) * w] for i in range(h)]
 
147
  for y in range(h):
148
  for x in range(w):
149
- if pixels[y][x] < threshold:
150
  min_x, min_y = min(min_x, x), min(min_y, y)
151
  max_x, max_y = max(max_x, x), max(max_y, y)
152
  if min_x > max_x:
153
- return image
154
  min_x, min_y = max(0, min_x - padding), max(0, min_y - padding)
155
  max_x, max_y = min(w - 1, max_x + padding), min(h - 1, max_y + padding)
156
- return image.crop((min_x, min_y, max_x + 1, max_y + 1))
157
 
158
 
159
- # ────────────────────────────────────
160
- # Selenium フルページ SS 生成
161
- # ────────────────────────────────────
162
- def render_fullpage_screenshot(
163
- html_code: str, extension_percentage: float = 6.0, trim_whitespace: bool = True
164
- ) -> Image.Image:
165
- """HTML 文字列 → full‑page PNG → PIL.Image"""
166
- tmp_path: Optional[str] = None
167
  driver: Optional[webdriver.Chrome] = None
 
168
  try:
169
- with tempfile.NamedTemporaryFile(
170
- suffix=".html", delete=False, mode="w", encoding="utf-8"
171
- ) as tmp:
172
- tmp.write(html_code)
173
- tmp_path = tmp.name
174
- options = Options()
175
- options.add_argument("--headless")
176
- options.add_argument("--no-sandbox")
177
- options.add_argument("--disable-dev-shm-usage")
178
- options.add_argument("--force-device-scale-factor=1")
179
- driver = webdriver.Chrome(options=options)
180
  driver.set_window_size(1200, 1000)
181
- driver.get(f"file://{tmp_path}")
182
-
183
- WebDriverWait(driver, 15).until(
184
- EC.presence_of_element_located((By.TAG_NAME, "body"))
185
- )
186
- time.sleep(3) # 初期ロード待ち
187
-
188
- # 縦スクロールしてレンダリング確定
189
- total = driver.execute_script(
190
- "return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);"
191
- )
192
- vp = driver.execute_script("return window.innerHeight;")
193
- for i in range(max(1, total // vp) + 1):
194
- driver.execute_script(f"window.scrollTo(0, {i*(vp-200)});")
195
  time.sleep(0.2)
196
- driver.execute_script("window.scrollTo(0,0);")
197
- time.sleep(1)
198
-
199
- # 全体高さに余白を追加
200
- total = driver.execute_script(
201
- "return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);"
202
- )
203
- height = int(total * (1 + extension_percentage / 100))
204
- width = driver.execute_script(
205
- "return Math.max(document.documentElement.scrollWidth, document.body.scrollWidth);"
206
- )
207
- height = min(max(height, 100), 4000)
208
- width = min(max(width, 100), 2000)
209
- driver.set_window_size(width, height)
210
  time.sleep(0.5)
211
-
212
- png = driver.get_screenshot_as_png()
213
- img = Image.open(BytesIO(png))
214
- if trim_whitespace:
215
- img = trim_image_whitespace(img, threshold=248, padding=20)
216
- return img
 
 
217
  except Exception as e:
218
- logger.error(f"Screenshot Error: {e}", exc_info=True)
219
- return Image.new("RGB", (1, 1), (0, 0, 0))
220
  finally:
221
  if driver:
222
  try:
@@ -227,247 +169,131 @@ def render_fullpage_screenshot(
227
  os.remove(tmp_path)
228
 
229
 
230
- # ────────────────────────────────────
231
- # Gemini → HTML 生成
232
- # ────────────────────────────────────
233
- def _genai_client(api_key: str) -> genai.Client:
234
- return genai.Client(api_key=api_key)
235
-
236
-
237
  def _default_safety() -> List[types.SafetySetting]:
238
  return [
239
- types.SafetySetting(
240
- category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_MEDIUM_AND_ABOVE"
241
- ),
242
- types.SafetySetting(
243
- category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_MEDIUM_AND_ABOVE"
244
- ),
245
- types.SafetySetting(
246
- category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_MEDIUM_AND_ABOVE"
247
- ),
248
- types.SafetySetting(
249
- category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_MEDIUM_AND_ABOVE"
250
- ),
251
  ]
252
 
253
 
254
- def generate_html_from_text(
255
- text: str, temperature: float = 0.3, style: str = "standard"
256
- ) -> str:
257
- """
258
- Gemini モデルから HTML コードを返す。
259
- * 環境変数 GEMINI_MODEL が gemini‑2.5‑flash-preview‑04-17 の場合
260
- -> thinking_budget=0 を付けて呼び出し
261
- """
262
  api_key = os.getenv("GEMINI_API_KEY")
263
  if not api_key:
264
- raise ValueError("GEMINI_API_KEY が設定されていません")
265
  model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-pro")
266
- client = _genai_client(api_key)
 
 
 
267
 
268
- gen_cfg = types.GenerationConfig(
 
269
  temperature=temperature,
270
  top_p=0.7,
271
  top_k=20,
272
  max_output_tokens=8192,
273
  candidate_count=1,
274
- )
275
- safety_cfg = _default_safety()
276
- think_cfg = (
277
- types.ThinkingConfig(thinking_budget=0)
278
- if model_name == "gemini-2.5-flash-preview-04-17"
279
- else None
280
- )
281
-
282
- req_cfg_kwargs = dict(
283
- generation_config=gen_cfg,
284
- safety_settings=safety_cfg,
285
  )
286
  if think_cfg:
287
- req_cfg_kwargs["thinking_config"] = think_cfg
288
-
289
- req_cfg = types.GenerateContentConfig(**req_cfg_kwargs)
290
 
291
- prompt = f"{load_system_instruction(style)}\n\n{text}"
292
- logger.info(
293
- f"Gemini request → model={model_name}, temp={temperature}, thinking_budget={0 if think_cfg else None}"
 
294
  )
295
-
296
- rsp = client.models.generate_content(
297
- model=model_name, contents=prompt, config=req_cfg
298
- )
299
- raw = rsp.text or ""
300
-
301
- start = raw.find("```html")
302
- end = raw.rfind("```")
303
  if 0 <= start < end:
304
- html_code = raw[start + 7 : end].strip()
305
- return enhance_font_awesome_layout(html_code)
306
-
307
- logger.warning("```html``` ブロックなし。生レスポンスを返します")
308
  return raw
309
 
310
 
311
- # ────────────────────────────────────
312
- # テキスト → スクリーンショット統合
313
- # ────────────────────────────────────
314
- def text_to_screenshot(
315
- text: str,
316
- extension_percentage: float,
317
- temperature: float = 0.3,
318
- trim_whitespace: bool = True,
319
- style: str = "standard",
320
- ) -> Image.Image:
321
  try:
322
  html = generate_html_from_text(text, temperature, style)
323
  return render_fullpage_screenshot(html, extension_percentage, trim_whitespace)
324
  except Exception as e:
325
  logger.error(e, exc_info=True)
326
- return Image.new("RGB", (1, 1), (0, 0, 0))
327
 
328
 
329
- # ────────────────────────────────────
330
- # FastAPI セットアップ
331
- # ────────────────────────────────────
332
  app = FastAPI()
333
  app.add_middleware(
334
- CORSMiddleware,
335
- allow_origins=["*"],
336
- allow_credentials=True,
337
- allow_methods=["*"],
338
- allow_headers=["*"],
339
  )
340
 
341
- # Gradio 静的ファイルをマウント
342
- gradio_dir = os.path.dirname(gr.__file__)
343
- for sub in [
344
- ("static", "templates/frontend/static"),
345
- ("_app", "templates/frontend/_app"),
346
- ("assets", "templates/frontend/assets"),
347
- ("cdn", "templates/cdn"),
348
- ]:
349
- target = os.path.join(gradio_dir, sub[1])
350
- if os.path.exists(target):
351
- app.mount(f"/{sub[0]}", StaticFiles(directory=target), name=sub[0])
352
- logger.info(f"Mounted /{sub[0]} {target}")
353
-
354
- # ────────────────────────────────────
355
- # ⑧ API エンドポイント
356
- # ────────────────────────────────────
357
- @app.post(
358
- "/api/screenshot",
359
- response_class=StreamingResponse,
360
- tags=["Screenshot"],
361
- summary="HTML → Full‑page Screenshot",
362
- )
363
- async def api_render_screenshot(req: ScreenshotRequest):
364
- img = render_fullpage_screenshot(
365
- req.html_code, req.extension_percentage, req.trim_whitespace
366
- )
367
  buf = BytesIO()
368
  img.save(buf, format="PNG")
369
  buf.seek(0)
370
  return StreamingResponse(buf, media_type="image/png")
371
 
372
 
373
- @app.post(
374
- "/api/text-to-screenshot",
375
- response_class=StreamingResponse,
376
- tags=["Screenshot", "Gemini"],
377
- summary="Text → Gemini → Infographic Screenshot",
378
- )
379
- async def api_text_to_screenshot(req: GeminiRequest):
380
- img = text_to_screenshot(
381
- req.text,
382
- req.extension_percentage,
383
- req.temperature,
384
- req.trim_whitespace,
385
- req.style,
386
- )
387
- buf = BytesIO()
388
- img.save(buf, format="PNG")
389
- buf.seek(0)
390
  return StreamingResponse(buf, media_type="image/png")
391
 
392
 
393
- # ────────────────────────────────────
394
- # Gradio UI
395
- # ────────────────────────────────────
396
- def process_input(
397
- input_mode, input_text, extension_percentage, temperature, trim_whitespace, style
398
- ):
399
- if input_mode == "HTML入力":
400
- return render_fullpage_screenshot(
401
- input_text, extension_percentage, trim_whitespace
402
- )
403
- return text_to_screenshot(
404
- input_text, extension_percentage, temperature, trim_whitespace, style
405
- )
406
-
407
 
408
- with gr.Blocks(title="Full Page Screenshot + Gemini 2.5 Flash") as iface:
409
- gr.Markdown("## HTML ビューア & テキスト インフォグラフィック")
410
- input_mode = gr.Radio(["HTML入力", "テキスト入力"], value="HTML入力", label="入力モード")
411
- input_text = gr.Textbox(lines=15, label="入力")
412
  with gr.Row():
413
- style_dd = gr.Dropdown(
414
- ["standard", "cute", "resort", "cool", "dental"],
415
- value="standard",
416
- label="デザインスタイル",
417
- visible=False,
418
- )
419
- extension_slider = gr.Slider(0, 30, 10, label="上下高さ拡張率(%)")
420
- temperature_slider = gr.Slider(
421
- 0.0,
422
- 1.0,
423
- 0.5,
424
- step=0.1,
425
- label="生成温度",
426
- visible=False,
427
- )
428
- trim_cb = gr.Checkbox(value=True, label="余白自動トリミング")
429
  btn = gr.Button("生成")
430
  out_img = gr.Image(type="pil", label="スクリーンショット")
 
 
 
 
 
431
 
432
- def _vis(mode):
433
- is_text = mode == "テキスト入力"
434
- return [
435
- {"visible": is_text, "__type__": "update"},
436
- {"visible": is_text, "__type__": "update"},
437
- ]
438
-
439
- input_mode.change(_vis, input_mode, [temperature_slider, style_dd])
440
- btn.click(
441
- process_input,
442
- [
443
- input_mode,
444
- input_text,
445
- extension_slider,
446
- temperature_slider,
447
- trim_cb,
448
- style_dd,
449
- ],
450
- out_img,
451
- )
452
-
453
- gr.Markdown(
454
- f"""
455
- ### 環境
456
- * 使用モデル: `{os.getenv('GEMINI_MODEL', 'gemini-1.5-pro')}`
457
- * thinking_budget=0 は `gemini-2.5-flash-preview-04-17` 使用時のみ自動付与
458
- """
459
- )
460
-
461
- # ────────────────────────────────────
462
- # ⑩ FastAPI に Gradio をマウント
463
- # ────────────────────────────────────
464
  app = gr.mount_gradio_app(app, iface, path="/")
465
 
466
- # ────────────────────────────────────
467
- # ⑪ 直接実行時
468
- # ────────────────────────────────────
469
  if __name__ == "__main__":
470
  import uvicorn
471
-
472
- logger.info("Starting dev server at http://localhost:7860")
473
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
  """
4
+ Full‑page HTML renderer & Gemini 2.5 Flash text‑to‑infographic generator
 
 
 
 
 
5
  """
6
 
7
+ # ───────────────────────────
8
+ # 標準 / 外部ライブラリ import
9
+ # ───────────────────────────
10
+ import os, time, tempfile, logging
 
 
 
11
  from io import BytesIO
12
  from typing import List, Optional
13
 
 
 
 
14
  import gradio as gr
15
  from fastapi import FastAPI, HTTPException
16
  from fastapi.responses import StreamingResponse
 
25
  from PIL import Image
26
  from huggingface_hub import hf_hub_download
27
 
28
+ # ▶ 新 Google Gen AI SDK
29
+ from google import genai
30
+ from google.genai import types
31
 
32
+ # ───────────────────────────
33
+ # ロギング
34
+ # ───────────────────────────
35
  logging.basicConfig(level=logging.INFO)
36
  logger = logging.getLogger(__name__)
37
 
38
+ # ───────────────────────────
39
+ # Pydantic 入力モデル
40
+ # ───────────────────────────
41
  class GeminiRequest(BaseModel):
42
  text: str
43
  extension_percentage: float = 10.0
 
53
  style: str = "standard"
54
 
55
 
56
+ # ───────────────────────────
57
+ # Font Awesome レイアウト補正
58
+ # ───────────────────────────
59
  def enhance_font_awesome_layout(html_code: str) -> str:
60
+ fa_css = """
 
61
  <style>
62
+ [class*="fa-"]{display:inline-block!important;vertical-align:middle!important;margin-right:8px!important;}
63
+ h1 [class*="fa-"],h2 [class*="fa-"],h3 [class*="fa-"],h4 [class*="fa-"],h5 [class*="fa-"],h6 [class*="fa-"]{margin-right:10px!important;}
64
+ .fa+span,.fas+span,.far+span,.fab+span,span+.fa,span+.fas,span+.far,span+.fab{display:inline-block!important;margin-left:5px!important;}
 
 
 
 
 
 
 
 
 
 
 
65
  li [class*="fa-"],p [class*="fa-"]{margin-right:10px!important;}
66
  .inline-icon{display:inline-flex!important;align-items:center!important;}
67
  [class*="fa-"]+span{display:inline-block!important;vertical-align:middle!important;}
68
+ </style>"""
 
69
  if "<head>" in html_code:
70
+ return html_code.replace("</head>", f"{fa_css}</head>")
71
+ if "<html" in html_code:
72
  head_end = html_code.find("</head>")
73
  if head_end > 0:
74
+ return html_code[:head_end] + fa_css + html_code[head_end:]
75
  body_start = html_code.find("<body")
76
  if body_start > 0:
77
+ return html_code[:body_start] + f"<head>{fa_css}</head>" + html_code[body_start:]
78
+ return f"<html><head>{fa_css}</head>{html_code}</html>"
79
 
80
 
81
+ # ───────────────────────────
82
+ # prompt.txt ロード
83
+ # ───────────────────────────
84
  def load_system_instruction(style: str = "standard") -> str:
85
+ valid = ["standard", "cute", "resort", "cool", "dental"]
86
+ if style not in valid:
87
+ logger.warning(f"未知の style '{style}' 'standard' に変更")
 
88
  style = "standard"
89
+ local = os.path.join(os.path.dirname(__file__), style, "prompt.txt")
90
+ if os.path.exists(local):
91
+ with open(local, encoding="utf-8") as f:
 
 
92
  return f.read()
 
93
  # HF Hub fallback
94
+ file_path = hf_hub_download(
95
+ repo_id="tomo2chin2/GURAREKOstlyle",
96
+ filename=f"{style}/prompt.txt",
97
+ repo_type="dataset",
98
+ )
99
+ with open(file_path, encoding="utf-8") as f:
100
+ return f.read()
101
+
102
+
103
+ # ───────────────────────────
104
+ # 白余白トリミング
105
+ # ───────────────────────────
106
+ def trim_image_whitespace(img: Image.Image, threshold: int = 250, padding: int = 10) -> Image.Image:
107
+ gray = img.convert("L")
 
 
 
 
 
 
 
 
108
  w, h = gray.size
109
+ pix = list(gray.getdata())
110
+ pix = [pix[i * w:(i + 1) * w] for i in range(h)]
111
+ min_x = min_y = w
112
+ max_x = max_y = 0
113
  for y in range(h):
114
  for x in range(w):
115
+ if pix[y][x] < threshold:
116
  min_x, min_y = min(min_x, x), min(min_y, y)
117
  max_x, max_y = max(max_x, x), max(max_y, y)
118
  if min_x > max_x:
119
+ return img
120
  min_x, min_y = max(0, min_x - padding), max(0, min_y - padding)
121
  max_x, max_y = min(w - 1, max_x + padding), min(h - 1, max_y + padding)
122
+ return img.crop((min_x, min_y, max_x + 1, max_y + 1))
123
 
124
 
125
+ # ───────────────────────────
126
+ # Selenium フルページ SS
127
+ # ───────────────────────────
128
+ def render_fullpage_screenshot(html_code: str, extension_percentage: float = 6.0, trim_whitespace=True) -> Image.Image:
 
 
 
 
129
  driver: Optional[webdriver.Chrome] = None
130
+ tmp_path: Optional[str] = None
131
  try:
132
+ with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html", encoding="utf-8") as f:
133
+ tmp_path = f.name
134
+ f.write(html_code)
135
+ opts = Options()
136
+ opts.add_argument("--headless")
137
+ opts.add_argument("--no-sandbox")
138
+ opts.add_argument("--disable-dev-shm-usage")
139
+ driver = webdriver.Chrome(options=opts)
 
 
 
140
  driver.set_window_size(1200, 1000)
141
+ driver.get("file://" + tmp_path)
142
+ WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
143
+ time.sleep(3)
144
+ total = driver.execute_script("return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight)")
145
+ vp = driver.execute_script("return window.innerHeight")
146
+ for i in range(max(1, int(total / vp)) + 1):
147
+ driver.execute_script(f"window.scrollTo(0, {i * (vp - 200)})")
 
 
 
 
 
 
 
148
  time.sleep(0.2)
149
+ driver.execute_script("window.scrollTo(0,0)")
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  time.sleep(0.5)
151
+ total = driver.execute_script("return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight)")
152
+ h = int(total * (1 + extension_percentage / 100))
153
+ w = driver.execute_script("return Math.max(document.body.scrollWidth, document.documentElement.scrollWidth)")
154
+ h, w = min(max(h, 100), 4000), min(max(w, 100), 2000)
155
+ driver.set_window_size(w, h)
156
+ time.sleep(0.5)
157
+ img = Image.open(BytesIO(driver.get_screenshot_as_png()))
158
+ return trim_image_whitespace(img, 248, 20) if trim_whitespace else img
159
  except Exception as e:
160
+ logger.error(f"Screenshot error: {e}", exc_info=True)
161
+ return Image.new("RGB", (1, 1))
162
  finally:
163
  if driver:
164
  try:
 
169
  os.remove(tmp_path)
170
 
171
 
172
+ # ───────────────────────────
173
+ # SafetySetting デフォルト
174
+ # ───────────────────────────
 
 
 
 
175
  def _default_safety() -> List[types.SafetySetting]:
176
  return [
177
+ types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_MEDIUM_AND_ABOVE"),
178
+ types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_MEDIUM_AND_ABOVE"),
179
+ types.SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_MEDIUM_AND_ABOVE"),
180
+ types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_MEDIUM_AND_ABOVE"),
 
 
 
 
 
 
 
 
181
  ]
182
 
183
 
184
+ # ───────────────────────────
185
+ # Gemini HTML
186
+ # ───────────────────────────
187
+ def generate_html_from_text(text: str, temperature: float = 0.3, style: str = "standard") -> str:
 
 
 
 
188
  api_key = os.getenv("GEMINI_API_KEY")
189
  if not api_key:
190
+ raise ValueError("GEMINI_API_KEY is not set")
191
  model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-pro")
192
+ client = genai.Client(api_key=api_key)
193
+
194
+ # thinking_budget=0 を 2.5 Flash のときだけ付与
195
+ think_cfg = types.ThinkingConfig(thinking_budget=0) if model_name == "gemini-2.5-flash-preview-04-17" else None
196
 
197
+ cfg_kwargs = dict(
198
+ system_instruction=load_system_instruction(style),
199
  temperature=temperature,
200
  top_p=0.7,
201
  top_k=20,
202
  max_output_tokens=8192,
203
  candidate_count=1,
204
+ safety_settings=_default_safety(),
 
 
 
 
 
 
 
 
 
 
205
  )
206
  if think_cfg:
207
+ cfg_kwargs["thinking_config"] = think_cfg
 
 
208
 
209
+ resp = client.models.generate_content(
210
+ model=model_name,
211
+ contents=text,
212
+ config=types.GenerateContentConfig(**cfg_kwargs),
213
  )
214
+ raw = resp.text or ""
215
+ start, end = raw.find("```html"), raw.rfind("```")
 
 
 
 
 
 
216
  if 0 <= start < end:
217
+ html = raw[start + 7:end].strip()
218
+ return enhance_font_awesome_layout(html)
219
+ logger.warning("```html``` ブロック未検出 — 生レスポンス返却")
 
220
  return raw
221
 
222
 
223
+ # ───────────────────────────
224
+ # テキスト → SS 統合
225
+ # ───────────────────────────
226
+ def text_to_screenshot(text: str, extension_percentage: float, temperature=0.3, trim_whitespace=True, style="standard"):
 
 
 
 
 
 
227
  try:
228
  html = generate_html_from_text(text, temperature, style)
229
  return render_fullpage_screenshot(html, extension_percentage, trim_whitespace)
230
  except Exception as e:
231
  logger.error(e, exc_info=True)
232
+ return Image.new("RGB", (1, 1))
233
 
234
 
235
+ # ───────────────────────────
236
+ # FastAPI セットアップ
237
+ # ───────────────────────────
238
  app = FastAPI()
239
  app.add_middleware(
240
+ CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
 
 
 
 
241
  )
242
 
243
+ # Gradio 内蔵静的ファイル mount
244
+ gr_dir = os.path.dirname(gr.__file__)
245
+ for name, sub in [("static", "templates/frontend/static"), ("_app", "templates/frontend/_app"),
246
+ ("assets", "templates/frontend/assets"), ("cdn", "templates/cdn")]:
247
+ p = os.path.join(gr_dir, sub)
248
+ if os.path.exists(p):
249
+ app.mount(f"/{name}", StaticFiles(directory=p), name=name)
250
+
251
+ # ───────────────────────────
252
+ # API ルート
253
+ # ───────────────────────────
254
+ @app.post("/api/screenshot", response_class=StreamingResponse, tags=["Screenshot"])
255
+ async def api_screenshot(req: ScreenshotRequest):
256
+ img = render_fullpage_screenshot(req.html_code, req.extension_percentage, req.trim_whitespace)
 
 
 
 
 
 
 
 
 
 
 
 
257
  buf = BytesIO()
258
  img.save(buf, format="PNG")
259
  buf.seek(0)
260
  return StreamingResponse(buf, media_type="image/png")
261
 
262
 
263
+ @app.post("/api/text-to-screenshot", response_class=StreamingResponse, tags=["Gemini", "Screenshot"])
264
+ async def api_text_to_ss(req: GeminiRequest):
265
+ img = text_to_screenshot(req.text, req.extension_percentage, req.temperature, req.trim_whitespace, req.style)
266
+ buf = BytesIO(); img.save(buf, "PNG"); buf.seek(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  return StreamingResponse(buf, media_type="image/png")
268
 
269
 
270
+ # ───────────────────────────
271
+ # Gradio UI
272
+ # ───────────────────────────
273
+ def process_input(mode, inp, ext, temp, trim, style):
274
+ return render_fullpage_screenshot(inp, ext, trim) if mode == "HTML入力" else \
275
+ text_to_screenshot(inp, ext, temp, trim, style)
 
 
 
 
 
 
 
 
276
 
277
+ with gr.Blocks(title="Full Page Screenshot & Gemini 2.5 Flash") as iface:
278
+ gr.Markdown("## HTML ビューア & テキスト→インフォグラフィック")
279
+ mode_r = gr.Radio(["HTML入力", "テキスト入力"], value="HTML入力", label="入力モード")
280
+ inp_tb = gr.Textbox(lines=15, label="入力")
281
  with gr.Row():
282
+ style_dd = gr.Dropdown(["standard", "cute", "resort", "cool", "dental"], value="standard",
283
+ label="デザインスタイル", visible=False)
284
+ ext_sl = gr.Slider(0, 30, 10, label="上下高さ拡張率(%)")
285
+ temp_sl = gr.Slider(0.0, 1.0, 0.5, step=0.1, label="生成温度", visible=False)
286
+ trim_cb = gr.Checkbox(True, label="余白自動トリミング")
 
 
 
 
 
 
 
 
 
 
 
287
  btn = gr.Button("生成")
288
  out_img = gr.Image(type="pil", label="スクリーンショット")
289
+ mode_r.change(lambda m: [{"visible": m == "テキスト入力", "__type__": "update"}] * 2,
290
+ mode_r, [temp_sl, style_dd])
291
+ btn.click(process_input, [mode_r, inp_tb, ext_sl, temp_sl, trim_cb, style_dd], out_img)
292
+ gr.Markdown(f"*使用モデル*: `{os.getenv('GEMINI_MODEL', 'gemini-1.5-pro')}` "
293
+ "(gemini‑2.5‑flash‑preview‑04‑17 では thinking_budget=0 を自動付与)")
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  app = gr.mount_gradio_app(app, iface, path="/")
296
 
 
 
 
297
  if __name__ == "__main__":
298
  import uvicorn
 
 
299
  uvicorn.run(app, host="0.0.0.0", port=7860)