HTMLviewer2_API

Paused

App Files Files Community

tomo2chin2 committed on Mar 28, 2025

Commit

bd40890

verified ·

1 Parent(s): cafd2ee

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -40

app.py CHANGED Viewed

@@ -27,9 +27,10 @@ logger = logging.getLogger(__name__)
 class GeminiRequest(BaseModel):
     """Geminiへのリクエストデータモデル"""
     text: str
-    extension_percentage: float = 8.0  # Default value same as Gradio slider
-def generate_html_from_text(text):
     """テキストからHTMLを生成する"""
     try:
         # APIキーの取得と設定
@@ -144,7 +145,7 @@ def generate_html_from_text(text):
 ーーー＜ユーザーが入力（または添付）＞ーーー"""
         # モデルを初期化して処理
-        logger.info(f"Gemini APIにリクエストを送信: テキスト長さ = {len(text)}")
         # モデル初期化とフォールバック処理
         try:
@@ -157,10 +158,10 @@ def generate_html_from_text(text):
         # 生成設定
         generation_config = {
-            "temperature": 0.95,
             "top_p": 0.95,
             "top_k": 64,
-            "max_output_tokens": 8000,
         }
         # プロンプト構築
@@ -211,8 +212,8 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
     # Gemini生成コンテンツの場合、拡張率を調整
     if is_gemini_content:
-        # 最低でも10%の拡張を確保
-        extension_percentage = max(extension_percentage, 10.0)
         logger.info(f"Gemini生成コンテンツ用に拡張率を調整: {extension_percentage}%")
     # 1) Save HTML code to a temporary file
@@ -398,11 +399,11 @@ def render_fullpage_screenshot(html_code: str, extension_percentage: float, is_g
                 logger.error(f"Error removing temporary file {tmp_path}: {e}")
 # --- Geminiを使った新しい関数 ---
-def text_to_screenshot(text: str, extension_percentage: float) -> Image.Image:
     """テキストをGemini APIでHTMLに変換し、スクリーンショットを生成する統合関数"""
     try:
-        # 1. テキストからHTMLを生成
-        html_code = generate_html_from_text(text)
         # 2. HTMLからスクリーンショットを生成（Gemini生成コンテンツとしてフラグをオン）
         return render_fullpage_screenshot(html_code, extension_percentage, is_gemini_content=True)
@@ -455,7 +456,7 @@ if os.path.exists(cdn_dir):
 # Pydantic model for API request body validation
 class ScreenshotRequest(BaseModel):
     html_code: str
-    extension_percentage: float = 8.0 # Default value same as Gradio slider
 # API Endpoint for screenshot generation
 @app.post("/api/screenshot",
@@ -503,12 +504,13 @@ async def api_text_to_screenshot(request: GeminiRequest):
     テキストからHTMLインフォグラフィックを生成してスクリーンショットを返すAPIエンドポイント
     """
     try:
-        logger.info(f"テキスト→スクリーンショットAPIリクエスト受信。テキスト長さ: {len(request.text)}, 拡張率: {request.extension_percentage}%")
-        # テキストからHTMLを生成してスクリーンショットを作成
         pil_image = text_to_screenshot(
             request.text,
-            request.extension_percentage
         )
         if pil_image.size == (1, 1):
@@ -529,19 +531,19 @@ async def api_text_to_screenshot(request: GeminiRequest):
 # --- Gradio Interface Definition ---
 # 入力モードの選択用Radioコンポーネント
-def process_input(input_mode, html_input, text_input, extension_percentage):
     """入力モードに応じて適切な処理を行う"""
     if input_mode == "HTML入力":
         # HTMLモードの場合は既存の処理
-        return render_fullpage_screenshot(html_input, extension_percentage)
     else:
         # テキスト入力モードの場合はGemini APIを使用
-        return text_to_screenshot(text_input, extension_percentage)
 # Gradio UIの定義
 with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr.themes.Base()) as iface:
     gr.Markdown("# HTMLビューア & テキスト→インフォグラフィック変換")
-    gr.Markdown("HTMLをヘッドレスブラウザでレンダリングするか、テキストをGemini APIでインフォグラフィックに変換して画像として取得します。")
     with gr.Row():
         input_mode = gr.Radio(
@@ -550,35 +552,49 @@ with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr
             value="HTML入力"
         )
-    with gr.Tabs():
-        with gr.TabItem("HTML入力"):
-            html_input = gr.Textbox(
-                lines=15,
-                label="HTMLコード入力",
-                placeholder="<!DOCTYPE html>\n<html>\n<head>\n  <title>Example</title>\n</head>\n<body>\n  <h1>Hello World</h1>\n</body>\n</html>"
-            )
-        with gr.TabItem("テキスト入力"):
-            text_input = gr.Textbox(
-                lines=15,
-                label="テキスト入力 (Geminiで変換)",
-                placeholder="ここにテキストを入力すると、Gemini APIによってグラフィカルなHTMLに変換されます。"
-            )
-    extension_percentage = gr.Slider(
-        minimum=0,
-        maximum=30,  # より高い値を許可
-        step=1.0,
-        value=8.0,    # デフォルト値を増加
-        label="上下高さ拡張率（%）"
     )
     submit_btn = gr.Button("生成")
     output_image = gr.Image(type="pil", label="ページ全体のスクリーンショット")
     submit_btn.click(
         fn=process_input,
-        inputs=[input_mode, html_input, text_input, extension_percentage],
         outputs=output_image
     )

 class GeminiRequest(BaseModel):
     """Request data model for Gemini."""
     # Input text; the only field without a default.
     text: str
+    extension_percentage: float = 6.0  # ① Default changed to 6%; used when the field is omitted
+    temperature: float = 1.0  # ④ Added temperature parameter, default 1.0; used when the field is omitted
+def generate_html_from_text(text, temperature=1.0):
     """テキストからHTMLを生成する"""
     try:
         # APIキーの取得と設定
 ーーー＜ユーザーが入力（または添付）＞ーーー"""
         # モデルを初期化して処理
+        logger.info(f"Gemini APIにリクエストを送信: テキスト長さ = {len(text)}, 温度 = {temperature}")
         # モデル初期化とフォールバック処理
         try:
         # 生成設定
         generation_config = {
+            "temperature": temperature,  # ④パラメータとして受け取った温度を設定
             "top_p": 0.95,
             "top_k": 64,
+            "max_output_tokens": 8192,
         }
         # プロンプト構築
     # Gemini生成コンテンツの場合、拡張率を調整
     if is_gemini_content:
+        # ② Guarantee at least 5% extension (minimum lowered from the previous 10% to 5%)
+        extension_percentage = max(extension_percentage, 5.0)
         logger.info(f"Gemini生成コンテンツ用に拡張率を調整: {extension_percentage}%")
     # 1) Save HTML code to a temporary file
                 logger.error(f"Error removing temporary file {tmp_path}: {e}")
 # --- Geminiを使った新しい関数 ---
+def text_to_screenshot(text: str, extension_percentage: float, temperature: float = 1.0) -> Image.Image:
     """テキストをGemini APIでHTMLに変換し、スクリーンショットを生成する統合関数"""
     try:
+        # 1. テキストからHTMLを生成（温度パラメータも渡す）
+        html_code = generate_html_from_text(text, temperature)
         # 2. HTMLからスクリーンショットを生成（Gemini生成コンテンツとしてフラグをオン）
         return render_fullpage_screenshot(html_code, extension_percentage, is_gemini_content=True)
 # Pydantic model for API request body validation
 class ScreenshotRequest(BaseModel):
     html_code: str
+    extension_percentage: float = 6.0 # ①デフォルト値を6%に変更
 # API Endpoint for screenshot generation
 @app.post("/api/screenshot",
     テキストからHTMLインフォグラフィックを生成してスクリーンショットを返すAPIエンドポイント
     """
     try:
+        logger.info(f"テキスト→スクリーンショットAPIリクエスト受信。テキスト長さ: {len(request.text)}, 拡張率: {request.extension_percentage}%, 温度: {request.temperature}")
+        # テキストからHTMLを生成してスクリーンショットを作成（温度パラメータも渡す）
         pil_image = text_to_screenshot(
             request.text,
+            request.extension_percentage,
+            request.temperature
         )
         if pil_image.size == (1, 1):
 # --- Gradio Interface Definition ---
 # 入力モードの選択用Radioコンポーネント
+def process_input(input_mode, input_text, extension_percentage, temperature):
     """Run the processing path that matches the selected input mode.
 
     Args:
         input_mode: Selected mode. "HTML入力" selects direct HTML rendering;
             any other value takes the text path.
         input_text: Contents of the shared input box (HTML code or plain text).
         extension_percentage: Forwarded unchanged to whichever path runs.
         temperature: Forwarded to text_to_screenshot only; unused in HTML mode.
 
     Returns:
         Whatever render_fullpage_screenshot or text_to_screenshot returns.
     """
     if input_mode == "HTML入力":
         # HTML mode: render the input as-is with the existing screenshot routine
+        return render_fullpage_screenshot(input_text, extension_percentage)
     else:
         # Text mode: go through the Gemini API path, passing the temperature along
+        return text_to_screenshot(input_text, extension_percentage, temperature)
 # Gradio UIの定義
 with gr.Blocks(title="Full Page Screenshot (テキスト変換対応)", theme=gr.themes.Base()) as iface:
     gr.Markdown("# HTMLビューア & テキスト→インフォグラフィック変換")
+    gr.Markdown("HTMLコードをレンダリングするか、テキストをGemini APIでインフォグラフィックに変換して画像として取得します。")
     with gr.Row():
         input_mode = gr.Radio(
             value="HTML入力"
         )
+    # ③共用のテキストボックス（タブ無し）
+    input_text = gr.Textbox(
+        lines=15,
+        label="入力",
+        placeholder="HTMLコードまたはテキストを入力してください。入力モードに応じて処理されます。"
     )
+    with gr.Row():
+        extension_percentage = gr.Slider(
+            minimum=0,
+            maximum=30,
+            step=1.0,
+            value=6,    # ①デフォルト値を6%に変更
+            label="上下高さ拡張率（%）"
+        )
+        # ④温度調整スライダー（テキストモード時のみ表示）
+        temperature = gr.Slider(
+            minimum=0.0,
+            maximum=1.4,
+            step=0.1,
+            value=1.0,    # デフォルト値1.0
+            label="生成時の温度（創造性）",
+            visible=False  # 最初は非表示
+        )
     submit_btn = gr.Button("生成")
     output_image = gr.Image(type="pil", label="ページ全体のスクリーンショット")
+    # 入力モード変更時のイベント処理（テキストモード時のみ温度スライダーを表示）
+    def update_temperature_visibility(mode):
+        return gr.Slider.update(visible=(mode == "テキスト入力"))
+    input_mode.change(
+        fn=update_temperature_visibility,
+        inputs=input_mode,
+        outputs=temperature
+    )
+    # 生成ボタンクリック時のイベント処理
     submit_btn.click(
         fn=process_input,
+        inputs=[input_mode, input_text, extension_percentage, temperature],
         outputs=output_image
     )