Spaces:

tomo2chin2
/

ImageGenMCP

Paused

App Files Files Community

tomo2chin2 committed on May 31, 2025

Commit

3f4aae4

verified ·

1 Parent(s): 4dc437c

Upload 7 files

Browse files

Files changed (7) hide show

.gitignore +50 -0
CLAUDE.md +21 -0
README.md +86 -4
app.py +287 -0
app.yaml +9 -0
gemini_sample_code.txt +81 -0
requirements.txt +7 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,50 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual Environment
+venv/
+ENV/
+env/
+.env
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
+# Project specific
+*.log
+*.png
+*.jpg
+*.jpeg
+*.gif
+*.bmp
+generated_images/
+# Hugging Face Spaces
+.gradio/

CLAUDE.md ADDED Viewed

	@@ -0,0 +1,21 @@

+# CLAUDE.md
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+## 重要な指示
+- **常に日本語で会話すること**
+- このプロジェクトはHugging Face Spacesで動作するGradio 5.31.0を使用したアプリケーションの開発が目的です。
+- 開発するアプリケーションは、Claude Codeから利用するための画像生成MCPサーバーです。画像生成にはGemini 2.0 Flash（`gemini-2.0-flash-preview-image-generation`）を使用します。サンプルコードは`gemini_sample_code.txt`を参照してください。
+## Project Overview
+ImageGenMCP - Hugging Face Spaces上で動作する画像生成アプリケーション（Gradio 5.31.0使用）
+## Development Commands
+*To be updated once the project is initialized with package.json or other configuration files.*
+## Architecture
+*To be documented once the codebase structure is established.*

README.md CHANGED Viewed

@@ -1,12 +1,94 @@
 ---
 title: ImageGenMCP
-emoji: 🌍
-colorFrom: green
 colorTo: purple
 sdk: gradio
-sdk_version: 5.32.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: ImageGenMCP
+emoji: 🎨
+colorFrom: blue
 colorTo: purple
 sdk: gradio
+sdk_version: 5.31.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# ImageGenMCP - Image Generation MCP Server
+A Gradio-based image generation application using Google's Gemini 2.0 Flash Preview model, designed to work as an MCP (Model Context Protocol) server for Claude Code.
+## Features
+- 🎨 High-quality image generation using Gemini 2.0 Flash Preview
+- 🖼️ Support for reference images to guide generation
+- 🔧 MCP server integration for Claude Code
+- 🌐 Web interface powered by Gradio 5.31.0
+- 📝 Detailed logging for debugging
+## Setup
+### Environment Variables
+Set your Gemini API key:
+```bash
+export GEMINI_API_KEY="your-gemini-api-key"
+```
+### Local Development
+1. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+2. Run the application:
+**Web UI mode:**
+```bash
+python app.py
+```
+**MCP server mode:**
+```bash
+python app.py --mcp
+```
+## Usage with Claude Code
+Add this configuration to your Claude Code settings:
+```json
+{
+  "mcpServers": {
+    "image-gen": {
+      "command": "python",
+      "args": ["/path/to/app.py", "--mcp"],
+      "env": {
+        "GEMINI_API_KEY": "your-api-key"
+      }
+    }
+  }
+}
+```
+## API
+### MCP Tool: `generate_image`
+**Description:** Generates images using Gemini 2.0 Flash Preview
+**Parameters:**
+- `prompt` (string, required): Description of the image to generate
+**Returns:**
+- `success` (boolean): Whether generation was successful
+- `message` (string): Status message
+- `image_base64` (string): Base64-encoded PNG image (if successful)
+## Requirements
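+- Python 3.10+ (required by Gradio 5 and the `mcp` package; `app.py` also uses built-in generic annotations such as `tuple[...]`)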
+- Gemini API key with access to `gemini-2.0-flash-preview-image-generation` model
+- Dependencies listed in `requirements.txt`
+## License
+MIT

app.py ADDED Viewed

	@@ -0,0 +1,287 @@

+import gradio as gr
+import os
+import base64
+import io
+from PIL import Image
+from google import genai
+from google.genai import types
+import json
+import asyncio
+from typing import Dict, Any, List, Optional
+from mcp import Server, NotificationOptions
+from mcp.server.models import InitializationOptions
+import mcp.server.stdio
+import mcp.types as types_mcp
+import logging
+import traceback
+# ログ設定
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Gemini APIクライアントの初期化
+def get_gemini_client():
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        raise ValueError("GEMINI_API_KEY環境変数が設定されていません")
+    return genai.Client(api_key=api_key)
+# 画像生成関数
+def generate_image(prompt: str, previous_image: Optional[Image.Image] = None) -> tuple[Optional[Image.Image], str]:
+    """
+    Gemini 2.0 Flashを使用して画像を生成する
+    Args:
+        prompt: 生成したい画像の説明
+        previous_image: 参考にする前の画像（オプション）
+    Returns:
+        生成された画像とステータスメッセージ
+    """
+    try:
+        logger.info(f"画像生成開始: プロンプト='{prompt[:50]}...', 参考画像={'あり' if previous_image else 'なし'}")
+        client = get_gemini_client()
+        model = "gemini-2.0-flash-preview-image-generation"  # 画像生成対応モデル
+        # コンテンツの準備
+        contents = []
+        # ユーザープロンプトの追加
+        contents.append(
+            types.Content(
+                role="user",
+                parts=[types.Part.from_text(text=prompt)]
+            )
+        )
+        # 前の画像がある場合は追加
+        if previous_image:
+            # PILイメージをbase64に変換
+            buffered = io.BytesIO()
+            previous_image.save(buffered, format="PNG")
+            img_data = base64.b64encode(buffered.getvalue()).decode()
+            contents.append(
+                types.Content(
+                    role="model",
+                    parts=[
+                        types.Part.from_bytes(
+                            mime_type="image/png",
+                            data=base64.b64decode(img_data)
+                        )
+                    ]
+                )
+            )
+            # 追加の指示
+            contents.append(
+                types.Content(
+                    role="user",
+                    parts=[types.Part.from_text(text="上記の画像を参考に、以下の要望に従って新しい画像を生成してください: " + prompt)]
+                )
+            )
+        # 生成設定
+        generate_content_config = types.GenerateContentConfig(
+            response_modalities=["IMAGE"],
+            # response_mime_typeは指定しない（Gemini 2.0 Flashの仕様に合わせる）
+        )
+        # 画像生成
+        logger.info("Gemini APIを呼び出し中...")
+        response = client.models.generate_content(
+            model=model,
+            contents=contents,
+            config=generate_content_config
+        )
+        logger.info("Gemini APIの呼び出し完了")
+        # レスポンスから画像を取得
+        if (response.candidates and
+            response.candidates[0].content and
+            response.candidates[0].content.parts and
+            response.candidates[0].content.parts[0].inline_data):
+            image_data = response.candidates[0].content.parts[0].inline_data.data
+            image = Image.open(io.BytesIO(image_data))
+            logger.info(f"画像生成成功: サイズ={image.size}")
+            return image, "画像生成に成功しました！"
+        else:
+            logger.warning("レスポンスに画像データが含まれていません")
+            return None, "画像の生成に失敗しました。レスポンスに画像データが含まれていません。"
+    except Exception as e:
+        logger.error(f"画像生成エラー: {str(e)}")
+        logger.error(traceback.format_exc())
+        return None, f"エラーが発生しました: {str(e)}"
+# MCPサーバーの設定
+class ImageGenMCPServer:
+    def __init__(self):
+        self.server = Server("image-gen-mcp")
+        self.setup_handlers()
+    def setup_handlers(self):
+        @self.server.list_tools()
+        async def handle_list_tools() -> list[types_mcp.Tool]:
+            return [
+                types_mcp.Tool(
+                    name="generate_image",
+                    description="Gemini 2.0 Flashを使用して画像を生成します",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "prompt": {
+                                "type": "string",
+                                "description": "生成したい画像の説明"
+                            }
+                        },
+                        "required": ["prompt"]
+                    }
+                )
+            ]
+        @self.server.call_tool()
+        async def handle_call_tool(
+            name: str,
+            arguments: Optional[Dict[str, Any]] = None
+        ) -> List[types_mcp.TextContent]:
+            if name == "generate_image":
+                prompt = arguments.get("prompt", "")
+                logger.info(f"MCPツール呼び出し: generate_image, プロンプト='{prompt[:50]}...'")
+                # 同期関数を非同期で実行
+                loop = asyncio.get_event_loop()
+                image, message = await loop.run_in_executor(
+                    None, generate_image, prompt, None
+                )
+                if image:
+                    # 画像をbase64エンコード
+                    buffered = io.BytesIO()
+                    image.save(buffered, format="PNG")
+                    img_str = base64.b64encode(buffered.getvalue()).decode()
+                    logger.info("MCPレスポンス: 成功")
+                    return [
+                        types_mcp.TextContent(
+                            type="text",
+                            text=json.dumps({
+                                "success": True,
+                                "message": message,
+                                "image_base64": img_str
+                            })
+                        )
+                    ]
+                else:
+                    logger.warning(f"MCPレスポンス: 失敗 - {message}")
+                    return [
+                        types_mcp.TextContent(
+                            type="text",
+                            text=json.dumps({
+                                "success": False,
+                                "message": message
+                            })
+                        )
+                    ]
+            raise ValueError(f"Unknown tool: {name}")
+    async def run(self):
+        logger.info("MCPサーバー起動中...")
+        async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
+            await self.server.run(
+                read_stream,
+                write_stream,
+                InitializationOptions(
+                    server_name="image-gen-mcp",
+                    server_version="0.1.0"
+                )
+            )
+# Gradioインターフェースの作成
+def create_gradio_interface():
+    with gr.Blocks(title="画像生成MCP - Gemini 2.0 Flash") as demo:
+        gr.Markdown("""
+        # 画像生成MCPサーバー
+        Gemini 2.0 Flashを使用した画像生成アプリケーションです。
+        ClaudeCodeから画像生成MCPツールとして利用できます。
+        """)
+        with gr.Row():
+            with gr.Column():
+                prompt_input = gr.Textbox(
+                    label="プロンプト",
+                    placeholder="生成したい画像の説明を入力してください...",
+                    lines=3
+                )
+                reference_image = gr.Image(
+                    label="参考画像（オプション）",
+                    type="pil"
+                )
+                generate_btn = gr.Button("画像を生成", variant="primary")
+            with gr.Column():
+                output_image = gr.Image(
+                    label="生成された画像",
+                    type="pil"
+                )
+                status_output = gr.Textbox(
+                    label="ステータス",
+                    interactive=False
+                )
+        # MCPサーバー情報の表示
+        gr.Markdown("""
+        ### MCPサーバー情報
+        このアプリケーションはMCPサーバーとして動作します。
+        ClaudeCodeで使用する場合は、以下の設定を使用してください：
+        ```json
+        {
+          "mcpServers": {
+            "image-gen": {
+              "command": "python",
+              "args": ["app.py", "--mcp"],
+              "env": {
+                "GEMINI_API_KEY": "your-api-key-here"
+              }
+            }
+          }
+        }
+        ```
+        """)
+        # イベントハンドラ
+        generate_btn.click(
+            fn=generate_image,
+            inputs=[prompt_input, reference_image],
+            outputs=[output_image, status_output]
+        )
+    return demo
+# メイン実行部分
+if __name__ == "__main__":
+    import sys
+    if "--mcp" in sys.argv:
+        # MCPサーバーモードで実行
+        logger.info("MCP���ーバーモードで起動")
+        mcp_server = ImageGenMCPServer()
+        asyncio.run(mcp_server.run())
+    else:
+        # Gradio UIモードで実行
+        logger.info("Gradio UIモードで起動")
+        demo = create_gradio_interface()
+        demo.launch()

app.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+title: ImageGenMCP
+emoji: 🎨
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 5.31.0
+app_file: app.py
+pinned: false
+license: mit

gemini_sample_code.txt ADDED Viewed

	@@ -0,0 +1,81 @@

+# To run this code you need to install the following dependencies:
+# pip install google-genai
+import base64
+import mimetypes
+import os
+from google import genai
+from google.genai import types
+def save_binary_file(file_name, data):
+    f = open(file_name, "wb")
+    f.write(data)
+    f.close()
+    print(f"File saved to to: {file_name}")
+def generate():
+    client = genai.Client(
+        api_key=os.environ.get("GEMINI_API_KEY"),
+    )
+    model = "gemini-2.0-flash-preview-image-generation"
+    contents = [
+        types.Content(
+            role="user",
+            parts=[
+                types.Part.from_text(text="""筋トレ中の女性
+"""),
+            ],
+        ),
+        types.Content(
+            role="model",
+            parts=[
+                types.Part.from_bytes(
+                    mime_type="image/png",
+                    data=base64.b64decode(
+                        """  ＜画像データが入ります＞　"""
+                    ),
+                ),
+            ],
+        ),
+        types.Content(
+            role="user",
+            parts=[
+                types.Part.from_text(text="""INSERT_INPUT_HERE"""),
+            ],
+        ),
+    ]
+    generate_content_config = types.GenerateContentConfig(
+        response_modalities=[
+            "IMAGE",
+            "TEXT",
+        ],
+        response_mime_type="text/plain",
+    )
+    file_index = 0
+    for chunk in client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    ):
+        if (
+            chunk.candidates is None
+            or chunk.candidates[0].content is None
+            or chunk.candidates[0].content.parts is None
+        ):
+            continue
+        if chunk.candidates[0].content.parts[0].inline_data and chunk.candidates[0].content.parts[0].inline_data.data:
+            file_name = f"ENTER_FILE_NAME_{file_index}"
+            file_index += 1
+            inline_data = chunk.candidates[0].content.parts[0].inline_data
+            data_buffer = inline_data.data
+            file_extension = mimetypes.guess_extension(inline_data.mime_type)
+            save_binary_file(f"{file_name}{file_extension}", data_buffer)
+        else:
+            print(chunk.text)
+if __name__ == "__main__":
+    generate()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio==5.31.0
+google-genai
+mcp
+uvicorn
+fastapi
+python-multipart
+pillow