Spaces:

i0switch
/

my-image-generation

Running on Zero

App Files Community

i0switch committed on Jun 21

Commit

942bdcb

verified ·

1 Parent(s): 92aaea0

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -142

app.py CHANGED Viewed

@@ -14,22 +14,21 @@ from pathlib import Path
 # FastAPI関連（ハイブリッド構成のため維持）
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
-# グローバル変数としてパイプラインを定義（初期値はNone）
-pipe = None
-face_app = None
-upsampler = None
-UPSCALE_OK = False
-# 0. Cache dir & helpers (起動時に実行)
 PERSIST_BASE = Path("/data")
-CACHE_ROOT = (PERSIST_BASE / "instantid_cache" if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
-              else Path.home() / ".cache" / "instantid_cache")
-MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR = CACHE_ROOT/"models", CACHE_ROOT/"models"/"Lora", CACHE_ROOT/"embeddings", CACHE_ROOT/"realesrgan"
-for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
-    p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
     if dst.exists(): return
     for i in range(1, attempts + 1):
         print(f"⬇ Downloading {dst.name} (try {i}/{attempts})")
@@ -48,120 +47,72 @@ print("— Asset download check finished —")
 # 2. パイプライン初期化関数 (GPU確保後に呼び出される)
-def initialize_pipelines():
-    global pipe, face_app, upsampler, UPSCALE_OK
-    # torch/diffusers/onnxruntimeなどのインポートを関数内に移動
-    from diffusers import StableDiffusionPipeline, ControlNetModel, DPMSolverMultistepScheduler, AutoencoderKL
     from insightface.app import FaceAnalysis
-    print("--- Initializing Pipelines (GPU is now available) ---")
-    device = torch.device("cuda") # ZeroGPUではGPUが保証されている
-    dtype = torch.float16
-    # FaceAnalysis
-    if face_app is None:
-        print("Initializing FaceAnalysis...")
-        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-        face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
-        face_app.prepare(ctx_id=0, det_size=(640, 640))
-        print("FaceAnalysis initialized.")
-    # Main Pipeline
-    if pipe is None:
-        print("Loading ControlNet...")
-        controlnet = ControlNetModel.from_pretrained("InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype)
-        print("Loading StableDiffusionPipeline...")
-        pipe = StableDiffusionPipeline.from_single_file(BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2)
-        print("Moving pipeline to GPU...")
-        pipe.to(device) # .to(device)をここで呼ぶ
-        print("Loading VAE...")
-        pipe.vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype).to(device)
-        pipe.controlnet = controlnet
-        print("Configuring Scheduler...")
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
-        print("Loading IP-Adapter and LoRA...")
-        pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name=IP_BIN_FILE.name)
-        pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
-        pipe.set_ip_adapter_scale(0.65)
-        print("Main pipeline initialized.")
-    # Upscaler
-    if upsampler is None and not UPSCALE_OK: # 一度失敗したら再試行しない
-        print("Checking for Upscaler...")
-        try:
-            from basicsr.archs.rrdb_arch import RRDBNet
-            from realesrgan import RealESRGAN
-            rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
-            upsampler = RealESRGAN(device, rrdb, scale=8)
-            upsampler.load_weights(str(UPSCALE_DIR / "RealESRGAN_x8plus.pth"))
-            UPSCALE_OK = True
-            print("Upscaler initialized successfully.")
-        except Exception as e:
-            UPSCALE_OK = False # 失敗を記録
-            print(f"Real-ESRGAN disabled → {e}")
-    print("--- All pipelines ready ---")
-# 4. Core generation logic
-BASE_PROMPT = ("(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,\n""photo of {subject},\n""cinematic lighting, golden hour, rim light, shallow depth of field,\n""textured skin, high detail, shot on Canon EOS R5, 85 mm f/1.4, ISO 200,\n""<lora:ip-adapter-faceid-plusv2_sd15_lora:0.65>, (face),\n""(aesthetic:1.1), (cinematic:0.8)")
-NEG_PROMPT = ("ng_deepnegative_v1_75t, CyberRealistic_Negative-neg, UnrealisticDream, ""(worst quality:2), (low quality:1.8), lowres, (jpeg artifacts:1.2), ""painting, sketch, illustration, drawing, cartoon, anime, cgi, render, 3d, ""monochrome, grayscale, text, logo, watermark, signature, username, ""(MajicNegative_V2:0.8), bad hands, extra digits, fused fingers, malformed limbs, ""missing arms, missing legs, (badhandv4:0.7), BadNegAnatomyV1-neg, skin blemishes, acnes, age spot, glans")
-# ZeroGPUで実行される本体。durationを60秒に設定。
-@spaces.GPU(duration=60)
-def _generate_core(face_img, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor, progress=gr.Progress(track_tqdm=True)):
-    # 初回呼び出し時にパイプラインを初期化
-    initialize_pipelines()
-    progress(0, desc="Generating image...")
-    prompt = BASE_PROMPT.format(subject=(subject.strip() or "a beautiful 20yo woman"))
-    if add_prompt: prompt += ", " + add_prompt
-    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")
-    pipe.set_ip_adapter_scale(ip_scale)
-    result = pipe(prompt=prompt, negative_prompt=neg, ip_adapter_image=face_img, image=face_img, controlnet_conditioning_scale=0.9, num_inference_steps=int(steps) + 5, guidance_scale=cfg, width=int(w), height=int(h)).images[0]
-    if upscale and UPSCALE_OK:
-        progress(0.8, desc="Upscaling...")
-        up, _ = upsampler.enhance(cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor)
-        result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
-    return result
-# GradioのUIから呼び出されるラッパー関数
-def generate_ui(face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor, progress=gr.Progress(track_tqdm=True)):
-    if face_np is None: raise gr.Error("顔画像をアップロードしてください。")
-    # NumPy配列をPillow画像に変換
-    face_img = Image.fromarray(face_np)
-    return _generate_core(face_img, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor, progress)
-# 5. Gradio UI Definition
-with gr.Blocks() as demo:
-    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU)")
-    with gr.Row():
         with gr.Column():
-            face_in = gr.Image(label="顔写真",type="numpy")
-            subj_in = gr.Textbox(label="被写体説明",placeholder="e.g. woman in black suit, smiling")
-            add_in = gr.Textbox(label="追加プロンプト")
-            addneg_in = gr.Textbox(label="追加ネガティブ")
-            with gr.Accordion("詳細設定", open=False):
-                ip_sld = gr.Slider(0,1.5,0.65,step=0.05,label="IP‑Adapter scale")
-                cfg_sld = gr.Slider(1,15,6,step=0.5,label="CFG")
                 step_sld = gr.Slider(10,50,20,step=1,label="Steps")
-                w_sld = gr.Slider(512,1024,512,step=64,label="幅")
-                h_sld = gr.Slider(512,1024,768,step=64,label="高さ")
-                up_ck = gr.Checkbox(label="アップスケール",value=True)
-                up_fac = gr.Slider(1,8,2,step=1,label="倍率")
             btn = gr.Button("生成",variant="primary")
         with gr.Column():
             out_img = gr.Image(label="結果")
@@ -169,39 +120,38 @@ with gr.Blocks() as demo:
     # .queue() はGradioの通常機能として必要
     demo.queue()
     btn.click(
         fn=generate_ui,
         inputs=[face_in,subj_in,add_in,addneg_in,cfg_sld,ip_sld,step_sld,w_sld,h_sld,up_ck,up_fac],
-        outputs=out_img
     )
-# 6. FastAPI Mounting
 app = FastAPI()
-# FastAPIのエンドポイントを定義。こちらも内部で_generate_coreを呼ぶ
 @app.post("/api/predict")
-async def predict_endpoint(
-    face_image: UploadFile = File(...),
-    subject: str = Form("a woman"),
     add_prompt: str = Form(""),
     add_neg: str = Form(""),
     cfg: float = Form(6.0),
-    ip_scale: float = Form(0.65),
     steps: int = Form(20),
     w: int = Form(512),
     h: int = Form(768),
     upscale: bool = Form(True),
-    up_factor: float = Form(2.0)
 ):
     try:
-        contents = await face_image.read()
-        pil_image = Image.open(io.BytesIO(contents))
-        # FastAPI経由の呼び出しも同じコア関数を利用
-        result_pil_image = _generate_core(
-            pil_image, subject, add_prompt, add_neg, cfg, ip_scale,
-            steps, w, h, upscale, up_factor
-        )
         buffered = io.BytesIO()
         result_pil_image.save(buffered, format="PNG")
@@ -216,9 +166,12 @@ async def predict_endpoint(
 app = gr.mount_gradio_app(app, demo, path="/")
 print("Application startup script finished. Waiting for requests.")
-# app.py の末尾に追加
 if __name__ == "__main__":
-    import uvicorn
-    # SpacesでGradioアプリを動かす際の標準ポートは7860です
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 # FastAPI関連（ハイブリッド構成のため維持）
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+##############################################################################
+# 0. 設定とヘルパー
+##############################################################################
+# モデル・LoRA キャッシュを /data に置ける場合はそちらを優先
 PERSIST_BASE = Path("/data")
+CACHE_ROOT   = (PERSIST_BASE / "instantid_cache" if PERSIST_BASE.exists()
+                and os.access(PERSIST_BASE, os.W_OK)
+                else Path.home() / ".cache" / "instantid_cache")
+MODELS_DIR   = CACHE_ROOT / "models"
+LORA_DIR     = CACHE_ROOT / "lora"
+for d in (MODELS_DIR, LORA_DIR):
+    d.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
+    """冪等ダウンロード（既存ならスキップ、リトライ付き）"""
     if dst.exists(): return
     for i in range(1, attempts + 1):
         print(f"⬇ Downloading {dst.name} (try {i}/{attempts})")
 # 2. パイプライン初期化関数 (GPU確保後に呼び出される)
+def load_pipeline():
+    from diffusers import (
+        StableDiffusionPipeline, ControlNetModel,
+        DPMSolverMultistepScheduler, AutoencoderKL,
+    )
     from insightface.app import FaceAnalysis
+    print("→ Loading models to GPU …")
+    # --- InstantID 主要モデル ---
+    vae      = AutoencoderKL.from_pretrained(
+        "stabilityai/sd-vae-ft-mse",
+        torch_dtype=torch.float16
+    )
+    base     = StableDiffusionPipeline.from_single_file(
+        str(BASE_CKPT),
+        vae=vae,
+        torch_dtype=torch.float16,
+        safety_checker=None,
+        original_config_file="v1-inference.yaml"  # StableDiffusion1.x 互換
+    )
+    control  = ControlNetModel.from_pretrained(
+        "lllyasviel/control_v11p_sd15_openpose",
+        torch_dtype=torch.float16
+    )
+    pipe     = StableDiffusionPipeline(
+        vae=vae,
+        text_encoder=base.text_encoder,
+        tokenizer=base.tokenizer,
+        unet=base.unet,
+        controlnet=control,
+        scheduler=DPMSolverMultistepScheduler.from_config(base.scheduler.config),
+        safety_checker=None,
+        feature_extractor=base.feature_extractor,
+        requires_safety_checker=False
+    ).to("cuda", dtype=torch.float16)
+    pipe.load_lora_weights(str(LORA_FILE))
+    pipe.set_adapters(["ip_adapter_face"], [1.0])
+    pipe.enable_xformers_memory_efficient_attention()
+    # --- InsightFace ---
+    face_analyzer = FaceAnalysis(name="antelopev2", providers=["CUDAExecutionProvider"])
+    face_analyzer.prepare(ctx_id=0, det_size=(640, 640))
+    print("✓ Model loading complete.")
+    return pipe, face_analyzer
+##############################################################################
+# 3. Gradio UI
+##############################################################################
+with gr.Blocks(title="InstantID × Beautiful Realistic Asians v7") as demo:
+    with gr.Row(equal_height=True):
         with gr.Column():
+            face_in   = gr.Image(type="pil", label="顔画像 (必須)")
+            subj_in   = gr.Textbox(label="被写体説明", placeholder="例: 20代日本人女性")
+            add_in    = gr.Textbox(label="追加プロンプト", placeholder="例: masterpiece, best quality, ...")
+            addneg_in = gr.Textbox(label="ネガティブ", value="(worst quality:2), lowres, bad hand, ...")
+            with gr.Row():
+                ip_sld   = gr.Slider(0.0,1.0,0.6,step=0.05,label="IP Adapter Weight")
+                cfg_sld  = gr.Slider(1,15,6,step=0.5,label="CFG")
                 step_sld = gr.Slider(10,50,20,step=1,label="Steps")
+                w_sld    = gr.Slider(512,1024,512,step=64,label="幅")
+                h_sld    = gr.Slider(512,1024,768,step=64,label="高さ")
+                up_ck    = gr.Checkbox(label="アップスケール",value=True)
+                up_fac   = gr.Slider(1,8,2,step=1,label="倍率")
             btn = gr.Button("生成",variant="primary")
         with gr.Column():
             out_img = gr.Image(label="結果")
     # .queue() はGradioの通常機能として必要
     demo.queue()
+    def generate_ui(face_img, subj, add, addneg, cfg, ipw, steps, w, h, upscale, up_factor):
+        # 実際の推論関数（省略：ここに InstantID 推論処理を実装）
+        return face_img  # ダミー
     btn.click(
         fn=generate_ui,
         inputs=[face_in,subj_in,add_in,addneg_in,cfg_sld,ip_sld,step_sld,w_sld,h_sld,up_ck,up_fac],
+        outputs=[out_img]
     )
+##############################################################################
+# 4. FastAPI エンドポイント（REST API 用）
+##############################################################################
 app = FastAPI()
 @app.post("/api/predict")
+async def predict(
+    face: UploadFile = File(...),
+    subject: str = Form(...),
     add_prompt: str = Form(""),
     add_neg: str = Form(""),
     cfg: float = Form(6.0),
+    ipw: float = Form(0.6),
     steps: int = Form(20),
     w: int = Form(512),
     h: int = Form(768),
     upscale: bool = Form(True),
+    up_factor: int = Form(2)
 ):
     try:
+        # 実際の推論ロジック（省略）
+        result_pil_image = Image.open(face.file)  # ダミー
         buffered = io.BytesIO()
         result_pil_image.save(buffered, format="PNG")
 app = gr.mount_gradio_app(app, demo, path="/")
 print("Application startup script finished. Waiting for requests.")
+#------------------------------------------------------------------------
+# 5. Uvicorn サーバー起動（Spaces が呼び出すエントリポイント）
+#------------------------------------------------------------------------
 if __name__ == "__main__":
+    import uvicorn, os
+    # Hugging Face Spaces が $PORT を渡してくる場合はそれを優先
+    port = int(os.getenv("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port, workers=1, log_level="info")