# 🤖 HuggingFace BFS Face Swap API (CPU Optimized)
"""BFS face-swap HTTP service.

FastAPI app wrapping a 4-bit quantized Qwen-Image-Edit pipeline with the
BFS ("Best Face Swap") LoRA. Endpoints:

* POST /swap              — head swap with the built-in Head V5 prompt
* POST /swap-with-prompt  — head swap with a caller-supplied prompt
* GET  /health            — model/LoRA load status
* GET  /models            — available BFS LoRA versions and prompts

Upload order matters: the TARGET (body) image is first, the SOURCE (face)
image second.
"""
import io
import logging
import os
import shutil
from pathlib import Path

import torch
import uvicorn
from diffusers import QwenImageEditPlusPipeline
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse, Response
from huggingface_hub import snapshot_download
from PIL import Image

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="BFS Face Swap API", version="3.0.0")

# Scratch space for uploaded files; wiped on shutdown.
TEMP_DIR = Path("/tmp/bfs")
TEMP_DIR.mkdir(exist_ok=True)
# Persistent cache for model weights so restarts skip the download.
MODEL_CACHE = Path("/app/model_cache")
MODEL_CACHE.mkdir(exist_ok=True)

# Global state, populated by load_pipeline() at startup.
pipe = None
lora_loaded = False

# Model configuration
BASE_MODEL = "Qwen/Qwen-Image-Edit-2511"  # or 2509
LORA_REPO = "Alissonerdx/BFS-Best-Face-Swap"
# Recommended LoRA version for the 2511 base model.
LORA_FILE = "bfs_head_v5_2511_merged_version_rank_16_fp16.safetensors"


def load_pipeline() -> bool:
    """Load the quantized pipeline with CPU offload and the BFS LoRA.

    Populates the module-level ``pipe`` and ``lora_loaded`` globals.

    Returns:
        True on success, False if anything failed (the API then serves
        fallback responses instead of crashing).
    """
    global pipe, lora_loaded
    try:
        logger.info("🔄 Loading 4-bit quantized model (first load takes 10-15 minutes)...")
        # Use the 4-bit quantized checkpoint to keep RAM usage manageable.
        pipe = QwenImageEditPlusPipeline.from_pretrained(
            "toandev/Qwen-Image-Edit-2511-4bit",  # 4-bit quantized version
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            cache_dir=MODEL_CACHE,
        )
        # Critical on CPU-only hosts: offload submodules to keep peak RAM down.
        pipe.enable_model_cpu_offload()

        logger.info("🔄 Loading BFS LoRA weights...")
        # Download the LoRA file only if it is not already cached.
        lora_path = MODEL_CACHE / LORA_FILE
        if not lora_path.exists():
            snapshot_download(
                repo_id=LORA_REPO,
                allow_patterns=[LORA_FILE],
                local_dir=MODEL_CACHE,
            )
        pipe.load_lora_weights(str(lora_path))
        lora_loaded = True

        logger.info("✅ Model and LoRA loaded successfully")
        return True
    except Exception as e:
        logger.error(f"❌ Failed to load model: {e}")
        return False


def save_upload_file(upload_file: UploadFile) -> Path:
    """Persist an uploaded file into TEMP_DIR and return its path.

    The stored name is prefixed with a random hex token to avoid
    collisions between concurrent requests.
    """
    contents = upload_file.file.read()
    # Security: keep only the basename of the client-supplied filename so a
    # crafted name like "../../etc/passwd" cannot escape TEMP_DIR.
    safe_name = Path(upload_file.filename or "upload").name
    unique_name = f"{os.urandom(8).hex()}_{safe_name}"
    file_path = TEMP_DIR / unique_name
    with open(file_path, "wb") as f:
        f.write(contents)
    return file_path


def optimize_image(image: Image.Image, max_size: int = 1024) -> Image.Image:
    """Downscale *image* so its longest side is at most *max_size* pixels.

    Smaller inputs are returned unchanged; downscaling speeds up CPU
    inference considerably.
    """
    w, h = image.size
    if max(w, h) > max_size:
        scale = max_size / max(w, h)
        new_w, new_h = int(w * scale), int(h * scale)
        return image.resize((new_w, new_h), Image.Resampling.LANCZOS)
    return image


@app.on_event("startup")
async def startup_event():
    """Load the model when the server starts; degrade gracefully on failure."""
    logger.info("🚀 BFS Face Swap API starting...")
    success = load_pipeline()
    if not success:
        logger.warning("⚠️ Model failed to load, API will return fallback responses")


@app.post("/swap")
async def swap_face(
    target: UploadFile = File(...),  # BODY image — must be FIRST
    source: UploadFile = File(...),  # FACE image — must be SECOND
    num_steps: int = Form(20),       # can be lowered to 10-15 for speed
    guidance_scale: float = Form(1.0),
):
    """Swap the head using the BFS Head V5 prompt.

    IMPORTANT: upload order is body (target) first, then face (source).
    Returns the result as a JPEG response; raises HTTP 500 on failure.
    """
    target_path = source_path = None
    try:
        # Persist uploads to disk.
        target_path = save_upload_file(target)
        source_path = save_upload_file(source)
        logger.info(f"🔄 Processing BFS V5 swap: {target.filename} (body) <- {source.filename} (face)")

        # Decode and downscale both images.
        body_img = optimize_image(Image.open(target_path).convert("RGB"))
        face_img = optimize_image(Image.open(source_path).convert("RGB"))

        if pipe is not None and lora_loaded:
            # Head V5 prompt from the official model card.
            prompt = (
                "head_swap: start with Picture 1 as the base image, keeping its "
                "lighting, environment, and background. remove the head from "
                "Picture 1 completely and replace it with the head from Picture 2, "
                "strictly preserving the hair, eye color, and nose structure of "
                "Picture 2. copy the eye direction, head rotation, and "
                "micro-expressions from Picture 1. high quality, sharp details, 4k"
            )
            # Image order expected by Head V5: [body, face].
            inputs = {
                "image": [body_img, face_img],
                "prompt": prompt,
                "generator": torch.manual_seed(42),
                "true_cfg_scale": 4.0,
                "negative_prompt": "blurry, low quality, distorted face, bad anatomy, unnatural lighting",
                "num_inference_steps": num_steps,
                "guidance_scale": guidance_scale,
            }
            logger.info("🔄 Running inference (this takes 1-3 minutes on CPU)...")
            with torch.inference_mode():
                output = pipe(**inputs)
                result_img = output.images[0]
        else:
            # Fallback: model unavailable, echo the body image back.
            logger.warning("Using fallback mode - returning body image")
            result_img = body_img

        # Encode the result as a high-quality JPEG.
        result_bytes = io.BytesIO()
        result_img.save(result_bytes, format="JPEG", quality=98, optimize=True)
        logger.info(f"✅ Swap completed: {len(result_bytes.getvalue())} bytes")
        return Response(content=result_bytes.getvalue(), media_type="image/jpeg")
    except Exception as e:
        logger.error(f"❌ Swap error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always remove the temp files, even on error.
        for p in (target_path, source_path):
            if p and p.exists():
                p.unlink()


@app.post("/swap-with-prompt")
async def swap_face_with_prompt(
    target: UploadFile = File(...),   # BODY
    source: UploadFile = File(...),   # FACE
    custom_prompt: str = Form(...),   # caller-supplied prompt
    num_steps: int = Form(20),
):
    """Swap the head using a caller-supplied prompt (quality tags appended)."""
    target_path = source_path = None
    try:
        target_path = save_upload_file(target)
        source_path = save_upload_file(source)

        body_img = optimize_image(Image.open(target_path).convert("RGB"))
        face_img = optimize_image(Image.open(source_path).convert("RGB"))

        if pipe is not None and lora_loaded:
            # Append quality requirements to the custom prompt.
            enhanced_prompt = f"{custom_prompt}. high quality, sharp details, 4k, photorealistic"
            inputs = {
                "image": [body_img, face_img],
                "prompt": enhanced_prompt,
                "generator": torch.manual_seed(42),
                "true_cfg_scale": 4.0,
                "negative_prompt": "blurry, low quality, distorted",
                "num_inference_steps": num_steps,
                "guidance_scale": 1.0,
            }
            with torch.inference_mode():
                output = pipe(**inputs)
                result_img = output.images[0]
        else:
            result_img = body_img

        result_bytes = io.BytesIO()
        result_img.save(result_bytes, format="JPEG", quality=98)
        return Response(content=result_bytes.getvalue(), media_type="image/jpeg")
    except Exception as e:
        logger.error(f"❌ Swap error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        for p in (target_path, source_path):
            if p and p.exists():
                p.unlink()


@app.get("/health")
async def health():
    """Report model/LoRA load status."""
    return {
        "status": "ok",
        "model_loaded": pipe is not None,
        "lora_loaded": lora_loaded,
        "base_model": BASE_MODEL,
        "lora_file": LORA_FILE,
    }


@app.get("/models")
async def get_models():
    """Describe the available BFS LoRA versions and their prompts."""
    return JSONResponse(content={
        "base_model": BASE_MODEL,
        "lora_repo": LORA_REPO,
        "current_lora": LORA_FILE,
        "available_versions": [
            {
                "version": "Face V1",
                "file": "bfs_face_v1_qwen_image_edit_2509.safetensors",
                "order": "Face then Body",
                "description": "Swaps only face, preserves hair",
            },
            {
                "version": "Head V1",
                "file": "bfs_head_v1_qwen_image_edit_2509.safetensors",
                "order": "Face then Body",
                "description": "Full head swap",
            },
            {
                "version": "Head V3 (Recommended for 2509)",
                "file": "bfs_head_v3_qwen_image_edit_2509.safetensors",
                "order": "Body then Face",
                "description": "Most stable for 2509",
            },
            {
                "version": "Head V5 (Recommended for 2511)",
                "file": "bfs_head_v5_2511_merged_version_rank_16_fp16.safetensors",
                "order": "Body then Face",
                "description": "Latest, best expression transfer",
            },
        ],
        "prompts": {
            "head_v5": (
                "head_swap: start with Picture 1 as the base image, keeping its "
                "lighting, environment, and background. remove the head from "
                "Picture 1 completely and replace it with the head from Picture 2, "
                "strictly preserving the hair, eye color, and nose structure of "
                "Picture 2. copy the eye direction, head rotation, and "
                "micro-expressions from Picture 1. high quality, sharp details, 4k"
            ),
        },
    })


@app.on_event("shutdown")
async def shutdown_event():
    """Remove the temp directory on shutdown."""
    logger.info("🛑 Shutting down...")
    shutil.rmtree(TEMP_DIR, ignore_errors=True)


if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)