# Hugging Face Space: bep40/comic-ai-generator
# (Captured from the Space file viewer; status at capture: Sleeping; file size: 13,157 bytes.)

import os
import base64
import io
import json
import asyncio
import threading

import gradio as gr
import httpx
import numpy as np
from PIL import Image

# ================= CONFIG =================
# Access token read from the environment; an empty string means "not configured".
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Base URL that every request helper in this file builds its endpoint from.
HF_API_URL = "https://api-inference.huggingface.co"

# ================= MODEL CONFIG =================
# Each task has a default model plus a fallback that the request helpers
# switch to when the default answers with HTTP 404 or 503.
DEFAULT_TEXT_MODEL = "Qwen/Qwen2.5-72B-Instruct"
FALLBACK_TEXT_MODEL = "Qwen/Qwen3-0.6B"
DEFAULT_VISION_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"
FALLBACK_VISION_MODEL = "llava-hf/llava-1.5-7b-hf"
DEFAULT_IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
FALLBACK_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"

# ================= UTILS =================
def get_auth_headers():
    """Return the request headers for the HF API, or ``{}`` without a token.

    Callers treat the empty dict as the signal that ``HF_TOKEN`` is not
    configured.
    """
    if HF_TOKEN:
        return {
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json",
        }
    return {}

def _run_async(coro):
    """Run ``coro`` to completion from synchronous code and return its result.

    When no event loop is running in the calling thread, the coroutine is
    executed directly with ``asyncio.run``. When a loop is already running, a
    second loop cannot be started in the same thread, so the coroutine is
    executed on its own event loop in a helper thread while the calling
    thread blocks until that thread has finished.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.

    Raises:
        BaseException: Any exception raised by the coroutine is re-raised in
            the calling thread, on both code paths.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so one can be started right here.
        return asyncio.run(coro)

    outcome = {}

    def _worker():
        try:
            outcome["value"] = asyncio.run(coro)
        except BaseException as exc:
            # Carry the failure back to the caller; an exception that escapes
            # a thread would otherwise be lost and the caller would get None.
            outcome["error"] = exc

    worker = threading.Thread(target=_worker)
    worker.start()
    worker.join()

    if "error" in outcome:
        raise outcome["error"]
    return outcome["value"]

# ================= HF INFERENCE API HELPERS =================
async def _hf_text_generation(prompt, model=DEFAULT_TEXT_MODEL, max_tokens=2048, temperature=0.7, system_prompt=""):
    """Request a chat completion for ``prompt`` and return the reply text.

    Failures are reported as a message string starting with "❌" rather than
    raised, so the result can always be shown in a text output.

    Args:
        prompt: The user message.
        model: Model id used in the endpoint URL and payload. An empty value
            is replaced by ``DEFAULT_TEXT_MODEL``.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.
        system_prompt: Optional system message placed before the user message.

    Returns:
        The content of the first choice on HTTP 200. On HTTP 404 or 503 the
        request is repeated once with ``FALLBACK_TEXT_MODEL``. Any other
        status, a transport failure, or an unexpected response body yields an
        error message string.
    """
    headers = get_auth_headers()
    if not headers:
        return "❌ LỖI: HF_TOKEN chưa được cấu hình. Vui lòng vào Space Settings → Secrets và thêm HF_TOKEN."

    # An empty model (e.g. a cleared UI textbox) would produce a broken URL.
    model = model or DEFAULT_TEXT_MODEL

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False
    }

    try:
        # The client is closed before a fallback request is started, so the
        # retry does not nest a second client inside the first one.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{HF_API_URL}/models/{model}/v1/chat/completions",
                headers=headers,
                json=payload,
            )
    except httpx.HTTPError as exc:
        # Timeouts and connection failures are reported like every other
        # failure of this helper: as a message, not as an exception.
        return f"❌ LỖI kết nối HF API: {exc}"

    if response.status_code == 200:
        try:
            return response.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            return f"❌ LỖI HF API: phản hồi không hợp lệ ({exc!r})"
    if response.status_code in (404, 503) and model != FALLBACK_TEXT_MODEL:
        return await _hf_text_generation(prompt, FALLBACK_TEXT_MODEL, max_tokens, temperature, system_prompt)
    return f"❌ LỖI HF API ({response.status_code}): {response.text[:500]}"

async def _hf_image_generation(prompt, model=DEFAULT_IMAGE_MODEL, width=1024, height=1024, negative_prompt="", seed=None):
    """Request a generated image for ``prompt`` and return it as a PIL image.

    Failures are reported as a message string starting with "❌" rather than
    raised; callers distinguish success from failure by the result type.

    Args:
        prompt: Text description sent as ``inputs``.
        model: Model id used in the endpoint URL. An empty value is replaced
            by ``DEFAULT_IMAGE_MODEL``.
        width: Requested image width.
        height: Requested image height.
        negative_prompt: Negative prompt; a built-in default list is used
            when it is empty.
        seed: Optional seed, only sent when it is not ``None``.

    Returns:
        A ``PIL.Image.Image`` on HTTP 200 with decodable content. On HTTP 404
        or 503 the request is repeated once with ``FALLBACK_IMAGE_MODEL`` and
        both dimensions capped at 512. Any other status, a transport failure,
        or undecodable image data yields an error message string.
    """
    headers = get_auth_headers()
    if not headers:
        return "❌ LỖI: HF_TOKEN chưa được cấu hình. Vui lòng vào Space Settings → Secrets và thêm HF_TOKEN."

    # An empty model (e.g. a cleared UI textbox) would produce a broken URL.
    model = model or DEFAULT_IMAGE_MODEL

    payload = {
        "inputs": prompt,
        "parameters": {
            "negative_prompt": negative_prompt or "blurry, low quality, watermark, text, signature, ugly, deformed",
            "width": width,
            "height": height,
            "guidance_scale": 7.5,
            "num_inference_steps": 50
        }
    }
    if seed is not None:
        payload["parameters"]["seed"] = seed

    try:
        # The client is closed before a fallback request is started, so the
        # retry does not nest a second client inside the first one.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{HF_API_URL}/models/{model}",
                headers=headers,
                json=payload,
            )
    except httpx.HTTPError as exc:
        # Timeouts and connection failures are reported like every other
        # failure of this helper: as a message, not as an exception.
        return f"❌ LỖI kết nối HF Image API: {exc}"

    if response.status_code == 200:
        try:
            img = Image.open(io.BytesIO(response.content))
            # Image.open only reads the header; load() forces the full decode
            # so corrupt or truncated data is reported here, not later.
            img.load()
        except Exception as e:
            return f"❌ Lỗi decode ảnh: {e}"
        return img
    if response.status_code in (404, 503) and model != FALLBACK_IMAGE_MODEL:
        return await _hf_image_generation(prompt, FALLBACK_IMAGE_MODEL, min(width, 512), min(height, 512), negative_prompt, seed)
    return f"❌ LỖI HF Image API ({response.status_code}): {response.text[:500]}"

async def _hf_vision_chat(messages, model=DEFAULT_VISION_MODEL, max_tokens=1024, temperature=0.3):
    """Send chat ``messages`` to a vision model and return the reply text.

    Failures are reported as a message string starting with "❌" rather than
    raised, so the result can always be shown in a text output.

    Args:
        messages: Chat messages, sent unchanged as the ``messages`` field.
        model: Model id used in the endpoint URL and payload. An empty value
            is replaced by ``DEFAULT_VISION_MODEL``.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.

    Returns:
        The content of the first choice on HTTP 200. On HTTP 404 or 503 the
        request is repeated once with ``FALLBACK_VISION_MODEL``. Any other
        status, a transport failure, or an unexpected response body yields an
        error message string.
    """
    headers = get_auth_headers()
    if not headers:
        return "❌ LỖI: HF_TOKEN chưa được cấu hình. Vui lòng vào Space Settings → Secrets và thêm HF_TOKEN."

    # An empty model (e.g. a cleared UI textbox) would produce a broken URL.
    model = model or DEFAULT_VISION_MODEL

    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False
    }

    try:
        # The client is closed before a fallback request is started, so the
        # retry does not nest a second client inside the first one.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{HF_API_URL}/models/{model}/v1/chat/completions",
                headers=headers,
                json=payload,
            )
    except httpx.HTTPError as exc:
        # Timeouts and connection failures are reported like every other
        # failure of this helper: as a message, not as an exception.
        return f"❌ LỖI kết nối HF Vision API: {exc}"

    if response.status_code == 200:
        try:
            return response.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            return f"❌ LỖI HF Vision API: phản hồi không hợp lệ ({exc!r})"
    if response.status_code in (404, 503) and model != FALLBACK_VISION_MODEL:
        return await _hf_vision_chat(messages, FALLBACK_VISION_MODEL, max_tokens, temperature)
    return f"❌ LỖI HF Vision API ({response.status_code}): {response.text[:500]}"

# ================= SYNC WRAPPERS =================
def sync_text_gen(prompt, model, max_tokens, temperature, system_prompt):
    """Blocking wrapper around ``_hf_text_generation`` for UI callbacks.

    Returns the generated text, or an error message string when the token is
    missing or the request helper reports a failure.
    """
    if HF_TOKEN:
        request = _hf_text_generation(prompt, model, max_tokens, temperature, system_prompt)
        return _run_async(request)
    return "❌ LỖI: HF_TOKEN chưa được cấu hình. Vui lòng vào Space Settings → Secrets và thêm HF_TOKEN (token HuggingFace của bạn)."

def sync_image_gen(prompt, model, width, height, negative_prompt, seed):
    """Blocking wrapper around ``_hf_image_generation`` for UI callbacks.

    The result feeds an image output component, which cannot display an
    error message passed to it as a plain string. Failures are therefore
    raised as ``gr.Error`` so the message is shown to the user.

    Returns:
        The generated PIL image.

    Raises:
        gr.Error: When ``HF_TOKEN`` is not configured, or when the request
            helper reports a failure (it does so by returning a string).
    """
    if not HF_TOKEN:
        raise gr.Error("❌ LỖI: HF_TOKEN chưa được cấu hình. Vui lòng vào Space Settings → Secrets và thêm HF_TOKEN (token HuggingFace của bạn).")
    result = _run_async(_hf_image_generation(prompt, model, width, height, negative_prompt, seed))
    if isinstance(result, str):
        # The helper signals every failure with a message string.
        raise gr.Error(result)
    return result

def sync_vision_chat(messages_json, model, max_tokens, temperature):
    """Blocking wrapper around ``_hf_vision_chat`` for UI callbacks.

    ``messages_json`` is a JSON-encoded list of chat messages; it is decoded
    here before being handed to the async helper. Returns the model's reply,
    or an error message string when the token is missing or the request
    helper reports a failure.
    """
    if HF_TOKEN:
        chat_messages = json.loads(messages_json)
        request = _hf_vision_chat(chat_messages, model, max_tokens, temperature)
        return _run_async(request)
    return "❌ LỖI: HF_TOKEN chưa được cấu hình. Vui lòng vào Space Settings → Secrets và thêm HF_TOKEN (token HuggingFace của bạn)."

# ================= GRADIO UI =================
# Build the whole UI inside one Blocks context. Components are created in the
# order and nesting written here, so the statement order defines the layout.
with gr.Blocks(title="Comic AI Generator") as demo:
    gr.Markdown("# 🔥 Comic AI Generator - Tạo Truyện Tranh Bằng AI")
    gr.Markdown("Sử dụng Hugging Face Inference API để tạo văn bản, hình ảnh, và phân tích ảnh.")

    # Warning banner: evaluated once while the UI is built, so it reflects
    # whether HF_TOKEN was set when the module was loaded.
    if not HF_TOKEN:
        gr.Markdown(
            "⚠️ **Cảnh báo**: HF_TOKEN chưa được cấu hình. "
            "Vui lòng vào [Space Settings → Secrets](https://huggingface.co/spaces/bep40/comic-ai-generator/settings/secrets) "
            "và thêm secret `HF_TOKEN` với giá trị là token HuggingFace của bạn."
        )

    # ----- Tab 1: text / story generation -----
    with gr.Tab("📝 Tạo Văn Bản / Cốt Truyện"):
        with gr.Row():
            with gr.Column(scale=2):
                text_prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Nhập ý tưởng cốt truyện hoặc yêu cầu văn bản...")
                text_model = gr.Textbox(label="Model", value=DEFAULT_TEXT_MODEL)
                text_system = gr.Textbox(label="System Prompt (tùy chọn)", lines=2, placeholder="Bạn là một tác giả truyện tranh chuyên nghiệp...")
            with gr.Column(scale=1):
                text_max_tokens = gr.Slider(label="Max Tokens", minimum=64, maximum=4096, value=2048, step=64)
                text_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, value=0.7, step=0.1)
                text_gen_btn = gr.Button("🚀 Tạo Văn Bản", variant="primary")
        text_output = gr.Textbox(label="Kết quả", lines=12)
        # The inputs list follows the positional parameter order of
        # sync_text_gen(prompt, model, max_tokens, temperature, system_prompt).
        text_gen_btn.click(
            fn=sync_text_gen,
            inputs=[text_prompt, text_model, text_max_tokens, text_temperature, text_system],
            outputs=text_output
        )

    # ----- Tab 2: image generation -----
    with gr.Tab("🎨 Tạo Hình Ảnh"):
        with gr.Row():
            with gr.Column(scale=2):
                img_prompt = gr.Textbox(label="Prompt", lines=3, placeholder="Mô tả hình ảnh bạn muốn tạo...")
                img_neg_prompt = gr.Textbox(label="Negative Prompt", lines=2, value="blurry, low quality, watermark, text, signature, ugly, deformed")
            with gr.Column(scale=1):
                img_model = gr.Textbox(label="Model", value=DEFAULT_IMAGE_MODEL)
                img_width = gr.Slider(label="Width", minimum=256, maximum=1024, value=1024, step=64)
                img_height = gr.Slider(label="Height", minimum=256, maximum=1024, value=1024, step=64)
                img_seed = gr.Number(label="Seed (tùy chọn)", value=None, precision=0)
                img_gen_btn = gr.Button("🎨 Tạo Hình Ảnh", variant="primary")
        img_output = gr.Image(label="Ảnh tạo ra", type="pil")
        # The inputs list follows the positional parameter order of
        # sync_image_gen(prompt, model, width, height, negative_prompt, seed).
        img_gen_btn.click(
            fn=sync_image_gen,
            inputs=[img_prompt, img_model, img_width, img_height, img_neg_prompt, img_seed],
            outputs=img_output
        )

    # ----- Tab 3: image analysis with a vision model -----
    with gr.Tab("👁️ Phân Tích Ảnh (Vision)"):
        with gr.Row():
            with gr.Column(scale=1):
                vision_image = gr.Image(label="Upload ảnh", type="pil")
                vision_model = gr.Textbox(label="Vision Model", value=DEFAULT_VISION_MODEL)
                vision_task = gr.Dropdown(
                    label="Tác vụ",
                    choices=["detect_characters", "detect_items", "extract_setting", "custom"],
                    value="detect_characters"
                )
                # Hidden at start; update_vision_visibility shows it for the "custom" task.
                vision_custom = gr.Textbox(label="Câu hỏi tùy chỉnh", lines=2, visible=False)
                vision_max_tokens = gr.Slider(label="Max Tokens", minimum=64, maximum=2048, value=1024, step=64)
                vision_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.3, step=0.1)
                vision_btn = gr.Button("🔍 Phân Tích", variant="primary")
            with gr.Column(scale=2):
                vision_output = gr.Textbox(label="Kết quả phân tích", lines=12)

        def update_vision_visibility(task):
            """Return an update that shows the custom-question box only for the "custom" task."""
            return gr.update(visible=(task == "custom"))

        vision_task.change(fn=update_vision_visibility, inputs=vision_task, outputs=vision_custom)

        def run_vision(image, task, custom_prompt, model, max_tokens, temperature):
            """Build a vision chat request for the uploaded image and return the reply.

            The image is encoded as a base64 PNG data URL and sent together
            with the prompt selected by ``task`` in a single user message.
            Returns a notice string when no image has been uploaded.
            """
            if image is None:
                return "Vui lòng upload ảnh trước."
            # Serialize the PIL image to PNG in memory, then base64-encode it
            # for embedding in a data URL.
            buf = io.BytesIO()
            image.save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode()

            # One fixed prompt per predefined task; "custom" uses the user's text.
            prompts = {
                "detect_characters": "Analyze this image carefully. Identify all main characters/people. For each, return JSON with: name (Vietnamese), physical description, gender, age_category. Return ONLY a JSON array.",
                "detect_items": "Analyze this image. Identify all distinct objects, accessories, props. For each, return JSON with: vi_name (Vietnamese), en_desc (English description). Return ONLY a JSON array.",
                "extract_setting": "Describe the background/setting of this image in detail. Return JSON with: setting (string), isPlainBackground (boolean), key_products (array). Return ONLY a JSON object.",
                "custom": custom_prompt
            }
            # A task name that is not in the table falls back to the custom prompt.
            task_prompt = prompts.get(task, custom_prompt)
            sys_prompt = "You are an image analysis assistant. Always respond with valid JSON only. No markdown, no explanation, no code fences."

            # The system-style instructions are folded into the user text
            # instead of being sent as a separate system message.
            messages = [{
                "role": "user",
                "content": [
                    {"type": "text", "text": f"{sys_prompt}\n\n{task_prompt}\n\nIMPORTANT: Return ONLY valid JSON."},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}
                ]
            }]
            # sync_vision_chat takes the messages as a JSON string and decodes it itself.
            return sync_vision_chat(json.dumps(messages), model, max_tokens, temperature)

        vision_btn.click(
            fn=run_vision,
            inputs=[vision_image, vision_task, vision_custom, vision_model, vision_max_tokens, vision_temperature],
            outputs=vision_output
        )

    # ----- Tab 4: static configuration overview -----
    # The values are computed once while the UI is built. Only whether a token
    # is set and its length are shown, never the token itself.
    with gr.Tab("⚙️ Kiểm Tra API"):
        api_status = gr.JSON(label="Trạng thái API", value={
            "hf_token_configured": bool(HF_TOKEN),
            "hf_token_length": len(HF_TOKEN),
            "text_model": DEFAULT_TEXT_MODEL,
            "vision_model": DEFAULT_VISION_MODEL,
            "image_model": DEFAULT_IMAGE_MODEL,
            "message": "Kiểm tra xem HF_TOKEN đã được cấu hình trong Space Settings > Secrets chưa."
        })

if __name__ == "__main__":
    # Start the server only when this file is run as a script: listen on all
    # network interfaces, port 7860, with server-side rendering switched off.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )