# app.py

import os
import subprocess

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# โš™๏ธ flashโ€attn ์„ค์น˜ (CUDA ๋นŒ๋“œ๋ฅผ ๊ฑด๋„ˆ๋œ๋‹ˆ๋‹ค)
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True
)

# 1. Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"

# 2. Load the Florence-2 model and processor
florence_model = AutoModelForCausalLM.from_pretrained(
    'microsoft/Florence-2-base',
    trust_remote_code=True
).to(device).eval()
florence_processor = AutoProcessor.from_pretrained(
    'microsoft/Florence-2-base',
    trust_remote_code=True
)
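
# NOTE (assumption, not part of the original app): on a CUDA device you could
# cut memory use roughly in half by loading the weights in fp16, e.g.:
#
#     florence_model = AutoModelForCausalLM.from_pretrained(
#         'microsoft/Florence-2-base',
#         torch_dtype=torch.float16,
#         trust_remote_code=True,
#     ).to(device).eval()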

# 3. Caption generation function
def generate_caption(image):
    # gr.Image(type="pil") hands over a PIL image, but convert defensively
    # in case a numpy array arrives instead.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    
    # Instruction asking for a detailed Korean description of about 30-50 words
    instruction = (
        "Describe this image in detail in Korean, in 30 to 50 words. "
        "Cover the background, colors, textures, people's expressions and clothing, "
        "lighting, composition, and mood."
    )
    
    inputs = florence_processor(
        text=instruction,
        images=image,
        return_tensors="pt"
    ).to(device)
    
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        do_sample=False,
        num_beams=3,
        early_stopping=False,
    )
    
    generated_text = florence_processor.batch_decode(
        generated_ids,
        skip_special_tokens=False
    )[0]
    
    parsed = florence_processor.post_process_generation(
        generated_text,
        task=instruction,
        image_size=(image.width, image.height)
    )
    prompt = parsed[instruction]
    
    # Touch up "Asian" → "Korean" where it appears (no-op otherwise)
    prompt = prompt.replace("Asian", "Korean")
    
    print("โœ… ์ƒ์„ฑ ์™„๋ฃŒ:\n", prompt)
    return prompt
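
# NOTE (assumption): Florence-2 is trained on a fixed set of task prompts
# (e.g. "<CAPTION>", "<MORE_DETAILED_CAPTION>") rather than free-form
# instructions, so the Korean instruction above may be ignored and the model
# tends to answer in English (hence the "Asian" → "Korean" touch-up).
# A minimal sketch of the documented task-prompt usage:
#
#     task = "<MORE_DETAILED_CAPTION>"
#     inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
#     generated_ids = florence_model.generate(
#         input_ids=inputs["input_ids"],
#         pixel_values=inputs["pixel_values"],
#         max_new_tokens=1024,
#         num_beams=3,
#     )
#     raw = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
#     parsed = florence_processor.post_process_generation(
#         raw, task=task, image_size=(image.width, image.height)
#     )
#     caption = parsed[task]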

# 4. Build the Gradio Blocks interface (keeps the caricature button)
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as demo:
    gr.Markdown("## ๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ ์„ค๋ช… ์ƒ์„ฑ๊ธฐ")
    gr.Markdown(
        "⚠ Currently running in CPU mode, so generation may be slow. Thank you for your patience."
    )

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Input Image", type="pil")
        with gr.Column():
            # ⇨ lines raised from 3 to 6 to double the text box height
            caption_output = gr.Textbox(
                label="Generated Caption",
                lines=6,
                show_copy_button=True
            )
            # 'Create Caricature' button at the bottom right
            gr.HTML("""
            <div style='margin-top: 10px; text-align: center;'>
                <a href="https://huggingface.co/spaces/VIDraft/stable-diffusion-3.5-large-turboX" target="_blank">
                    <button style='
                        padding: 10px 20px;
                        background-color: #ff9900;
                        color: white;
                        border: none;
                        border-radius: 10px;
                        font-size: 16px;
                        box-shadow: 2px 2px 8px rgba(0,0,0,0.3);
                        cursor: pointer;
                    '>
                        🎨 Create Caricature
                    </button>
                </a>
            </div>
            """)

    # Call generate_caption automatically on upload
    image_input.upload(
        fn=generate_caption,
        inputs=image_input,
        outputs=caption_output
    )
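
    # NOTE (assumption): .upload() fires only on a file upload; if the image
    # can also change programmatically (e.g. via examples or a clear button),
    # additionally binding image_input.change(...) would cover those updates.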

# 5. Launch the web app
if __name__ == "__main__":
    demo.launch(debug=True)
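
# NOTE (assumption): a typical requirements.txt for this Space would include
# gradio, torch, transformers, and pillow; Florence-2's remote code also
# imports einops and timm, so those belong in the list as well.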



# Previous BLIP-based implementation, kept for reference:

# import gradio as gr
# import torch
# from PIL import Image
# from transformers import BlipProcessor, BlipForConditionalGeneration

# # 1. Device setup
# device = "cuda" if torch.cuda.is_available() else "cpu"

# # 2. Load the model and processor
# processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# # 3. Caption generation function
# def generate_caption(image):
#     if image is None:
#         return "Please upload an image."

#     # Resize for faster processing
#     image = image.resize((384, 384))

#     # Generate the caption
#     inputs = processor(images=image, return_tensors="pt").to(device)
#     output_ids = model.generate(**inputs, max_length=50)
#     caption = processor.decode(output_ids[0], skip_special_tokens=True)
#     print("✅ Generated caption:", caption)
#     return caption

# # 4. Build the Gradio interface
# with gr.Blocks(title="Image Caption Generator") as demo:
#     gr.Markdown("## 🖼️ Upload an image and a caption is generated automatically.")

#     with gr.Row():
#         with gr.Column():
#             image_input = gr.Image(label="Input Image", type="pil")
#         with gr.Column():
#             caption_output = gr.Textbox(label="Generated Caption", lines=3, show_copy_button=True)
#             # Button rendered via HTML
#             gr.HTML("""
#             <div style='margin-top: 10px; text-align: center;'>
#                 <a href="https://huggingface.co/spaces/VIDraft/stable-diffusion-3.5-large-turboX" target="_blank">
#                     <button style='padding: 10px 20px; background-color: #ff9900; color: white; border: none; border-radius: 10px; font-size: 16px; box-shadow: 2px 2px 8px rgba(0,0,0,0.3); cursor: pointer;'>
#                         🎨 Create Caricature
#                     </button>
#                 </a>
#             </div>
#             """)

#     # Wire upload → automatic caption generation
#     image_input.upload(fn=generate_caption, inputs=image_input, outputs=caption_output)

# # 5. Launch the app
# demo.launch(debug=True)