File size: 1,014 Bytes
1f0acb1
 
 
d5c49aa
1f0acb1
b857d30
1f0acb1
6795ea1
 
 
c779fe7
b5500eb
6795ea1
 
 
 
 
 
 
1f0acb1
b5500eb
4f39df5
871a354
c779fe7
871a354
c779fe7
 
 
 
871a354
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
import os
import streamlit as st
import tempfile


# Safe cache location: a dedicated directory under the system temp dir,
# created up front so the downloads below have somewhere to land.
HF_CACHE_DIR = os.path.join(tempfile.gettempdir(), "hf_cache")
os.makedirs(HF_CACHE_DIR, exist_ok=True)

# Environment variable setup (ONLY HF_HOME is touched).
# NOTE(review): transformers is imported before this line runs, so any
# library code that reads HF_HOME at import time may not see this value --
# confirm. The explicit cache_dir arguments below do not depend on it.
os.environ.update(HF_HOME=HF_CACHE_DIR)

# Load the BLIP processor and model once at import time, passing cache_dir
# explicitly to each loader.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(
    _BLIP_CHECKPOINT,
    cache_dir=HF_CACHE_DIR,
)
model = BlipForConditionalGeneration.from_pretrained(
    _BLIP_CHECKPOINT,
    cache_dir=HF_CACHE_DIR,
)


def generate_caption(image_path):
    """Generate a text caption for an image with the module-level BLIP model.

    Uses the ``processor`` and ``model`` objects loaded at import time. The
    previous body called ``load_blip_model()``, which is not defined in this
    file and raised ``NameError`` on every call.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (a path or a
            file-like object).

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # Open inside a context manager so the underlying file handle is released
    # promptly; convert() loads the pixel data and returns a new image that
    # stays usable after the file is closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    inputs = processor(image, return_tensors="pt")

    # Inference only: gradients are not needed, so skip tracking them.
    with torch.no_grad():
        out = model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)