"""Streamlit image-captioning helper built on Salesforce's BLIP model."""

from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
import os
import streamlit as st
import tempfile

# Use a guaranteed-writable temp-dir cache so Hugging Face downloads
# succeed even in sandboxed/read-only deployments.
HF_CACHE_DIR = os.path.join(tempfile.gettempdir(), "hf_cache")
os.makedirs(HF_CACHE_DIR, exist_ok=True)

# Environment variable setup (ONLY HF_HOME) — points all HF libraries
# at the same cache directory.
os.environ["HF_HOME"] = HF_CACHE_DIR


@st.cache_resource
def load_blip_model():
    """Load and cache the BLIP captioning processor/model pair.

    Decorated with ``st.cache_resource`` so the (large) model is
    downloaded and instantiated once per Streamlit server process
    instead of on every script rerun.

    Returns:
        tuple[BlipProcessor, BlipForConditionalGeneration]
    """
    processor = BlipProcessor.from_pretrained(
        "Salesforce/blip-image-captioning-base", cache_dir=HF_CACHE_DIR
    )
    model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-base", cache_dir=HF_CACHE_DIR
    )
    return processor, model


# Keep the module-level names for backward compatibility with any code
# importing `processor`/`model` directly; the cached loader makes this
# a one-time cost per process.
processor, model = load_blip_model()


def generate_caption(image_path):
    """Return a generated English caption for the image at *image_path*.

    Args:
        image_path: Path to an image file readable by PIL.

    Returns:
        str: The decoded caption, with special tokens stripped.
    """
    # BUGFIX: load_blip_model() was called here but never defined,
    # raising NameError on every call — it is now defined above.
    processor, model = load_blip_model()
    image = Image.open(image_path).convert("RGB")
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():  # performance: no gradients needed for inference
        out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)