import os

# Fix permissions: move cache/config to /tmp (writable in Hugging Face Spaces).
# These MUST be set before importing streamlit/transformers, which read them at import time.
os.environ["HF_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp"  # legacy var; HF_HOME covers newer transformers
os.environ["STREAMLIT_CACHE_DIR"] = "/tmp"
os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"
os.environ["XDG_CONFIG_HOME"] = "/tmp"

import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Single source of truth for the checkpoint name (was duplicated in two calls).
MODEL_ID = "Salesforce/blip-image-captioning-base"

# BUG FIX: st.set_page_config() must be the FIRST Streamlit command executed.
# Previously it ran after load_model(); on a cache miss st.cache_resource emits
# a spinner (a Streamlit command), making set_page_config raise
# StreamlitAPIException. Moved above the model-loading call.
st.set_page_config(page_title="Image → Text Captioning", page_icon="🖼️")


@st.cache_resource
def load_model():
    """Load and cache the BLIP captioning processor + model.

    Weights are downloaded to /tmp (the only writable location in Spaces)
    and memoized for the lifetime of the server process via st.cache_resource.

    Returns:
        tuple: (BlipProcessor, BlipForConditionalGeneration)
    """
    processor = BlipProcessor.from_pretrained(MODEL_ID, cache_dir="/tmp")
    model = BlipForConditionalGeneration.from_pretrained(MODEL_ID, cache_dir="/tmp")
    return processor, model


processor, model = load_model()

# ---- Streamlit UI ----
st.title("🖼️ Image to Text (Caption Generator)")
st.write("Upload an image and get a text caption generated by a Transformer model 🚀")

# Upload image
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Force RGB: BLIP expects 3-channel input; PNGs may be RGBA/palette.
    image = Image.open(uploaded_file).convert("RGB")
    # NOTE(review): use_column_width is deprecated in newer Streamlit releases
    # in favor of use_container_width — kept as-is for compatibility with the
    # pinned version; confirm before upgrading.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if st.button("✨ Generate Caption"):
        with st.spinner("Generating caption... please wait ⏳"):
            inputs = processor(image, return_tensors="pt")
            # max_new_tokens bounds caption length; generate() runs in
            # inference mode internally, so no explicit no_grad needed.
            output_ids = model.generate(**inputs, max_new_tokens=30)
            caption = processor.decode(output_ids[0], skip_special_tokens=True)
        st.subheader("📝 Generated Caption:")
        st.success(caption)