import os

# Fix permissions: move cache/config to /tmp (writable in Hugging Face Spaces).
# These MUST be set before importing streamlit/transformers, which read them at import time.
os.environ["HF_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp"  # legacy var; HF_HOME covers newer transformers
os.environ["STREAMLIT_CACHE_DIR"] = "/tmp"
os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"
os.environ["XDG_CONFIG_HOME"] = "/tmp"

import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Single source of truth for the checkpoint name (was duplicated in two calls).
MODEL_ID = "Salesforce/blip-image-captioning-base"

# BUG FIX: st.set_page_config() must be the FIRST Streamlit command executed.
# Previously it ran after load_model(); on a cache miss st.cache_resource emits
# a spinner (a Streamlit command), making set_page_config raise
# StreamlitAPIException. Moved above the model-loading call.
st.set_page_config(page_title="Image → Text Captioning", page_icon="🖼️")


@st.cache_resource
def load_model():
    """Load and cache the BLIP captioning processor + model.

    Weights are downloaded to /tmp (the only writable location in Spaces)
    and memoized for the lifetime of the server process via st.cache_resource.

    Returns:
        tuple: (BlipProcessor, BlipForConditionalGeneration)
    """
    processor = BlipProcessor.from_pretrained(MODEL_ID, cache_dir="/tmp")
    model = BlipForConditionalGeneration.from_pretrained(MODEL_ID, cache_dir="/tmp")
    return processor, model


processor, model = load_model()

# ---- Streamlit UI ----
st.title("🖼️ Image to Text (Caption Generator)")
st.write("Upload an image and get a text caption generated by a Transformer model 🚀")

# Upload image
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Force RGB: BLIP expects 3-channel input; PNGs may be RGBA/palette.
    image = Image.open(uploaded_file).convert("RGB")
    # NOTE(review): use_column_width is deprecated in newer Streamlit releases
    # in favor of use_container_width — kept as-is for compatibility with the
    # pinned version; confirm before upgrading.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if st.button("✨ Generate Caption"):
        with st.spinner("Generating caption... please wait ⏳"):
            inputs = processor(image, return_tensors="pt")
            # max_new_tokens bounds caption length; generate() runs in
            # inference mode internally, so no explicit no_grad needed.
            output_ids = model.generate(**inputs, max_new_tokens=30)
            caption = processor.decode(output_ids[0], skip_special_tokens=True)
        st.subheader("📝 Generated Caption:")
        st.success(caption)