"""Streamlit front-end for the BLIP image-caption generator.

Lets a visitor upload a JPG/JPEG/PNG image, runs it through
``blip_inference.generate_blip_caption``, and displays the generated
caption next to the image.
"""

import os
import tempfile

import streamlit as st
from PIL import Image

from blip_inference import generate_blip_caption

st.set_page_config(page_title="Image Caption Generator (BLIP)", page_icon="🖼️")
st.title("Image Caption Generator 🖼️ ➡️ 📝")
st.write(
    """ Welcome to the Image Caption Generator! Upload an image and the **Salesforce BLIP** (Bootstrapping Language-Image Pre-training) model will generate an incredibly accurate, descriptive caption for it. """
)

# Sidebar with status
with st.sidebar:
    st.header("System Status")
    st.success("✅ BLIP Large Model Integrated")
    st.write("The large model provides maximum accuracy and will download from HuggingFace on your very first inference if it isn't cached locally.")

uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image side-by-side with the caption output.
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Your Image")
        image = Image.open(uploaded_file)
        st.image(image, use_container_width=True)

    with col2:
        st.subheader("Generated Caption")
        with st.spinner("Generating caption (BLIP)..."):
            # Save the upload to a UNIQUE temp file for the inference script.
            # A fixed path (e.g. "temp_image.jpg") is shared by every session
            # of this Streamlit process, so concurrent users would overwrite
            # or delete each other's images; NamedTemporaryFile avoids that.
            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
                temp_path = tmp.name
            image.convert("RGB").save(temp_path)
            try:
                # Call the BLIP inference function on the saved image.
                caption = generate_blip_caption(temp_path)
                st.success(f"**{caption.capitalize()}**")
            except Exception as e:
                # Surface the failure in the UI rather than crashing the app.
                st.error(f"Error generating caption: {e}")
            finally:
                # Always clean up the temp file, even on failure.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

st.markdown("---")
st.markdown("*Built with PyTorch, Transformers & Streamlit*")