"""Streamlit app that captions an uploaded image with the BLIP large model."""

import streamlit as st
import torch
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Hugging Face model id shared by the processor and the model.
MODEL_NAME = "Salesforce/blip-image-captioning-large"
# Size (width, height) of the preview shown in the UI; the caption itself is
# generated from the full-resolution image, not from this preview.
PREVIEW_SIZE = (400, 400)


@st.cache_resource
def load_model():
    """Load the BLIP processor and captioning model.

    Decorated with ``st.cache_resource`` so the loaded objects can be reused
    instead of being re-created each time the script is rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = BlipProcessor.from_pretrained(MODEL_NAME)
    model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)
    return processor, model


def generate_caption(image, max_new_tokens=20):
    """Generate a text caption for ``image``.

    Args:
        image: Image to caption, in a form accepted by ``BlipProcessor``
            (the UI below passes an RGB PIL image).
        max_new_tokens: Forwarded to ``model.generate`` as the limit on newly
            generated tokens.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens skipped.
    """
    processor, model = load_model()
    inputs = processor(image, return_tensors="pt")
    # Inference only: no gradients are needed for caption generation.
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return processor.decode(out[0], skip_special_tokens=True)


# Streamlit UI
st.title("Image Captioning with BLIP")

# Upload image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        # The file extension filter does not guarantee valid image data;
        # report unreadable uploads instead of crashing with a traceback.
        st.error("The uploaded file could not be read as an image.")
    else:
        # Display the uploaded image resized to the fixed preview size.
        resized_image = image.resize(PREVIEW_SIZE)
        st.image(resized_image, caption="Uploaded Image", use_column_width=False)

        # Generate caption
        if st.button("Generate Caption"):
            with st.spinner("Generating caption..."):
                caption = generate_caption(image)
            st.write(f"**Caption:** {caption}")