File size: 1,565 Bytes
078801f
 
4e14b22
 
 
 
5a8c443
b08c085
5a8c443
b08c085
 
5a8c443
4e14b22
65aba2a
4e14b22
 
 
 
1d1bef1
 
 
 
 
 
5a8c443
 
e8d2c0e
 
4e14b22
 
5a8c443
b59ed0f
c17b215
4e14b22
3120115
4e14b22
 
 
 
 
 
 
 
5a8c443
4e14b22
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#Libraries

import streamlit as st
from transformers import BlipForConditionalGeneration, AutoTokenizer
import torch
from PIL import Image
import torchvision.transforms as transforms

# Choose the compute device first so the model can be placed on it right away.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned model and tokenizer.
# The model is moved to `device` because generate_caption() sends its input
# tensor there; leaving the weights on the CPU while the input is on the GPU
# raises a device-mismatch error on CUDA hosts.
# NOTE(review): Streamlit re-executes this script on every interaction, so
# these loads run again on each rerun; consider caching them (e.g. with
# st.cache_resource) if the installed Streamlit version supports it.
model = BlipForConditionalGeneration.from_pretrained("MLInAi/CartoonCaptionGen").to(device)
tokenizer = AutoTokenizer.from_pretrained("MLInAi/CartoonCaptionGen")

# Function to generate caption for the uploaded image
def generate_caption(image) -> str:
    """Generate a caption for an image using the module-level model.

    Args:
        image: Anything ``PIL.Image.open`` accepts, e.g. a filename or a
            binary file-like object such as a Streamlit upload.

    Returns:
        The decoded caption text with special tokens stripped.
    """
    # Preprocess the image: force 3-channel RGB and a fixed 224x224 size.
    # NOTE(review): the size and normalization constants below are hand-picked;
    # confirm they match the preprocessing the checkpoint was fine-tuned with.
    image = Image.open(image).convert("RGB")
    image = image.resize((224, 224))

    # Convert the image to a normalized float tensor
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Add the batch dimension and place the tensor on whichever device holds
    # the model's weights, so input and weights can never be on different
    # devices regardless of how the module-level setup placed the model.
    image_tensor = transform(image).unsqueeze(0).to(model.device)

    # Generate caption token ids, then decode the first (only) sequence
    output = model.generate(pixel_values=image_tensor)
    caption = tokenizer.decode(output[0], skip_special_tokens=True)
    return caption



# Streamlit app: page title, JPEG uploader, then preview and caption output.
st.title("Cartoon Caption Generator")

uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg"])

if uploaded_image is not None:
    # Echo the upload back to the user before running the model.
    st.image(uploaded_image, caption='Uploaded Image.', use_column_width=True)

    # Blank spacer line followed by a status message.
    for status_line in ("", "Generating caption..."):
        st.write(status_line)

    # Caption the uploaded file and display the result.
    caption = generate_caption(uploaded_image)
    st.write("Caption:", caption)