# app.py — Multimodal AI Assistant (Streamlit)
# Author: Dua Rajper · commit 6828d65 (Hugging Face Spaces)
import streamlit as st
from PIL import Image
import torch
import easyocr
from transformers import CLIPProcessor, CLIPModel
# ---- Load CLIP Model ---- #
@st.cache_resource
def load_clip_model():
    """Load the tiny CLIP checkpoint and its processor, cached across reruns.

    Returns:
        tuple: (CLIPModel, CLIPProcessor) for "fxmarty/clip-vision-model-tiny".
    """
    checkpoint = "fxmarty/clip-vision-model-tiny"
    # ignore_mismatched_sizes tolerates head-size mismatches in this
    # tiny test checkpoint instead of raising at load time.
    clip_model = CLIPModel.from_pretrained(checkpoint, ignore_mismatched_sizes=True)
    clip_processor = CLIPProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor


model, processor = load_clip_model()
# ---- Load OCR (EasyOCR) ---- #
@st.cache_resource
def load_ocr():
    """Build an English-language EasyOCR reader, cached across reruns."""
    return easyocr.Reader(['en'])


reader = load_ocr()
# ---- Streamlit UI ---- #
# NOTE: st.set_page_config must be the first Streamlit command in the script.
st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")
# Fixed mojibake in the title: the original contained a mis-encoded emoji.
st.title("🖼️ Multimodal AI Assistant")
st.write("Upload an image and ask a question about it!")
# ---- Upload Image ---- #
uploaded_file = st.file_uploader("📤 Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image.
    image = Image.open(uploaded_file)
    # NOTE(review): use_column_width is deprecated in recent Streamlit
    # (use_container_width) — kept for compatibility with older versions.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # ---- Extract Text using OCR ---- #
    with st.spinner("🔍 Extracting text from image..."):
        # BUGFIX: easyocr.Reader.readtext accepts a file path, raw bytes,
        # or a numpy array — not a Streamlit UploadedFile object. The
        # UploadedFile's read cursor is also already at EOF after
        # Image.open above, so pass the raw bytes explicitly.
        extracted_text = reader.readtext(uploaded_file.getvalue(), detail=0)

    st.write("### 📝 Extracted Text:")
    if extracted_text:
        st.success(extracted_text)
    else:
        st.warning("No readable text found in the image.")

    # ---- Ask a Question About the Image ---- #
    user_question = st.text_input("🤖 Ask a question about the image:")
    if user_question:
        with st.spinner("🔍 Analyzing image and generating response..."):
            inputs = processor(text=[user_question], images=image, return_tensors="pt")
            # BUGFIX: get_image_features() only accepts pixel_values.
            # Unpacking the full processor output (input_ids,
            # attention_mask, ...) raises TypeError for the unexpected
            # keyword arguments, so pass only the image tensor.
            outputs = model.get_image_features(pixel_values=inputs["pixel_values"])
        st.write("### 🏆 AI Response:")
        st.write("CLIP Model has processed the image! (Further improvements coming soon)")