# Spaces: Sleeping
# File size: 1,929 Bytes
import streamlit as st
from PIL import Image
import torch
import easyocr
from transformers import CLIPProcessor, CLIPModel
# ---- Load CLIP Model ---- #
@st.cache_resource
def load_clip_model():
    """Load the CLIP model and its matching processor.

    The function is wrapped in ``st.cache_resource``, so Streamlit is expected
    to reuse the returned objects across script reruns instead of reloading
    the weights each time.

    Returns:
        tuple: ``(model, processor)`` where ``model`` is the ``CLIPModel`` and
        ``processor`` is the ``CLIPProcessor`` loaded from the same checkpoint.
    """
    checkpoint = "fxmarty/clip-vision-model-tiny"

    # ignore_mismatched_sizes=True works around the model size mismatch
    # reported when loading this checkpoint.
    clip_model = CLIPModel.from_pretrained(checkpoint, ignore_mismatched_sizes=True)
    clip_processor = CLIPProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor
# ---- Page config ---- #
# Configure the page before any other Streamlit call. The cached loaders below
# can render a spinner on a cold cache, and Streamlit versions that require
# set_page_config() to be the first command would otherwise raise.
st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")

model, processor = load_clip_model()


# ---- Load OCR (EasyOCR) ---- #
@st.cache_resource
def load_ocr():
    """Return an EasyOCR reader for English, cached via ``st.cache_resource``."""
    return easyocr.Reader(['en'])


reader = load_ocr()

# ---- Streamlit UI ---- #
st.title("๐ผ๏ธ Multimodal AI Assistant")
st.write("Upload an image and ask a question about it!")

# ---- Upload Image ---- #
uploaded_file = st.file_uploader("๐ค Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Display Image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Extract Text using OCR
    with st.spinner("๐ Extracting text from image..."):
        # EasyOCR accepts a path/URL string, raw bytes or a numpy array, not a
        # file-like upload object. getvalue() returns the full encoded image
        # regardless of how far PIL has already read the stream.
        extracted_text = reader.readtext(uploaded_file.getvalue(), detail=0)

    st.write("### ๐ Extracted Text:")
    if extracted_text:
        # detail=0 yields a list of text fragments; join them so the user sees
        # the text itself rather than a Python list repr.
        st.success(" ".join(extracted_text))
    else:
        st.warning("No readable text found in the image.")

    # ---- Ask a Question About the Image ---- #
    user_question = st.text_input("๐ค Ask a question about the image:")

    if user_question:
        with st.spinner("๐ Analyzing image and generating response..."):
            inputs = processor(text=[user_question], images=image, return_tensors="pt")
            # The processor output also carries the tokenized question
            # (input_ids / attention_mask); the image tower only takes
            # pixel_values, so pass that explicitly. Inference only, so no
            # gradients are needed.
            with torch.no_grad():
                # NOTE: the features are not consumed yet; the response below
                # is a placeholder.
                outputs = model.get_image_features(pixel_values=inputs["pixel_values"])

        st.write("### ๐ AI Response:")
        st.write("CLIP Model has processed the image! (Further improvements coming soon)")
|