Dua Rajper
Create app.py
09c368a verified
raw
history blame
1.61 kB
import streamlit as st
from PIL import Image
import easyocr
from transformers import pipeline, AutoTokenizer, AutoModel
# Load CLIP model components once per session. st.cache_resource keeps the
# returned objects alive across Streamlit reruns, so the weights are only
# downloaded/loaded a single time.
@st.cache_resource
def load_clip_model():
    """Load the tiny CLIP checkpoint and its helpers.

    Returns:
        tuple: (pipe, tokenizer, model) where
            pipe      -- a transformers feature-extraction pipeline,
            tokenizer -- the checkpoint's tokenizer,
            model     -- the raw AutoModel.
    """
    model_id = "fxmarty/clip-vision-model-tiny"
    pipe = pipeline("feature-extraction", model=model_id)
    # NOTE(review): this checkpoint is a *vision* model; it may not ship a
    # tokenizer at all, in which case AutoTokenizer.from_pretrained raises.
    # Confirm against the model repo before relying on it.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModel.from_pretrained(model_id)
    return pipe, tokenizer, model


pipe, tokenizer, model = load_clip_model()
# Initialize the OCR engine once per session; cached so the EasyOCR
# detection/recognition weights are loaded a single time.
@st.cache_resource
def load_ocr():
    """Return an English-language EasyOCR reader."""
    return easyocr.Reader(['en'])


reader = load_ocr()
# ---------------------------------------------------------------------------
# Streamlit UI: upload an image, OCR any text in it, then accept a free-form
# question about the image.
# ---------------------------------------------------------------------------
st.title("🖼️ Multimodal AI Assistant")
st.write("Upload an image and ask a question about it!")

# Upload image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Extract text using OCR. Pass the raw bytes: Image.open() above has
    # already advanced the UploadedFile's read position, and EasyOCR expects
    # a path, bytes, or ndarray — not a Streamlit UploadedFile object.
    with st.spinner("Extracting text from image..."):
        extracted_text = reader.readtext(uploaded_file.getvalue(), detail=0)
    st.write("### 📝 Extracted Text:", extracted_text)

    # User asks a question about the image.
    user_question = st.text_input("🤖 Ask a question about the image:")
    if user_question:
        with st.spinner("Analyzing image and question..."):
            # NOTE(review): the loaded checkpoint is a vision-only CLIP model;
            # feeding it text token ids is likely meaningless (or an error).
            # The outputs are currently unused — confirm intent before
            # building on this.
            inputs = tokenizer(user_question, return_tensors="pt")
            outputs = model(**inputs)
        st.write("### 🏆 AI Response:")
        st.write("CLIP Model Processed the Input! (Further improvements coming soon)")