Spaces:
Sleeping
Sleeping
import streamlit as st
from PIL import Image
import torch
import easyocr
from transformers import CLIPProcessor, CLIPModel
# ---- Load CLIP Model ---- #
@st.cache_resource  # cache across Streamlit reruns so the model loads only once
def load_clip_model():
    """Load the tiny CLIP checkpoint and its processor.

    Returns:
        tuple: ``(CLIPModel, CLIPProcessor)`` ready for inference.
    """
    model = CLIPModel.from_pretrained(
        "fxmarty/clip-vision-model-tiny",
        ignore_mismatched_sizes=True,  # checkpoint sizes differ from config; tolerate
    )
    # NOTE(review): this checkpoint is a vision-only test model; confirm it
    # actually ships tokenizer files, otherwise CLIPProcessor loading may fail.
    processor = CLIPProcessor.from_pretrained("fxmarty/clip-vision-model-tiny")
    return model, processor


model, processor = load_clip_model()
# ---- Load OCR (EasyOCR) ---- #
@st.cache_resource  # EasyOCR downloads detection/recognition weights on first init
def load_ocr():
    """Create an English EasyOCR reader, cached for the app's lifetime."""
    return easyocr.Reader(['en'])


reader = load_ocr()
# ---- Streamlit UI ---- #
st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")
# Original emoji were mojibake ("πΌοΈ" etc.); restored to plausible glyphs.
st.title("🖼️ Multimodal AI Assistant")
st.write("Upload an image and ask a question about it!")

# ---- Upload Image ---- #
uploaded_file = st.file_uploader("📤 Upload an image", type=["jpg", "png", "jpeg"])
if uploaded_file is not None:
    # Display the image. Convert to RGB so RGBA/palette PNGs don't break
    # downstream model preprocessing.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Extract text using OCR. Pass the raw bytes rather than the UploadedFile
    # object: Image.open() has already advanced the stream, and
    # easyocr.Reader.readtext accepts a path, raw bytes, or an ndarray —
    # not an arbitrary file-like object.
    with st.spinner("🔍 Extracting text from image..."):
        extracted_text = reader.readtext(uploaded_file.getvalue(), detail=0)

    st.write("### 📜 Extracted Text:")
    if extracted_text:
        # detail=0 yields a list of strings; join for readable display
        st.success(" ".join(extracted_text))
    else:
        st.warning("No readable text found in the image.")

    # ---- Ask a Question About the Image ---- #
    user_question = st.text_input("🤖 Ask a question about the image:")
    if user_question:
        with st.spinner("🔍 Analyzing image and generating response..."):
            inputs = processor(text=[user_question], images=image, return_tensors="pt")
            # get_image_features() accepts only pixel_values (plus output
            # flags); unpacking the full processor output would also pass
            # input_ids/attention_mask and raise a TypeError.
            with torch.no_grad():  # inference only — skip autograd bookkeeping
                outputs = model.get_image_features(pixel_values=inputs["pixel_values"])
        st.write("### 🤖 AI Response:")
        st.write("CLIP Model has processed the image! (Further improvements coming soon)")