# Multimodal AI Assistant — Streamlit app (Hugging Face Space).
# NOTE(review): the original file carried a scraped Spaces status banner
# ("Spaces: / Sleeping / Sleeping") here, which was page chrome, not content.
import streamlit as st
from PIL import Image
import easyocr
from transformers import pipeline, AutoTokenizer, AutoModel


@st.cache_resource
def load_clip_model():
    """Load the CLIP feature-extraction pipeline, tokenizer and model.

    Returns:
        tuple: (pipe, tokenizer, model) for "fxmarty/clip-vision-model-tiny".

    Cached with ``st.cache_resource`` so the weights are downloaded and
    instantiated once per server process instead of on every Streamlit rerun.
    """
    # Hoist the repeated checkpoint name so all three loads stay in sync.
    model_name = "fxmarty/clip-vision-model-tiny"
    pipe = pipeline("feature-extraction", model=model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return pipe, tokenizer, model


pipe, tokenizer, model = load_clip_model()
# Initialize OCR.
@st.cache_resource
def load_ocr():
    """Create the English easyocr reader.

    Returns:
        easyocr.Reader: reader configured for English text.

    ``easyocr.Reader`` loads detection/recognition weights, which is slow;
    ``st.cache_resource`` ensures it happens once per server process rather
    than on every Streamlit rerun.
    """
    return easyocr.Reader(['en'])


reader = load_ocr()
# --- Streamlit app UI ---
# NOTE(review): the emoji below were mojibake in the original source
# (e.g. "๐ผ๏ธ"); plausible replacements substituted — confirm intent.
st.title("🖼️ Multimodal AI Assistant")
st.write("Upload an image and ask a question about it!")

# Upload image.
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image.
    image = Image.open(uploaded_file)
    # use_column_width is deprecated; use_container_width is the current API.
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Extract text using OCR. easyocr's readtext accepts a file path, URL,
    # raw image bytes or an ndarray — NOT a Streamlit UploadedFile object —
    # so hand it the raw bytes of the upload.
    with st.spinner("Extracting text from image..."):
        extracted_text = reader.readtext(uploaded_file.getvalue(), detail=0)
    st.write("### 📝 Extracted Text:", extracted_text)

    # Let the user ask a question about the image.
    user_question = st.text_input("🤔 Ask a question about the image:")
    if user_question:
        with st.spinner("Analyzing image and question..."):
            # Only the question text is encoded here; the image is not yet
            # fed to the model (placeholder behavior, per the message below).
            inputs = tokenizer(user_question, return_tensors="pt")
            outputs = model(**inputs)
        st.write("### 🔍 AI Response:")
        st.write("CLIP Model Processed the Input! (Further improvements coming soon)")