import io
import os

import easyocr
import numpy as np
import openai
import streamlit as st
import torch
from PIL import Image
from transformers import CLIPModel, CLIPImageProcessor
|
|
| |
# Browser-tab title and wide layout for the whole app.
st.set_page_config(
    page_title="Multimodal AI Assistant",
    layout="wide",
)
|
|
| |
@st.cache_resource
def load_clip_model():
    """Load the CLIP model and its image processor from the same checkpoint.

    Decorated with ``st.cache_resource`` so Streamlit reuses the loaded
    objects instead of rebuilding them on every script rerun.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "fxmarty/clip-vision-model-tiny"
    clip_model = CLIPModel.from_pretrained(
        checkpoint,
        # Tolerate weight-shape differences between the checkpoint and CLIPModel.
        ignore_mismatched_sizes=True,
    )
    clip_processor = CLIPImageProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor


# Module-level handles to the cached model and processor.
model, processor = load_clip_model()
|
|
| |
@st.cache_resource
def load_ocr():
    """Build the EasyOCR reader for English text.

    Decorated with ``st.cache_resource`` so the reader is created once and
    reused across script reruns.

    Returns:
        An ``easyocr.Reader`` configured with the ``'en'`` language.
    """
    languages = ['en']
    ocr_reader = easyocr.Reader(languages)
    return ocr_reader


# Module-level OCR reader used by the upload flow.
reader = load_ocr()
|
|
| |
# Page header: app title and a one-line usage hint.
# The title emoji was stored as mojibake (UTF-8 bytes decoded with a Greek
# single-byte code page); it is restored to the intended picture-frame emoji.
st.title("🖼️ Multimodal AI Assistant")
st.write("Upload an image, extract text, and ask questions!")
|
|
| |
@st.cache_data(show_spinner=False, max_entries=16)
def _extract_text(image_bytes):
    """Run OCR on raw image bytes and return the detected text.

    Cached on the uploaded bytes: Streamlit reruns the whole script on every
    widget interaction (e.g. each submitted question), and without the cache
    the same image would be OCR'd again each time.

    Args:
        image_bytes: Encoded image file contents.

    Returns:
        The recognised text fragments joined by single spaces; an empty
        string when nothing was recognised.
    """
    rgb_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    fragments = reader.readtext(np.array(rgb_image), detail=0)
    return " ".join(fragments)


def _ask_openai(api_key, context, question):
    """Ask the chat model a question about the OCR text and return its reply.

    Supports both openai>=1.0 (client object) and the legacy module-level
    ``openai.ChatCompletion`` interface, which was removed in 1.0.

    Args:
        api_key: OpenAI API key.
        context: Text extracted from the image.
        question: The user's question about that text.

    Returns:
        The content of the first choice in the model's response.

    Raises:
        openai.OpenAIError: If the request fails.
    """
    messages = [
        {"role": "system", "content": "You are an AI assistant helping answer questions based on extracted text from an image."},
        {"role": "user", "content": f"Extracted text: {context}\n\nQuestion: {question}"},
    ]
    if hasattr(openai, "OpenAI"):
        client = openai.OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        return response.choices[0].message.content

    # Legacy (<1.0) interface.
    openai.api_key = api_key
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    return response["choices"][0]["message"]["content"]


uploaded_file = st.file_uploader("📤 Upload an image", type=["jpg", "png", "jpeg"])

# Stays None until an image has been uploaded and processed.
extracted_text = None

if uploaded_file is not None:
    image_bytes = uploaded_file.getvalue()
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    st.image(image, caption="Uploaded Image", use_container_width=True)

    with st.spinner("🔍 Extracting text from image..."):
        extracted_text = _extract_text(image_bytes)

    st.write("### 📝 Extracted Text:")
    if extracted_text:
        st.success(extracted_text)
    else:
        st.warning("No readable text found in the image.")

if extracted_text:
    user_question = st.text_input("💡 Ask a question about the extracted text:")

    if user_question:
        # Read the key from the environment rather than keeping a secret
        # (or a placeholder that can never work) in the source file.
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            st.error(
                "OpenAI API key is not configured. "
                "Set the OPENAI_API_KEY environment variable and reload."
            )
        else:
            answer = None
            with st.spinner("🤔 Thinking..."):
                try:
                    answer = _ask_openai(api_key, extracted_text, user_question)
                except openai.OpenAIError as exc:
                    # Show a readable message instead of a raw traceback.
                    st.error(f"Could not get an answer from the AI service: {exc}")

            if answer is not None:
                st.write("### 🤖 AI Answer:")
                st.success(answer)
|
|