Dua Rajper committed on
Commit
e9ed06c
·
verified ·
1 Parent(s): 777a3bb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os

import easyocr
import numpy as np
import openai  # Using OpenAI GPT (or replace with GROQ API)
import streamlit as st
import torch
from PIL import Image
from transformers import CLIPModel, CLIPImageProcessor
9
+
10
# set_page_config() must be the very first Streamlit call in the script;
# calling it after any other st.* command raises StreamlitAPIException.
st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")
+
13
# ---- Load CLIP Model (Vision Only) ---- #
@st.cache_resource
def load_clip_model():
    """Load the tiny CLIP vision model and its image processor.

    Cached with @st.cache_resource so the weights are downloaded and
    instantiated only once per Streamlit session, not on every rerun.

    Returns:
        tuple: (CLIPModel, CLIPImageProcessor) for "fxmarty/clip-vision-model-tiny".
    """
    clip_model = CLIPModel.from_pretrained(
        "fxmarty/clip-vision-model-tiny",
        # Tolerate head/config size mismatches in this tiny checkpoint.
        ignore_mismatched_sizes=True,
    )
    clip_processor = CLIPImageProcessor.from_pretrained("fxmarty/clip-vision-model-tiny")
    return clip_model, clip_processor
22
+
23
# Instantiate at import time; the @st.cache_resource decorator makes this
# cheap on Streamlit reruns (the model is loaded only once).
model, processor = load_clip_model()
+
25
# ---- Load OCR (EasyOCR) ---- #
@st.cache_resource
def load_ocr():
    """Build an English-language EasyOCR reader, cached across Streamlit reruns."""
    return easyocr.Reader(['en'])
29
+
30
reader = load_ocr()

# ---- Streamlit UI ---- #
st.title("πŸ–ΌοΈ Multimodal AI Assistant")
st.write("Upload an image, extract text, and ask questions!")

# ---- Upload Image ---- #
# Restrict uploads to raster formats that both PIL and EasyOCR can handle.
uploaded_file = st.file_uploader("πŸ“€ Upload an image", type=["jpg", "png", "jpeg"])

# Holds the OCR result; stays None until an image is uploaded and processed,
# which gates the question-answering section further down.
extracted_text = None
+
41
if uploaded_file is not None:
    # Decode the uploaded bytes into an RGB PIL image (normalizes away
    # alpha channels / palette modes that OCR does not expect).
    image = Image.open(uploaded_file).convert("RGB")

    # `use_container_width` is the current parameter name; the older
    # `use_column_width` argument is deprecated in Streamlit.
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # EasyOCR accepts a NumPy array (or a file path), not a PIL Image.
    image_np = np.array(image)

    # detail=0 makes readtext() return only the recognized strings,
    # without bounding boxes or confidence scores.
    with st.spinner("πŸ” Extracting text from image..."):
        extracted_text_list = reader.readtext(image_np, detail=0)

    extracted_text = " ".join(extracted_text_list)  # single space-joined string

    st.write("### πŸ“ Extracted Text:")
    if extracted_text:
        st.success(extracted_text)
    else:
        st.warning("No readable text found in the image.")
62
+
63
# ---- Question Answering Section ---- #
# Only offered once OCR produced some text to ground the answer in.
if extracted_text:
    user_question = st.text_input("πŸ’‘ Ask a question about the extracted text:")

    if user_question:
        # SECURITY FIX: never hard-code API keys in source. Read the key from
        # the environment (populate it via a .env file or Streamlit secrets).
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            st.error("OPENAI_API_KEY is not set. Export it or add it to your .env file.")
        else:
            openai.api_key = api_key
            with st.spinner("πŸ€– Thinking..."):
                # Using OpenAI GPT API (replace with GROQ or Hugging Face LLM if needed).
                response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are an AI assistant helping answer questions based on extracted text from an image."},
                        {"role": "user", "content": f"Extracted text: {extracted_text}\n\nQuestion: {user_question}"},
                    ],
                )

            # ChatCompletion responses expose choices[0].message.content.
            answer = response["choices"][0]["message"]["content"]

            st.write("### πŸ€– AI Answer:")
            st.success(answer)