Dua Rajper committed on
Commit
91487f8
·
verified ·
1 Parent(s): 8bf9999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -62
app.py CHANGED
@@ -1,83 +1,54 @@
1
  import streamlit as st
2
- from PIL import Image
3
- import torch
4
  import easyocr
 
 
 
5
  import numpy as np
6
- import openai # Using OpenAI GPT (or replace with GROQ API)
7
  import io
8
- from transformers import CLIPModel, CLIPImageProcessor
9
 
10
- # βœ… Fix: set_page_config() must be the first Streamlit command
11
  st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")
12
 
13
- # ---- Load CLIP Model (Vision Only) ---- #
14
- @st.cache_resource
15
- def load_clip_model():
16
- model = CLIPModel.from_pretrained(
17
- "fxmarty/clip-vision-model-tiny",
18
- ignore_mismatched_sizes=True # βœ… Fix size mismatch
19
- )
20
- processor = CLIPImageProcessor.from_pretrained("fxmarty/clip-vision-model-tiny")
21
- return model, processor
22
-
23
- model, processor = load_clip_model()
24
-
25
- # ---- Load OCR (EasyOCR) ---- #
26
- @st.cache_resource
27
- def load_ocr():
28
- return easyocr.Reader(['en'])
29
-
30
- reader = load_ocr()
31
 
32
- # ---- Streamlit UI ---- #
33
- st.title("πŸ–ΌοΈ Multimodal AI Assistant")
34
- st.write("Upload an image, extract text, and ask questions!")
35
 
36
- # ---- Upload Image ---- #
37
- uploaded_file = st.file_uploader("πŸ“€ Upload an image", type=["jpg", "png", "jpeg"])
 
38
 
39
- extracted_text = None # Variable to store extracted text
 
40
 
41
- if uploaded_file is not None:
42
- # Convert file to image format
43
- image = Image.open(uploaded_file).convert("RGB")
44
-
45
- # βœ… Fix: use `use_container_width` instead of `use_column_width`
46
  st.image(image, caption="Uploaded Image", use_container_width=True)
47
 
48
- # βœ… Convert PIL image to NumPy array for EasyOCR
49
- image_np = np.array(image)
50
-
51
- # βœ… Fix: Pass the correct format to EasyOCR
52
- with st.spinner("πŸ” Extracting text from image..."):
53
- extracted_text_list = reader.readtext(image_np, detail=0)
54
-
55
- extracted_text = " ".join(extracted_text_list) # Combine extracted text
56
-
57
- st.write("### πŸ“ Extracted Text:")
58
- if extracted_text:
59
- st.success(extracted_text)
60
- else:
61
- st.warning("No readable text found in the image.")
62
 
63
- # ---- Question Answering Section ---- #
64
- if extracted_text:
65
- user_question = st.text_input("πŸ’‘ Ask a question about the extracted text:")
 
66
 
67
- if user_question:
68
- with st.spinner("πŸ€– Thinking..."):
69
- # Using OpenAI GPT API (replace with GROQ or Hugging Face LLM if needed)
70
- openai.api_key = "YOUR_OPENAI_API_KEY" # Store securely in a .env file
71
 
72
- response = openai.ChatCompletion.create(
 
 
73
  model="gpt-3.5-turbo",
74
  messages=[
75
- {"role": "system", "content": "You are an AI assistant helping answer questions based on extracted text from an image."},
76
- {"role": "user", "content": f"Extracted text: {extracted_text}\n\nQuestion: {user_question}"}
77
  ]
78
  )
79
 
80
- answer = response["choices"][0]["message"]["content"]
81
-
82
- st.write("### πŸ€– AI Answer:")
83
- st.success(answer)
 
1
  import streamlit as st
 
 
2
  import easyocr
3
+ import openai
4
+ from openai import OpenAI
5
+ from PIL import Image
6
  import numpy as np
 
7
  import io
 
8
 
9
+ # βœ… Fix: Ensure set_page_config is the first Streamlit command
10
  st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")
11
 
12
+ # βœ… OpenAI API Key Setup (Replace with your actual key)
13
+ client = OpenAI(api_key="your_openai_api_key") # Set your OpenAI API key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # βœ… Initialize OCR Reader
16
+ reader = easyocr.Reader(['en'])
 
17
 
18
+ # βœ… Streamlit App Layout
19
+ st.title("πŸ“Έ Multimodal AI Assistant")
20
+ st.write("Upload an image and ask questions based on the extracted text.")
21
 
22
+ # βœ… File Uploader
23
+ uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
24
 
25
+ if uploaded_file:
26
+ # βœ… Display uploaded image
27
+ image = Image.open(uploaded_file)
 
 
28
  st.image(image, caption="Uploaded Image", use_container_width=True)
29
 
30
+ # βœ… Convert image to a format that EasyOCR supports
31
+ image_bytes = io.BytesIO(uploaded_file.getvalue()).read()
32
+ extracted_text = reader.readtext(image_bytes, detail=0)
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ # βœ… Show extracted text
35
+ extracted_text_str = " ".join(extracted_text) if extracted_text else "No text found"
36
+ st.subheader("πŸ“ Extracted Text:")
37
+ st.write(extracted_text_str)
38
 
39
+ # βœ… Ask a question about the extracted text
40
+ user_query = st.text_input("Ask a question about the extracted text:")
 
 
41
 
42
+ if user_query:
43
+ with st.spinner("Thinking... πŸ’­"):
44
+ response = client.chat.completions.create(
45
  model="gpt-3.5-turbo",
46
  messages=[
47
+ {"role": "system", "content": "You are an AI assistant analyzing extracted text from images."},
48
+ {"role": "user", "content": f"Extracted text: {extracted_text_str}\n\nUser question: {user_query}"}
49
  ]
50
  )
51
 
52
+ answer = response.choices[0].message.content
53
+ st.subheader("πŸ€– AI Answer:")
54
+ st.write(answer)