ARBAJSSHAIKH committed on
Commit
0a778db
·
verified ·
1 Parent(s): a7e47c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py CHANGED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ------------------------------------------------------------
# 1. Import libraries
# ------------------------------------------------------------

# Standard library: filesystem checks and unique IDs.
import os
import uuid

# OCR library to read text from images.
import pytesseract

# For image loading and manipulation.
from PIL import Image

# Vector database for storing embeddings.
import chromadb

# Local sentence embedding model.
from sentence_transformers import SentenceTransformer

# Simple web UI framework.
import gradio as gr

# (FOR WINDOWS USERS) explicitly point pytesseract at tesseract.exe.
# Only override the default when that path actually exists; an
# unconditional override would break Linux/macOS deployments (where
# `tesseract` is found on PATH) and Windows installs in other locations.
# Change the path below if Tesseract is installed somewhere else.
_WINDOWS_TESSERACT = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.exists(_WINDOWS_TESSERACT):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT
import os

# ------------------------------------------------------------
# 2. Load local embedding model
# ------------------------------------------------------------

# This model converts text into vectors (numbers).
# "all-MiniLM-L6-v2" is small and fast — runs fine on CPU.
embedder = SentenceTransformer("all-MiniLM-L6-v2")


# ------------------------------------------------------------
# 3. Connect to the ChromaDB vector database (Chroma Cloud)
# ------------------------------------------------------------

# SECURITY NOTE: these credentials were previously hard-coded here and
# therefore leaked in version control — rotate the API key, then supply
# the new values via environment variables (e.g. Space secrets).  The
# old literals remain only as a fallback so existing deployments keep
# working until the rotation is done.
client = chromadb.CloudClient(
    api_key=os.environ.get(
        "CHROMA_API_KEY", "ck-3TKpYcZnQiMFRYMs5XPusnJjcwJ1DekHF5eAK6Eixg3i"
    ),
    tenant=os.environ.get("CHROMA_TENANT", "a8aa043d-7905-4da1-9937-197415021b8c"),
    database=os.environ.get("CHROMA_DATABASE", "TEST 1"),
)

# Create or access a collection (like a table in a DB).
# get_or_create_collection avoids the error create_collection raises
# when the collection already exists (e.g. when the app restarts).
collection = client.get_or_create_collection("image_rag_final1")
# ------------------------------------------------------------
# 4. Function: process image and extract text
# ------------------------------------------------------------

def process_image(image):
    """OCR an uploaded image, embed each text line, and store it in Chroma.

    Parameters
    ----------
    image : numpy.ndarray or None
        Image from the Gradio ``gr.Image`` component (``None`` when the
        user submits without uploading anything).

    Returns
    -------
    str
        A status message, including the extracted text on success.
    """
    # Guard: Gradio passes None when no image was uploaded; the original
    # code crashed in Image.fromarray(None) here.
    if image is None:
        return "Please upload an image first."

    # Convert the uploaded numpy array into a PIL image for pytesseract.
    img = Image.fromarray(image)

    # Run OCR to extract text from the image.
    text = pytesseract.image_to_string(img)

    # Bail out early if OCR found nothing.
    if not text.strip():
        return "No text detected in image."

    # Split the OCR output into non-empty lines ("sentences").
    sentences = [line.strip() for line in text.split("\n") if line.strip()]

    # Convert each sentence into a vector embedding.
    embeddings = embedder.encode(sentences).tolist()

    # One unique ID per sentence so repeated uploads never collide.
    ids = [str(uuid.uuid4()) for _ in sentences]

    # Store sentences and their embeddings in the Chroma collection.
    collection.add(
        documents=sentences,
        embeddings=embeddings,
        ids=ids,
    )

    # Echo the extracted text back so the user can see what was stored.
    return "Image processed and stored. Extracted text:\n\n" + "\n".join(sentences)
+
89
+
90
# ------------------------------------------------------------
# 5. Function: answer questions based on stored image text
# ------------------------------------------------------------

def answer_question(question):
    """Return the stored sentence most similar to *question*.

    Parameters
    ----------
    question : str or None
        Free-text question typed by the user.

    Returns
    -------
    str
        The best-matching stored sentence, or a prompt/status message.
    """
    # Guard against an empty (or missing) question.
    if not question or not question.strip():
        return "Please enter a question."

    # Convert the question into an embedding vector.
    query_embedding = embedder.encode([question]).tolist()

    # Search for the single most similar stored sentence.
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=1,
    )

    # Chroma returns one inner list per query, e.g. {"documents": [[]]}
    # for an empty collection — the outer list is non-empty, so the
    # original `if not results["documents"]` check never fired.  Check
    # the inner list too.
    docs = results.get("documents") or []
    if not docs or not docs[0]:
        return "No data yet. Upload an image first."

    # Best matching sentence is the first hit of the first (only) query.
    best_sentence = docs[0][0]

    return f"Answer (most relevant text):\n{best_sentence}"
# ------------------------------------------------------------
# 6. Build Gradio User Interface
# ------------------------------------------------------------

# Two tabs:
#   Tab 1: upload an image, extract its text, and store it.
#   Tab 2: ask a question about the stored text.
upload_tab = gr.Interface(
    fn=process_image,
    inputs=gr.Image(label="Upload Image"),
    outputs=gr.Textbox(label="Extracted / Stored Text"),
    title="Upload Image & Extract Text",
)

question_tab = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(label="Ask a question about the image"),
    outputs=gr.Textbox(label="Answer"),
    title="Ask Question About Image",
)

app = gr.TabbedInterface(
    [upload_tab, question_tab],
    tab_names=["Upload Image", "Ask Question"],
)

# Start the web app.
app.launch()