# fsa / app.py
# ARBAJSSHAIKH's picture
# Update app.py
# f77d199 verified
# ------------------------------------------------------------
# 1. Import libraries
# ------------------------------------------------------------
# OCR library to read text from images
import pytesseract
# (FOR WINDOWS USERS) explicitly set tesseract.exe location.
# Change the path if Tesseract is installed somewhere else.
# The override is applied only on Windows and only when the file actually
# exists; everywhere else pytesseract keeps its default command (the
# "tesseract" executable found on PATH), so OCR is not pointed at a
# non-existent Windows path on Linux/macOS hosts.
import os

_WINDOWS_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.name == "nt" and os.path.isfile(_WINDOWS_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_EXE
# For image loading and manipulation
from PIL import Image
# Vector database for storing embeddings locally
import chromadb
# Local sentence embedding model
from sentence_transformers import SentenceTransformer
# Simple web UI framework
import gradio as gr
# Create unique IDs for storing sentences
import uuid
# ------------------------------------------------------------
# 2. Load local embedding model
# ------------------------------------------------------------
# Sentence-embedding model: turns text into numeric vectors.
# "all-MiniLM-L6-v2" is a small, fast model that runs on CPU.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
# ------------------------------------------------------------
# 3. Create local ChromaDB database
# ------------------------------------------------------------
# Connect to a hosted Chroma Cloud database (this is NOT a local in-memory DB).
# Connection settings are read from the environment first.
# SECURITY: the literal fallbacks below are kept only so existing deployments
# keep working. An API key committed to source control must be treated as
# leaked: rotate it, supply the new one via CHROMA_API_KEY, and delete the
# hard-coded fallback.
import os

client = chromadb.CloudClient(
    api_key=os.environ.get(
        "CHROMA_API_KEY", "ck-3TKpYcZnQiMFRYMs5XPusnJjcwJ1DekHF5eAK6Eixg3i"
    ),
    tenant=os.environ.get(
        "CHROMA_TENANT", "a8aa043d-7905-4da1-9937-197415021b8c"
    ),
    database=os.environ.get("CHROMA_DATABASE", "TEST 1"),
)
# Create or access a collection (like a table in DB).
# get_or_create_collection is used instead of create_collection because the
# latter raises when the collection already exists, which is the case on
# every start of the app after the first one.
collection = client.get_or_create_collection("image_rag_final2")
# ------------------------------------------------------------
# 4. Function: process image and extract text
# ------------------------------------------------------------
def process_image(image):
    """Run OCR on an uploaded image and store each text line in the vector DB.

    Args:
        image: The uploaded image as a numpy array, or ``None`` when the
            form was submitted without an image.

    Returns:
        A human-readable status message: a prompt to upload an image, a
        notice that no text was detected, or the extracted lines that were
        stored.
    """
    # Submitting the form without an image yields None; Image.fromarray
    # would crash on it, so answer with a friendly message instead.
    if image is None:
        return "No image provided. Please upload an image first."

    # Convert uploaded numpy array image into PIL format
    img = Image.fromarray(image)

    # Run OCR to extract text from image
    text = pytesseract.image_to_string(img)

    # Split OCR text into separate non-blank lines
    sentences = [line.strip() for line in text.split("\n") if line.strip()]

    # If no text found (only whitespace / blank lines came back)
    if not sentences:
        return "No text detected in image."

    # Convert each line to a vector embedding
    embeddings = embedder.encode(sentences).tolist()

    # Generate a unique ID for each line
    ids = [str(uuid.uuid4()) for _ in sentences]

    # Store lines & embeddings in the Chroma collection
    collection.add(
        documents=sentences,
        embeddings=embeddings,
        ids=ids,
    )

    # Return extracted text so user can see it
    return "Image processed and stored. Extracted text:\n\n" + "\n".join(sentences)
# ------------------------------------------------------------
# 5. Function: answer questions based on stored image text
# ------------------------------------------------------------
def answer_question(question):
    """Return the stored text line most similar to the user's question.

    Args:
        question: The question typed by the user; ``None`` and
            whitespace-only input are treated as empty.

    Returns:
        A prompt to enter a question, a notice that nothing has been stored
        yet, or the best-matching stored line prefixed with a short label.
    """
    # Ask user to type something (None-safe: an unset textbox must not crash)
    if question is None or not question.strip():
        return "Please enter a question."

    # Convert question into embedding vector
    query_embedding = embedder.encode([question]).tolist()

    # Search top 1 similar text from ChromaDB
    results = collection.query(
        query_embeddings=query_embedding,
        n_results=1,
    )

    # The result holds one inner list per query. With nothing stored that
    # inner list is empty ([[]]), which is truthy as a whole, so the inner
    # list has to be checked too before indexing [0][0].
    documents = results["documents"]
    if not documents or not documents[0]:
        return "No data yet. Upload an image first."

    # Get the best matching sentence
    best_sentence = documents[0][0]

    # Return answer
    return f"Answer (most relevant text):\n{best_sentence}"
# ------------------------------------------------------------
# 6. Build Gradio User Interface
# ------------------------------------------------------------
# --- Components -------------------------------------------------
# Image upload widget (input of the first tab)
image_input = gr.Image(label="Upload Image")
# Textbox that displays the OCR result
ocr_output = gr.Textbox(label="Extracted / Stored Text")
# Textbox where the user types a question (input of the second tab)
question_box = gr.Textbox(label="Ask a question about the image")
# Textbox that displays the answer
answer_box = gr.Textbox(label="Answer")

# --- Tab 1: upload an image and extract its text -----------------
upload_tab = gr.Interface(
    fn=process_image,
    inputs=image_input,
    outputs=ocr_output,
    title="Upload Image & Extract Text",
)

# --- Tab 2: ask a question about the stored text -----------------
question_tab = gr.Interface(
    fn=answer_question,
    inputs=question_box,
    outputs=answer_box,
    title="Ask Question About Image",
)

# Combine both interfaces into a single tabbed app
app = gr.TabbedInterface(
    [upload_tab, question_tab],
    tab_names=["Upload Image", "Ask Question"],
)

# Start the web app
app.launch()