# NOTE(review): "Spaces: Sleeping" below was page-status residue from a
# Hugging Face Spaces copy/paste, not part of the program — kept as a comment
# so the file stays valid Python; safe to delete.
# ------------------------------------------------------------
# 1. Import libraries
# ------------------------------------------------------------
# Standard library
import uuid  # unique IDs for stored sentences

# Third-party
import chromadb  # vector database for storing embeddings
import gradio as gr  # simple web UI framework
import pytesseract  # OCR library to read text from images
from PIL import Image  # image loading and manipulation
from sentence_transformers import SentenceTransformer  # local embedding model

# (FOR WINDOWS USERS) explicitly set the tesseract.exe location.
# Change the path if Tesseract is installed somewhere else.
# NOTE(review): this hard-coded path only exists on Windows hosts — on
# Linux/macOS, Tesseract on PATH is found automatically and this line
# should be removed or guarded (e.g. with os.name == "nt"). Confirm target OS.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# ------------------------------------------------------------
# 2. Load local embedding model
# ------------------------------------------------------------
# This model converts text into vectors (numbers).
# We use a small, fast model that runs on CPU.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# ------------------------------------------------------------
# 3. Create local ChromaDB database
# ------------------------------------------------------------
# SECURITY FIX: the original committed a live Chroma Cloud API key directly
# in source code — credentials must never be hard-coded. An in-process local
# client also matches the "local ChromaDB database" intent stated above.
# To use Chroma Cloud instead, read the key/tenant/database from environment
# variables (e.g. os.environ["CHROMA_API_KEY"]).
client = chromadb.Client()

# Create or access a collection (like a table in a DB).
# get_or_create_collection avoids the DuplicateCollection error that
# create_collection raises when the script is re-run against an existing DB.
collection = client.get_or_create_collection("image_rag_final2")
# ------------------------------------------------------------
# 4. Function: process image and extract text
# ------------------------------------------------------------
def process_image(image):
    """OCR an uploaded image and store its sentences in the vector DB.

    Parameters
    ----------
    image : numpy.ndarray or None
        Image as delivered by the Gradio ``gr.Image`` component
        (``None`` when the user submits without uploading).

    Returns
    -------
    str
        Status message, including the extracted text on success.
    """
    # ROBUSTNESS FIX: Gradio passes None when no image was uploaded;
    # Image.fromarray(None) would raise an unhandled exception.
    if image is None:
        return "Please upload an image first."
    # Convert the uploaded numpy array into PIL format
    img = Image.fromarray(image)
    # Run OCR to extract text from the image
    text = pytesseract.image_to_string(img)
    # If no text was found
    if not text.strip():
        return "No text detected in image."
    # Split OCR text into separate lines/sentences
    sentences = [s.strip() for s in text.split("\n") if s.strip()]
    # Convert each sentence to a vector embedding
    embeddings = embedder.encode(sentences).tolist()
    # Generate a unique ID for each sentence
    ids = [str(uuid.uuid4()) for _ in sentences]
    # Store sentences & embeddings in the Chroma vector DB
    collection.add(documents=sentences, embeddings=embeddings, ids=ids)
    # Return the extracted text so the user can see it
    return "Image processed and stored. Extracted text:\n\n" + "\n".join(sentences)
# ------------------------------------------------------------
# 5. Function: answer questions based on stored image text
# ------------------------------------------------------------
def answer_question(question):
    """Return the stored sentence most semantically similar to *question*.

    Parameters
    ----------
    question : str
        Free-text question typed by the user.

    Returns
    -------
    str
        The best-matching stored sentence, or a prompt/status message.
    """
    # Ask the user to type something
    if question.strip() == "":
        return "Please enter a question."
    # Convert the question into an embedding vector
    query_embedding = embedder.encode([question]).tolist()
    # Search for the single most similar text in ChromaDB
    results = collection.query(query_embeddings=query_embedding, n_results=1)
    # BUG FIX: on an empty collection, Chroma returns {"documents": [[]]} —
    # a truthy outer list — so the original `if not results["documents"]`
    # guard passed and the [0][0] index below raised IndexError.
    # Check the inner per-query list as well.
    docs = results.get("documents")
    if not docs or not docs[0]:
        return "No data yet. Upload an image first."
    # Get the best-matching sentence
    best_sentence = docs[0][0]
    # Return the answer
    return f"Answer (most relevant text):\n{best_sentence}"
# ------------------------------------------------------------
# 6. Build Gradio User Interface
# ------------------------------------------------------------
# Two tabs, one Interface each:
#   Tab 1: upload an image, OCR it, and store the extracted text
#   Tab 2: ask a question about the stored text
upload_tab = gr.Interface(
    fn=process_image,
    inputs=gr.Image(label="Upload Image"),
    outputs=gr.Textbox(label="Extracted / Stored Text"),
    title="Upload Image & Extract Text",
)

question_tab = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(label="Ask a question about the image"),
    outputs=gr.Textbox(label="Answer"),
    title="Ask Question About Image",
)

app = gr.TabbedInterface(
    [upload_tab, question_tab],
    tab_names=["Upload Image", "Ask Question"],
)

# Start the web app
app.launch()