Spaces:

ARBAJSSHAIKH
/

fsa

Runtime error

App Files Files Community

ARBAJSSHAIKH committed on Jan 7

Commit

0a778db

verified ·

1 Parent(s): a7e47c7

Update app.py

Browse files

Files changed (1) hide show

app.py +159 -0

app.py CHANGED Viewed

	@@ -0,0 +1,159 @@

+# ------------------------------------------------------------
+# 1. Import libraries
+# ------------------------------------------------------------
+# OCR library to read text from images
+import pytesseract
+# (FOR WINDOWS USERS) explicitly set tesseract.exe location
+# Change the path if Tesseract is installed somewhere else
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+# For image loading and manipulation
+from PIL import Image
+# Vector database for storing embeddings locally
+import chromadb
+# Local sentence embedding model
+from sentence_transformers import SentenceTransformer
+# Simple web UI framework
+import gradio as gr
+# Create unique IDs for storing sentences
+import uuid
+# ------------------------------------------------------------
+# 2. Load local embedding model
+# ------------------------------------------------------------
+# This model converts text into vectors (numbers)
+# We use a small, fast model — runs on CPU
+embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# ------------------------------------------------------------
+# 3. Create local ChromaDB database
+# ------------------------------------------------------------
+# Create Chroma client (local DB in memory by default)
+client = chromadb.CloudClient(
+  api_key='ck-3TKpYcZnQiMFRYMs5XPusnJjcwJ1DekHF5eAK6Eixg3i',
+  tenant='a8aa043d-7905-4da1-9937-197415021b8c',
+  database='TEST 1'
+)
+# Create or access a collection (like a table in DB)
+collection = client.create_collection("image_rag_final1")
+# ------------------------------------------------------------
+# 4. Function: process image and extract text
+# ------------------------------------------------------------
+def process_image(image):
+    # Convert uploaded numpy array image into PIL format
+    img = Image.fromarray(image)
+    # Run OCR to extract text from image
+    text = pytesseract.image_to_string(img)
+    # If no text found
+    if text.strip() == "":
+        return "No text detected in image."
+    # Split OCR text into separate lines/sentences
+    sentences = [s.strip() for s in text.split("\n") if s.strip()]
+    # Convert each sentence to vector embedding
+    embeddings = embedder.encode(sentences).tolist()
+    # Generate unique ID for each sentence
+    ids = [str(uuid.uuid4()) for _ in sentences]
+    # Store sentences & embeddings into Chroma vector DB
+    collection.add(
+        documents=sentences,
+        embeddings=embeddings,
+        ids=ids
+    )
+    # Return extracted text so user can see it
+    return "Image processed and stored. Extracted text:\n\n" + "\n".join(sentences)
+# ------------------------------------------------------------
+# 5. Function: answer questions based on stored image text
+# ------------------------------------------------------------
+def answer_question(question):
+    # Ask user to type something
+    if question.strip() == "":
+        return "Please enter a question."
+    # Convert question into embedding vector
+    query_embedding = embedder.encode([question]).tolist()
+    # Search top 1 similar text from ChromaDB
+    results = collection.query(
+        query_embeddings=query_embedding,
+        n_results=1
+    )
+    # If no images were uploaded before asking question
+    if not results["documents"]:
+        return "No data yet. Upload an image first."
+    # Get the best matching sentence
+    best_sentence = results["documents"][0][0]
+    # Return answer
+    return f"Answer (most relevant text):\n{best_sentence}"
+# ------------------------------------------------------------
+# 6. Build Gradio User Interface
+# ------------------------------------------------------------
+# Upload image component
+image_input = gr.Image(label="Upload Image")
+# Show extracted OCR text
+ocr_output = gr.Textbox(label="Extracted / Stored Text")
+# Ask question box
+question_box = gr.Textbox(label="Ask a question about the image")
+# Show answer
+answer_box = gr.Textbox(label="Answer")
+# Two tabs:
+# Tab 1: Upload Image & Extract Text
+# Tab 2: Ask Question about Image
+app = gr.TabbedInterface(
+    [
+        gr.Interface(
+            fn=process_image,
+            inputs=image_input,
+            outputs=ocr_output,
+            title="Upload Image & Extract Text"
+        ),
+        gr.Interface(
+            fn=answer_question,
+            inputs=question_box,
+            outputs=answer_box,
+            title="Ask Question About Image"
+        ),
+    ],
+    tab_names=["Upload Image", "Ask Question"]
+)
+# Start the web app
+app.launch()