badru committed on
Commit
c816f98
·
verified ·
1 Parent(s): 4813fb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -37
app.py CHANGED
@@ -1,7 +1,15 @@
1
  import streamlit as st
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
- import json
 
 
 
 
 
 
 
 
5
 
6
  # Load the model and processor
7
  @st.cache_resource
@@ -12,53 +20,75 @@ def load_model():
12
 
13
  processor, model = load_model()
14
 
15
- # Check if the request is an API call
16
- if st.runtime.scriptrunner.script_run_context.is_running_with_auth:
17
- import io
18
- from fastapi import FastAPI, File, UploadFile
19
- from fastapi.responses import JSONResponse
20
 
21
- app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- @app.post("/process_image")
24
- async def process_image(image: UploadFile = File(...)):
25
- try:
26
- # Read the uploaded image
27
- image_data = await image.read()
28
- image = Image.open(io.BytesIO(image_data)).convert("RGB")
 
29
 
30
- # Perform OCR
31
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
 
 
32
  generated_ids = model.generate(pixel_values)
33
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
34
 
35
- # Return extracted text as JSON
36
- return JSONResponse(content={"extracted_text": generated_text})
37
 
38
- except Exception as e:
39
- return JSONResponse(content={"error": str(e)}, status_code=500)
40
- else:
41
- # Streamlit UI for manual testing
42
- st.title("OCR API Service")
43
 
44
- uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
 
45
 
46
- if uploaded_file is not None:
47
- try:
48
- # Load and display the uploaded image
49
- image = Image.open(uploaded_file).convert("RGB")
50
- st.image(image, caption="Uploaded Image", use_column_width=True)
51
 
52
- # Perform OCR
53
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
 
 
 
 
 
 
54
  generated_ids = model.generate(pixel_values)
55
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
 
56
 
57
- # Display extracted text
58
- st.subheader("Extracted Text:")
59
- st.text(generated_text)
60
 
61
- except Exception as e:
62
- st.error(f"An error occurred: {e}")
63
- else:
64
- st.info("Please upload an image to start the OCR process.")
 
1
  import streamlit as st
2
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
+ from fastapi import FastAPI, UploadFile, File
5
+ from fastapi.responses import JSONResponse
6
+ import uvicorn
7
+ import numpy as np
8
+ import cv2
9
+ import io
10
+
11
+ # Create a FastAPI app instance
12
+ app = FastAPI()
13
 
14
  # Load the model and processor
15
  @st.cache_resource
 
20
 
21
  processor, model = load_model()
22
 
 
 
 
 
 
23
 
24
# Segment a page image into horizontal text-line crops for line-level OCR.
def detect_lines(image, min_height=20, min_width=100):
    """Return a top-to-bottom list of numpy line crops found in *image*.

    image: PIL RGB image. Contours smaller than min_height x min_width are
    discarded so stray marks and noise do not become "lines".
    """
    page = np.array(image)
    # Binarize with Otsu on the grayscale page, inverted so ink is white.
    gray = cv2.cvtColor(page, cv2.COLOR_RGB2GRAY)
    _, ink = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Dilate so the characters of one line fuse into a single connected blob.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    blobs = cv2.dilate(ink, kernel, iterations=1)
    contours, _ = cv2.findContours(blobs, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = [cv2.boundingRect(c) for c in contours]
    boxes.sort(key=lambda box: box[1])  # reading order: top edge (y) ascending
    crops = []
    for x, y, w, h in boxes:
        if h >= min_height and w >= min_width:
            crops.append(page[y:y + h, x:x + w])
    return crops
35
+
36
 
37
# FastAPI endpoint: accept an uploaded image and OCR it line by line.
@app.post("/process_image")
async def process_image(image: UploadFile = File(...)):
    """OCR the uploaded image and return {"extracted_text": ...} as JSON.

    The page is split into line crops with detect_lines(); each crop is run
    through the TrOCR model separately and the decoded lines are joined with
    newlines. Any failure is reported as a 500 JSON body {"error": ...}.
    """
    try:
        # Read the raw upload bytes and decode them into an RGB image.
        # (Use a distinct name instead of shadowing the `image` parameter.)
        image_data = await image.read()
        pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")

        # Segment into line crops. If segmentation finds nothing (short
        # single line, low-contrast scan), fall back to OCR-ing the whole
        # image so the endpoint still returns useful text instead of "".
        line_images = detect_lines(pil_image, min_height=30, min_width=100)
        if not line_images:
            line_images = [np.array(pil_image)]

        pieces = []
        for line_img in line_images:
            line_pil = Image.fromarray(line_img)
            pixel_values = processor(images=line_pil, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            pieces.append(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])

        # Return extracted text as JSON
        return JSONResponse(content={"extracted_text": "\n".join(pieces).strip()})

    except Exception as e:
        # Broad catch is deliberate at this API boundary: report the failure
        # to the client rather than crash the worker.
        return JSONResponse(content={"error": str(e)}, status_code=500)
60
 
 
 
 
 
 
61
 
62
# Streamlit UI for exercising the OCR pipeline manually in a browser.
st.title("OCR API Service with Multiline Support")

uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    try:
        image = Image.open(uploaded_file).convert("RGB")
        # use_container_width replaces the deprecated use_column_width flag.
        st.image(image, caption="Uploaded Image", use_container_width=True)

        # Detect lines in the image
        st.write("Detecting lines...")
        line_images = detect_lines(image, min_height=30, min_width=100)
        st.write(f"Detected {len(line_images)} lines in the image.")
        # Fall back to whole-image OCR when segmentation finds no lines,
        # so short or low-contrast images still produce output.
        if not line_images:
            line_images = [np.array(image)]

        # Run TrOCR on each detected line crop, one output line per crop.
        extracted_text = ""
        for idx, line_img in enumerate(line_images):
            line_pil = Image.fromarray(line_img)
            pixel_values = processor(images=line_pil, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            extracted_text += f"{generated_text}\n"

        # Display extracted text
        st.subheader("Extracted Text:")
        st.text_area("Output Text", extracted_text.strip(), height=300)

    except Exception as e:
        st.error(f"An error occurred: {e}")
 
91
 
92
# Run the FastAPI app when this file is executed directly (python app.py).
# NOTE(review): under `streamlit run app.py` the script also executes with
# __name__ == "__main__", so this uvicorn.run call would block inside the
# Streamlit process — confirm the intended deployment starts the API and
# the UI as separate processes.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)