Spaces:

Zeeshan24
/

Question-Checker

Build error

Zeeshan24 committed on Dec 17, 2024

Commit

5b20f3a

verified ·

1 Parent(s): 421fc43

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,11 +14,17 @@ qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-dist
 # Preprocess image for better OCR performance
 def preprocess_image(image_file):
     image = np.array(Image.open(image_file).convert("RGB"))
     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
     blurred = cv2.GaussianBlur(gray, (5, 5), 0)  # Remove noise
-    thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]  # Increase contrast
-    return Image.fromarray(thresh)  # Convert back to PIL format
 # Extract text using TrOCR
 def extract_text_from_handwriting(image):

 # Preprocess image for better OCR performance
 def preprocess_image(image_file):
+    # Convert image to OpenCV format (numpy array)
     image = np.array(Image.open(image_file).convert("RGB"))
+    # Preprocessing: Grayscale, blur, threshold (to clean up image)
     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
     blurred = cv2.GaussianBlur(gray, (5, 5), 0)  # Remove noise
+    thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]  # Enhance contrast
+    # Convert back to RGB (3-channel) format for compatibility with TrOCR
+    preprocessed_image = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
+    return Image.fromarray(preprocessed_image)  # Convert back to PIL format
 # Extract text using TrOCR
 def extract_text_from_handwriting(image):