Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,11 +14,17 @@ qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-dist
|
|
| 14 |
|
| 15 |
# Preprocess image for better OCR performance
|
| 16 |
def preprocess_image(image_file):
|
|
|
|
| 17 |
image = np.array(Image.open(image_file).convert("RGB"))
|
|
|
|
|
|
|
| 18 |
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # Convert to grayscale
|
| 19 |
blurred = cv2.GaussianBlur(gray, (5, 5), 0) # Remove noise
|
| 20 |
-
thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] #
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Extract text using TrOCR
|
| 24 |
def extract_text_from_handwriting(image):
|
|
|
|
| 14 |
|
| 15 |
# Preprocess image for better OCR performance
|
| 16 |
def preprocess_image(image_file):
    """Clean up an image for OCR and return it as a 3-channel PIL image.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur,
    and binarized with Otsu's threshold; the binary result is then expanded
    back to three channels.

    Args:
        image_file: Anything ``Image.open`` accepts (a path or a binary
            file-like object).

    Returns:
        A PIL image built from the thresholded, 3-channel array.
    """
    # Open inside a context manager so a file handle Pillow opened itself is
    # released deterministically. convert() loads the pixel data into a new
    # image, so the array is fully materialized before the block exits.
    with Image.open(image_file) as source:
        # Convert image to OpenCV format (numpy array)
        image = np.array(source.convert("RGB"))

    # Preprocessing: grayscale, blur, threshold (to clean up image)
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)  # Remove noise
    # With THRESH_OTSU the threshold value is chosen automatically (the 0 is
    # ignored); cv2.threshold returns (value, image), hence the [1].
    thresh = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]

    # Convert back to RGB (3-channel) format for compatibility with TrOCR
    preprocessed_image = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(preprocessed_image)  # Convert back to PIL format
|
| 28 |
|
| 29 |
# Extract text using TrOCR
|
| 30 |
def extract_text_from_handwriting(image):
|