farbodpya committed on
Commit
ba0b33a
·
verified ·
1 Parent(s): 096ba65

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +79 -1
README.md CHANGED
@@ -19,4 +19,82 @@ trained with CTC loss to extract text from images.
19
  ## Installation
20
 
21
  ```bash
22
- pip install torch torchvision huggingface_hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ## Installation
20
 
21
  ```bash
22
+ pip install torch torchvision huggingface_hub
+ ```
23
+
24
+
25
+
26
+ ## Usage Example
27
+
+ > Note: this snippet assumes `CNN_Transformer_OCR`, `idx_to_char`, `device`, and `greedy_decode` are already defined (see the model repository for their definitions).
+
+ ```python
28
+ import torch
29
+ from PIL import Image
30
+ import torchvision.transforms.functional as TF
31
+ import cv2
32
+ import matplotlib.pyplot as plt
33
+ from huggingface_hub import hf_hub_download
34
+
35
+ # --- Load model from HF ---
36
+ model_path = hf_hub_download(repo_id="farbodpya/Persian-OCR", filename="pytorch_model.bin")
37
+ model = CNN_Transformer_OCR(num_classes=len(idx_to_char)+1).to(device)
38
+ model.load_state_dict(torch.load(model_path, map_location=device))
39
+ model.eval()
40
+
41
+ # --- Define transforms ---
42
+ class OCRTestTransform:
43
+ def __init__(self, img_height=64, max_width=1600):
44
+ self.img_height = img_height
45
+ self.max_width = max_width
46
+ def __call__(self, img):
47
+ img = img.convert("L")
48
+ w, h = img.size
49
+ new_w = int(w * self.img_height / h)
50
+ img = img.resize((min(new_w, self.max_width), self.img_height), Image.BICUBIC)
51
+ new_img = Image.new("L", (self.max_width, self.img_height), 255)
52
+ new_img.paste(img, (0, 0))
53
+ img = TF.to_tensor(new_img)
54
+ img = TF.normalize(img, (0.5,), (0.5,))
55
+ return img
56
+
57
+ transform_test = OCRTestTransform()
58
+
59
+ # --- Line segmentation ---
60
+ def segment_lines_precise(image_path, min_line_height=12, margin=6, visualize=False):
61
+ img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
62
+ _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
63
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (img.shape[1]//30, 1))
64
+ morphed = cv2.dilate(binary, kernel, iterations=1)
65
+ contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
66
+ contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[1])
67
+ lines = []
68
+ for ctr in contours:
69
+ x, y, w, h = cv2.boundingRect(ctr)
70
+ if h < min_line_height: continue
71
+ y1 = max(0, y - margin)
72
+ y2 = min(img.shape[0], y + h + margin)
73
+ line_img = img[y1:y2, x:x+w]
74
+ lines.append(Image.fromarray(line_img))
75
+ if visualize:
76
+ for i, line_img in enumerate(lines):
77
+ plt.figure(figsize=(12,2))
78
+ plt.imshow(line_img, cmap='gray')
79
+ plt.axis('off')
80
+ plt.title(f"Line {i+1}")
81
+ plt.show()
82
+ return lines
83
+
84
+ # --- OCR function ---
85
+ def ocr_page(image_path, visualize=False):
86
+ lines = segment_lines_precise(image_path, visualize=visualize)
87
+ all_texts = []
88
+ for idx, line_img in enumerate(lines, 1):
89
+ img_tensor = transform_test(line_img).unsqueeze(0).to(device)
90
+ with torch.no_grad():
91
+ outputs = model(img_tensor)
92
+ pred_text = greedy_decode(outputs, idx_to_char)[0]
93
+ all_texts.append(pred_text)
94
+ print(f"Line {idx}: {pred_text}")
95
+ return "\n".join(all_texts)
96
+
97
+ # --- Example ---
98
+ img_path = "example.png"
99
+ final_text = ocr_page(img_path, visualize=True)
100
+ print("\n=== Final OCR Page ===\n", final_text)
+ ```