Spaces:

UniquePratham
/

DualTextOCRFusion

Sleeping

App Files Files Community

UniquePratham committed on Sep 27, 2024

Commit

9919fac

verified ·

1 Parent(s): 3cb2a3f

Update ocr_cpu.py

Browse files

Files changed (1) hide show

ocr_cpu.py +27 -63

ocr_cpu.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import os
 from transformers import AutoModel, AutoTokenizer
 import torch
 # Load model and tokenizer
-# model_name = "ucaslcl/GOT-OCR2_0"
-model_name = "srimanth-d/GOT_CPU"
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name, trust_remote_code=True, return_tensors='pt'
-)
 # Load the model
 model = AutoModel.from_pretrained(
@@ -20,84 +18,35 @@ model = AutoModel.from_pretrained(
 # Ensure the model is in evaluation mode and loaded on CPU
 device = torch.device("cpu")
-dtype = torch.float32  # Use float32 on CPU
 model = model.eval()
-# OCR function
 def extract_text_got(uploaded_file):
     """Use GOT-OCR2.0 model to extract text from the uploaded image."""
     temp_file_path = 'temp_image.jpg'
     try:
         # Save the uploaded file temporarily
         with open(temp_file_path, 'wb') as temp_file:
             temp_file.write(uploaded_file.read())
-        print(f"Processing image from path: {temp_file_path}")  # Debug info
         ocr_types = ['ocr', 'format']
-        fine_grained_options = ['ocr', 'format']
-        color_options = ['red', 'green', 'blue']
-        box = [10, 10, 100, 100]  # Example box for demonstration
-        multi_crop_types = ['ocr', 'format']
         results = []
-        # Run basic OCR types
         for ocr_type in ocr_types:
             with torch.no_grad():
-                print(f"Running basic OCR with type: {ocr_type}")  # Debug info
                 outputs = model.chat(tokenizer, temp_file_path, ocr_type=ocr_type)
-                # Debug outputs
-                print(f"Outputs for {ocr_type}: {outputs}")
-                if isinstance(outputs, list) and outputs[0].strip():
-                    return outputs[0].strip()  # Return if successful
-                results.append(outputs[0].strip() if outputs else "No result")
-        # Try FINE-GRAINED OCR with box options
-        for ocr_type in fine_grained_options:
-            with torch.no_grad():
-                print(f"Running fine-grained OCR with box, type: {ocr_type}")  # Debug info
-                outputs = model.chat(tokenizer, temp_file_path, ocr_type=ocr_type, ocr_box=box)
-                print(f"Outputs for {ocr_type} with box: {outputs}")
                 if isinstance(outputs, list) and outputs[0].strip():
-                    return outputs[0].strip()  # Return if successful
                 results.append(outputs[0].strip() if outputs else "No result")
-        # Try FINE-GRAINED OCR with color options
-        for ocr_type in fine_grained_options:
-            for color in color_options:
-                with torch.no_grad():
-                    print(f"Running fine-grained OCR with color {color}, type: {ocr_type}")  # Debug info
-                    outputs = model.chat(tokenizer, temp_file_path, ocr_type=ocr_type, ocr_color=color)
-                    print(f"Outputs for {ocr_type} with color {color}: {outputs}")
-                    if isinstance(outputs, list) and outputs[0].strip():
-                        return outputs[0].strip()  # Return if successful
-                    results.append(outputs[0].strip() if outputs else "No result")
-        # Try MULTI-CROP OCR
-        for ocr_type in multi_crop_types:
-            with torch.no_grad():
-                print(f"Running multi-crop OCR with type: {ocr_type}")  # Debug info
-                outputs = model.chat_crop(tokenizer, temp_file_path, ocr_type=ocr_type)
-                print(f"Outputs for multi-crop {ocr_type}: {outputs}")
-                if isinstance(outputs, list) and outputs[0].strip():
-                    return outputs[0].strip()  # Return if successful
-                results.append(outputs[0].strip() if outputs else "No result")
-        # Return combined results or no text found message
-        if all(not text for text in results):
-            return "No text extracted."
-        else:
-            return "\n".join(results)
     except Exception as e:
         return f"Error during text extraction: {str(e)}"
@@ -106,4 +55,19 @@ def extract_text_got(uploaded_file):
         # Clean up temporary file
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
-            print(f"Temporary file {temp_file_path} removed.")  # Debug info

 import os
 from transformers import AutoModel, AutoTokenizer
 import torch
+import re
 # Load model and tokenizer
+model_name = "srimanth-d/GOT_CPU"  # Using GOT model on CPU
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, return_tensors='pt')
 # Load the model
 model = AutoModel.from_pretrained(
 # Ensure the model is in evaluation mode and loaded on CPU
 device = torch.device("cpu")
 model = model.eval()
+# OCR function to extract text
 def extract_text_got(uploaded_file):
     """Use GOT-OCR2.0 model to extract text from the uploaded image."""
     temp_file_path = 'temp_image.jpg'
     try:
         # Save the uploaded file temporarily
         with open(temp_file_path, 'wb') as temp_file:
             temp_file.write(uploaded_file.read())
+        print(f"Processing image from path: {temp_file_path}")
         ocr_types = ['ocr', 'format']
         results = []
+        # Run OCR on the image
         for ocr_type in ocr_types:
             with torch.no_grad():
+                print(f"Running OCR with type: {ocr_type}")
                 outputs = model.chat(tokenizer, temp_file_path, ocr_type=ocr_type)
                 if isinstance(outputs, list) and outputs[0].strip():
+                    return outputs[0].strip()  # Return the result if successful
                 results.append(outputs[0].strip() if outputs else "No result")
+        # Combine results or return no text found message
+        return results[0] if results else "No text extracted."
     except Exception as e:
         return f"Error during text extraction: {str(e)}"
         # Clean up temporary file
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
+            print(f"Temporary file {temp_file_path} removed.")
# Function to clean extracted text (normalizes whitespace and re-attaches
# Devanagari signs that were split from their base character)
def clean_text(extracted_text):
    """
    Clean OCR output for Hindi, English and mixed (Hinglish) text.

    Steps:
      1. Collapse every run of whitespace (spaces, tabs, newlines) into a
         single space.
      2. Remove a space that sits between a Devanagari character and a
         Devanagari dependent sign (matra, virama, nukta, anusvara, ...).
         Such a sign can never start a word, so the space is treated as a
         recognition artifact. Spaces between whole words are kept.
      3. Strip leading and trailing spaces.

    Args:
        extracted_text: Text to clean. ``None`` or an empty string yields
            an empty string.

    Returns:
        The cleaned text as a ``str``.
    """
    if not extracted_text:
        return ""

    # Normalize spaces (after this, the only whitespace left is single ' ').
    text = re.sub(r'\s+', ' ', extracted_text)

    # English needs no further handling: words are already separated by
    # exactly one space after the normalization above.

    # Hindi: re-attach detached dependent signs to the preceding character.
    # The ranges are written as explicit, ascending code points; the earlier
    # literal range started at U+093E and ended at U+0939, which `re` rejects
    # as a bad character range.
    text = re.sub(
        r'(?<=[\u0900-\u097F]) '
        r'(?=[\u0900-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963])',
        '',
        text,
    )

    # Remove trailing and leading spaces
    return text.strip()