Spaces:

UniquePratham
/

DualTextOCRFusion

Sleeping

App Files Community

UniquePratham committed on Sep 27, 2024

Commit

c3906f7

verified ·

1 Parent(s): a0652de

Update ocr_cpu.py

Browse files

Files changed (1) hide show

ocr_cpu.py +63 -97

ocr_cpu.py CHANGED Viewed

@@ -1,122 +1,88 @@
-# ocr_cpu.py
 import os
 import torch
-from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
 import re
-# -----------------------------
-# OCR Model Initialization
-# -----------------------------
-# Load OCR model and tokenizer
-ocr_model_name = "srimanth-d/GOT_CPU"  # Using GOT model on CPU
-ocr_tokenizer = AutoTokenizer.from_pretrained(
-    ocr_model_name, trust_remote_code=True, return_tensors='pt'
-)
-# Load the OCR model
-ocr_model = AutoModel.from_pretrained(
-    ocr_model_name,
-    trust_remote_code=True,
-    low_cpu_mem_usage=True,
-    use_safetensors=True,
-    pad_token_id=ocr_tokenizer.eos_token_id,
-)
-# Ensure the OCR model is in evaluation mode and loaded on CPU
-ocr_device = torch.device("cpu")
-ocr_model = ocr_model.eval().to(ocr_device)
-# -----------------------------
-# Text Cleaning Model Initialization
-# -----------------------------
-# Load Text Cleaning model and tokenizer
-clean_model_name = "gpt2"  # You can choose a different model if preferred
-clean_tokenizer = AutoTokenizer.from_pretrained(clean_model_name)
-clean_model = AutoModelForCausalLM.from_pretrained(clean_model_name)
-# Ensure the Text Cleaning model is in evaluation mode and loaded on CPU
-clean_device = torch.device("cpu")
-clean_model = clean_model.eval().to(clean_device)
-# -----------------------------
-# OCR Function
-# -----------------------------
 def extract_text_got(uploaded_file):
-    """
-    Use GOT-OCR2.0 model to extract text from the uploaded image.
-    """
-    temp_file_path = 'temp_image.jpg'
     try:
-        # Save the uploaded file temporarily
         with open(temp_file_path, 'wb') as temp_file:
             temp_file.write(uploaded_file.read())
-        print(f"Processing image from path: {temp_file_path}")
-        ocr_types = ['ocr', 'format']
-        results = []
-        # Run OCR on the image
-        for ocr_type in ocr_types:
-            with torch.no_grad():
-                print(f"Running OCR with type: {ocr_type}")
-                outputs = ocr_model.chat(ocr_tokenizer, temp_file_path, ocr_type=ocr_type)
-                if isinstance(outputs, list) and outputs[0].strip():
-                    return outputs[0].strip()  # Return the result if successful
-                results.append(outputs[0].strip() if outputs else "No result")
-        # Combine results or return no text found message
-        return results[0] if results else "No text extracted."
     except Exception as e:
-        return f"Error during text extraction: {str(e)}"
     finally:
-        # Clean up temporary file
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
-            print(f"Temporary file {temp_file_path} removed.")
-# -----------------------------
-# Text Cleaning Function
-# -----------------------------
-def clean_text_with_ai(extracted_text):
-    """
-    Cleans extracted text by leveraging a language model to intelligently remove extra spaces and correct formatting.
-    """
     try:
-        # Define the prompt for cleaning
-        prompt = f"Please clean the following text by removing extra spaces and ensuring proper formatting:\n\n{extracted_text}\n\nCleaned Text:"
-        # Tokenize the input prompt
-        inputs = clean_tokenizer.encode(prompt, return_tensors="pt").to(clean_device)
-        # Generate the cleaned text
-        with torch.no_grad():
-            outputs = clean_model.generate(
-                inputs,
-                max_length=500,  # Adjust as needed
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True,
-                eos_token_id=clean_tokenizer.eos_token_id,
-                pad_token_id=clean_tokenizer.eos_token_id
-            )
-        # Decode the generated text
-        cleaned_text = clean_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract the cleaned text after the prompt
-        cleaned_text = cleaned_text.split("Cleaned Text:")[-1].strip()
-        return cleaned_text
-    except Exception as e:
-        return f"Error during AI text cleaning: {str(e)}"

 import os
+from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor, MllamaForConditionalGeneration
 import torch
 import re
+from PIL import Image
+# ---- GOT OCR Model Initialization and Extraction ----
+def init_got_model():
+    """Initialize GOT model and tokenizer."""
+    model_name = "srimanth-d/GOT_CPU"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, return_tensors='pt')
+    model = AutoModel.from_pretrained(model_name, trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+    return model.eval(), tokenizer
 def extract_text_got(uploaded_file):
+    """Extract text from the uploaded image using GOT model."""
+    temp_file_path = 'temp_image_got.jpg'
     try:
         with open(temp_file_path, 'wb') as temp_file:
             temp_file.write(uploaded_file.read())
+        print(f"Processing image using GOT from: {temp_file_path}")
+        model, tokenizer = init_got_model()
+        outputs = model.chat(tokenizer, temp_file_path, ocr_type='ocr')
+        if outputs and isinstance(outputs, list):
+            return outputs[0].strip() if outputs[0].strip() else "No text extracted."
+        return "No text extracted."
     except Exception as e:
+        return f"Error: {str(e)}"
     finally:
         if os.path.exists(temp_file_path):
             os.remove(temp_file_path)
+# ---- Qwen OCR Model Initialization and Extraction ----
+def init_qwen_model():
+    """Initialize Qwen model and processor."""
+    model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", device_map="cpu", torch_dtype=torch.float16)
+    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
+    return model.eval(), processor
+def extract_text_qwen(uploaded_file):
+    """Extract text using Qwen model."""
     try:
+        model, processor = init_qwen_model()
+        image = Image.open(uploaded_file).convert('RGB')
+        conversation = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Extract text from this image."}]}]
+        prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+        inputs = processor(text=[prompt], images=[image], return_tensors="pt")
+        output_ids = model.generate(**inputs)
+        output_text = processor.batch_decode(output_ids, skip_special_tokens=True)
+        return output_text[0] if output_text else "No text extracted."
+    except Exception as e:
+        return f"Error: {str(e)}"
+# ---- LLaMA OCR Model Initialization and Extraction ----
+def init_llama_model():
+    """Initialize LLaMA OCR model and processor."""
+    model = MllamaForConditionalGeneration.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct", torch_dtype=torch.bfloat16, device_map="cpu")
+    processor = AutoProcessor.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
+    return model.eval(), processor
+def extract_text_llama(uploaded_file):
+    """Extract text using LLaMA model."""
+    try:
+        model, processor = init_llama_model()
+        image = Image.open(uploaded_file).convert('RGB')
+        prompt = "You are an OCR engine. Extract text from this image."
+        inputs = processor(images=image, text=prompt, return_tensors="pt")
+        output_ids = model.generate(**inputs)
+        return processor.decode(output_ids[0], skip_special_tokens=True).strip()
+    except Exception as e:
+        return f"Error: {str(e)}"
+# ---- AI-based Text Cleanup ----
+def clean_extracted_text(text):
+    """Clean the extracted text by removing extra spaces intelligently."""
+    # Remove multiple spaces
+    cleaned_text = re.sub(r'\s+', ' ', text).strip()
+    # Further clean punctuations with spaces around them
+    cleaned_text = re.sub(r'\s([?.!,])', r'\1', cleaned_text)
+    return cleaned_text