Spaces:

kavehtaheri
/

ocrlight2.1overlaytext

Sleeping

App Files Files Community

kavehtaheri committed on Jul 26, 2025

Commit

62937d0

verified ·

1 Parent(s): d527072

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -92

app.py CHANGED Viewed

@@ -3,22 +3,36 @@ import easyocr
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import google.generativeai as genai
 import arabic_reshaper
 from bidi.algorithm import get_display
-# -- CONFIGURATION --
 api_key = "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA"
 PERSIAN_FONT_PATH = "vazir.ttf"
 reader = None
 def initialize_reader():
     global reader
     if reader is None:
         reader = easyocr.Reader(['en'], gpu=False, verbose=False)
     return reader
 def extract_text_and_bbox(image):
     if image is None:
         return "Please upload an image first.", None
@@ -32,7 +46,7 @@ def extract_text_and_bbox(image):
         min_x, min_y = float('inf'), float('inf')
         max_x, max_y = float('-inf'), float('-inf')
         text_parts = []
         for (bbox, text, prob) in results:
             text_parts.append(text)
@@ -41,62 +55,48 @@ def extract_text_and_bbox(image):
             min_y = min(min_y, tl[1], tr[1])
             max_x = max(max_x, tr[0], br[0])
             max_y = max(max_y, bl[1], br[1])
         extracted_text = ' '.join(text_parts)
         consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
         return extracted_text, consolidated_bbox
     except Exception as e:
         return f"Error processing image: {str(e)}", None
 def translate_text_gemini(text):
     if not text or "No text" in text or "Error" in text or "Please upload" in text:
         return "No valid text to translate."
     try:
         genai.configure(api_key=api_key)
         model = genai.GenerativeModel('gemini-1.5-flash')
-        prompt = (
-            "Translate the following English text to Persian. Your translation should be natural, touching, and relatable, "
-            "like casual chats with a friend—short and heartfelt. Use colloquial Persian words and contractions where appropriate. "
-            "Do not add any extra explanations, greetings, or emojis. Output ONLY the Persian translation. "
-            f"English text: [{text}]")
         response = model.generate_content(prompt)
         return response.text.strip()
     except Exception as e:
         return f"Error during translation: {str(e)}"
-#########################################
-# ----------- RTL OVERLAY FIX --------- #
-#########################################
-def wrap_rtl_text(text, font, max_width, draw):
     """
-    Given already shaped and bidi'ed RTL text, wrap lines to fit within max_width.
-    Returns a list of lines (strings).
     """
-    words = text.split(' ')
-    lines = []
-    current_line = ""
-    for word in words:
-        test_line = (current_line + " " + word).strip() if current_line else word
-        test_width = draw.textlength(test_line, font=font)
-        if test_width <= max_width:
-            current_line = test_line
-        else:
-            if current_line:  # push current line, start new
-                lines.append(current_line)
-            current_line = word
-    if current_line:
-        lines.append(current_line)
-    return lines
-def overlay_text_on_image(original_image, text_to_overlay, bbox):
     image_copy = original_image.copy()
     draw = ImageDraw.Draw(image_copy)
-    # 1. Erase the old text
     padding = 10
     erase_box = (bbox[0] - padding, bbox[1] - padding, bbox[2] + padding, bbox[3] + padding)
     try:
         sample_x = max(0, int(erase_box[0]) - 5)
         sample_y = int((erase_box[1] + erase_box[3]) / 2)
@@ -105,78 +105,148 @@ def overlay_text_on_image(original_image, text_to_overlay, bbox):
         bg_color = image_copy.getpixel((sample_x, sample_y))
     except (ValueError, IndexError):
         bg_color = (0, 0, 0)
     draw.rectangle(erase_box, fill=bg_color)
-    # 2. Split into logical lines before reshaping (LTR split)
-    # Use max box width to wrap at word boundaries
-    target_width = (erase_box[2] - erase_box[0]) * 0.95
     target_height = erase_box[3] - erase_box[1]
-    font_size = 90
-    selected_font = "VAZIR.TTF"
-    while font_size > 10:
-        font = ImageFont.truetype(selected_font, font_size)
-        words = text_to_overlay.split()
-        lines = []
-        current_line = ""
-        for word in words:
-            test_line = (current_line + " " + word).strip()
-            # Only reshape for measuring
-            reshaped_test_line = arabic_reshaper.reshape(test_line)
-            line_width = draw.textlength(reshaped_test_line, font=font)
-            if line_width <= target_width:
-                current_line = test_line
-            else:
-                if current_line:
-                    lines.append(current_line)
-                current_line = word
-        if current_line:
-            lines.append(current_line)
-        # Calculate total height
-        total_height = sum(
-            draw.textbbox((0,0), arabic_reshaper.reshape(line), font=font)[3] -
-            draw.textbbox((0,0), arabic_reshaper.reshape(line), font=font)[1]
-            for line in lines
-        ) + (len(lines) - 1) * int(font_size * 0.35)
-        if total_height <= target_height:
-            break
-        font_size -= 2
-    final_font = ImageFont.truetype(selected_font, font_size)
-    line_spacing = int(final_font.size * 0.35)
-    total_text_height = 0
-    line_heights = []
     reshaped_lines = []
     for line in lines:
         reshaped = arabic_reshaper.reshape(line)
-        bbox_line = draw.textbbox((0, 0), reshaped, font=final_font)
-        line_height = bbox_line[3] - bbox_line[1]
         line_heights.append(line_height)
         total_text_height += line_height
-        reshaped_lines.append(reshaped)
-    total_text_height += (len(reshaped_lines) - 1) * line_spacing
-    # Start vertical centering
-    y_start = erase_box[1] + ((erase_box[3] - erase_box[1]) - total_text_height) // 2
     current_y = y_start
-    for i, line in enumerate(reshaped_lines):
-        line_height = line_heights[i]
         x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
-        line_y_center = current_y + line_height / 2
-        # Shadow for visibility (optional)
-        draw.text((x_center + 2, line_y_center + 2), line, font=final_font, fill=(0, 0, 0), anchor="mm")
-        # Main text
-        draw.text((x_center, line_y_center), line, font=final_font, fill=(255, 255, 255), anchor="mm")
-        current_y += line_height + line_spacing
     return image_copy
-#########################################
-# --------- Gradio Interface  ----------#
-#########################################
 with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 Quote Image Translator")
@@ -216,13 +286,14 @@ with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
         if bbox is None:
             return extracted_text, "No text to translate.", None
         translated_text = translate_text_gemini(extracted_text)
         if "Error" in translated_text:
             return extracted_text, translated_text, None
         final_image = overlay_text_on_image(image, translated_text, bbox)
         return extracted_text, translated_text, final_image
     image_input.change(

 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import google.generativeai as genai
+import time
 import arabic_reshaper
 from bidi.algorithm import get_display
+# --- CONFIGURATION ---
+# It's best practice to load secrets from environment variables in Hugging Face
+# import os
+# api_key = os.environ.get("GEMINI_API_KEY")
 import os  # imported here so this configuration block is self-contained
 # The Gemini API key is read from the GEMINI_API_KEY environment variable
 # (on Hugging Face Spaces: a repository secret) instead of being committed to
 # source control. The key that used to be hard-coded on this line was published
 # with the repository and must be treated as compromised: revoke and rotate it.
 # When the variable is unset the key is an empty string; translation requests
 # are then expected to fail until the secret is configured.
 api_key = os.environ.get("GEMINI_API_KEY", "")
 # Path of the TrueType font file loaded to render the Persian overlay text.
 PERSIAN_FONT_PATH = "vazir.ttf"
+# --- GLOBAL INITIALIZATION ---
 reader = None
 def initialize_reader():
     """Return the shared EasyOCR reader, building it on the first call.

     The instance is stored in the module-level ``reader`` variable; once it is
     set, later calls return that same object without constructing a new one.
     """
     global reader
     if reader is not None:
         # Already constructed by an earlier call.
         return reader
     print("Loading EasyOCR model...")
     # English-only reader, GPU disabled, EasyOCR's own verbose output turned off.
     reader = easyocr.Reader(['en'], gpu=False, verbose=False)
     print("EasyOCR model loaded successfully!")
     return reader
+# --- CORE FUNCTIONS ---
 def extract_text_and_bbox(image):
+    """
+    Extracts text and calculates a single consolidated bounding box for all text found.
+    """
     if image is None:
         return "Please upload an image first.", None
         min_x, min_y = float('inf'), float('inf')
         max_x, max_y = float('-inf'), float('-inf')
         text_parts = []
         for (bbox, text, prob) in results:
             text_parts.append(text)
             min_y = min(min_y, tl[1], tr[1])
             max_x = max(max_x, tr[0], br[0])
             max_y = max(max_y, bl[1], br[1])
         extracted_text = ' '.join(text_parts)
         consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
         return extracted_text, consolidated_bbox
     except Exception as e:
         return f"Error processing image: {str(e)}", None
 def translate_text_gemini(text):
     """Translate English text to colloquial Persian with the Gemini API.

     Args:
         text: English text to translate (in this app, the OCR output).

     Returns:
         The stripped Persian translation on success;
         ``"No valid text to translate."`` when ``text`` is empty, blank, or
         contains one of the status-message markers checked below; or a string
         starting with ``"Error during translation: "`` when the API call
         raises. Exceptions are never propagated: the caller detects failure by
         looking for ``"Error"`` in the returned string.
     """
     # Markers of the status messages the OCR step returns in place of real text
     # ("Please upload an image first.", "Error processing image: ...").
     # NOTE(review): this is a substring test, so genuine text that merely
     # contains e.g. the word "Error" is rejected too; kept to preserve behavior.
     status_markers = ("No text", "Error", "Please upload")
     # A whitespace-only string is truthy but holds nothing to translate, so it
     # is rejected here instead of being sent to the API.
     if (
         not text
         or not str(text).strip()
         or any(marker in text for marker in status_markers)
     ):
         return "No valid text to translate."
     # Only the last fragment interpolates, so only it needs to be an f-string.
     prompt = (
         "Translate the following English text to Persian. Your translation should be natural, touching, and relatable, "
         "like casual chats with a friend—short and heartfelt. Use colloquial Persian words and contractions where appropriate. "
         "Do not add any extra explanations, greetings, or emojis. Output ONLY the Persian translation. "
         f"English text: [{text}]"
     )
     try:
         genai.configure(api_key=api_key)
         model = genai.GenerativeModel('gemini-1.5-flash')
         response = model.generate_content(prompt)
         return response.text.strip()
     except Exception as e:
         # UI boundary: every failure is reported as text rather than raised.
         return f"Error during translation: {e}"
+# --- CORRECTED IMAGE OVERLAY FUNCTION ---
+def overlay_text_on_image(original_image, text_to_overlay, bbox):
     """
+    Overlays Persian text onto an image, erasing the content within the given bounding box.
+    Fixed to properly handle RTL text rendering like the working example.
     """
     # Parameters, as used below:
     #   original_image  - image object; it is copied and only the copy is drawn on.
     #   text_to_overlay - string to render; split on '\n' into separate lines.
     #   bbox            - 4-item sequence used as (left, top, right, bottom).
     # Returns the modified copy of the image.
     image_copy = original_image.copy()
     draw = ImageDraw.Draw(image_copy)
+    # 1. Erase the old text (Inpainting)
     padding = 10
     erase_box = (bbox[0] - padding, bbox[1] - padding, bbox[2] + padding, bbox[3] + padding)
     try:
         # The fill colour is sampled 5 px left of the erase box (clamped to x >= 0),
         # at the box's vertical midpoint.
         sample_x = max(0, int(erase_box[0]) - 5)
         sample_y = int((erase_box[1] + erase_box[3]) / 2)
         # NOTE(review): a diff hunk boundary falls here; app.py lines 103-104 are
         # not shown in this view, so two statements may be missing at this point.
         bg_color = image_copy.getpixel((sample_x, sample_y))
     except (ValueError, IndexError):
         # Fall back to black when the sample pixel cannot be read.
         bg_color = (0, 0, 0)
     draw.rectangle(erase_box, fill=bg_color)
+    # 2. Text processing following the working pattern
+    target_width = (erase_box[2] - erase_box[0]) * 0.90  # 90% like in working code
     target_height = erase_box[3] - erase_box[1]
+    # Split text into lines (or words if needed for wrapping)
+    lines = [line.strip() for line in text_to_overlay.split('\n') if line.strip()]
+    if not lines:
+        lines = [text_to_overlay]  # Fallback when every line is blank (e.g. empty or whitespace-only text)
+    # **KEY FIX**: Reshape ALL lines first, then apply get_display()
     reshaped_lines = []
     for line in lines:
         reshaped = arabic_reshaper.reshape(line)
+        display_text = get_display(reshaped)  # This was missing!
+        reshaped_lines.append(display_text)
+    # 3. Find optimal font size
+    font_size = 100
+    final_font = None
+    # Find the longest line for font sizing (like in working code)
+    if reshaped_lines:
         # "Longest" is decided once, by rendered width at the initial 100 pt size.
+        temp_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
+        longest_line = max(reshaped_lines, key=lambda line: draw.textlength(line, font=temp_font))
+        # Reduce font size until longest line fits
+        while font_size > 10:
+            font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
+            if draw.textlength(longest_line, font=font) <= target_width:
+                final_font = font
+                break
+            font_size -= 2
     # NOTE(review): when no size from 100 down to 12 fits, the loop above exits with
     # font_size == 10 while the fallback font below is 12 pt; the height-fitting
     # loop in step 5 (guarded by font_size > 10) then never runs.
+    if final_font is None:
+        final_font = ImageFont.truetype(PERSIAN_FONT_PATH, 12)
+    # 4. Handle line wrapping if text is too wide
     # Presumably only reached with the 12 pt fallback font, since otherwise the
     # font was already chosen so that the longest line fits - TODO confirm.
+    final_lines = []
+    for line in reshaped_lines:
+        if draw.textlength(line, font=final_font) <= target_width:
+            final_lines.append(line)
+        else:
+            # Need to wrap this line - split by words and rewrap
             # Wrapping is done on the logical (pre-reshape) text; each candidate line
             # is reshaped and bidi-reordered again before it is measured or stored.
             # index() returns the first equal entry of reshaped_lines.
+            original_line = lines[reshaped_lines.index(line)]  # Get original before reshaping
+            words = original_line.split()
+            current_line_words = []
+            for word in words:
+                test_words = current_line_words + [word]
+                test_text = ' '.join(test_words)
+                # Process the test text properly
+                test_reshaped = arabic_reshaper.reshape(test_text)
+                test_display = get_display(test_reshaped)
+                if draw.textlength(test_display, font=final_font) <= target_width:
+                    current_line_words = test_words
+                else:
+                    # Line is full, save current line and start new one
+                    if current_line_words:
+                        line_text = ' '.join(current_line_words)
+                        line_reshaped = arabic_reshaper.reshape(line_text)
+                        line_display = get_display(line_reshaped)
+                        final_lines.append(line_display)
                     # A single word wider than target_width still gets a line of its own.
+                    current_line_words = [word]
+            # Add remaining words
+            if current_line_words:
+                line_text = ' '.join(current_line_words)
+                line_reshaped = arabic_reshaper.reshape(line_text)
+                line_display = get_display(line_reshaped)
+                final_lines.append(line_display)
+    # 5. Calculate total height and center text (following working pattern)
+    line_spacing = 20  # Same as working code
+    total_text_height = 0
+    line_heights = []
+    for line in final_lines:
+        line_bbox = draw.textbbox((0, 0), line, font=final_font)
+        line_height = line_bbox[3] - line_bbox[1]
         line_heights.append(line_height)
         total_text_height += line_height
+    # Add spacing between lines
+    if len(final_lines) > 1:
+        total_text_height += (len(final_lines) - 1) * line_spacing
+    # Check if total height fits, if not reduce font size
     # Only the font shrinks here: final_lines is not re-wrapped and line_spacing
     # stays at 20 px regardless of the font size.
+    while total_text_height > target_height and font_size > 10:
+        font_size -= 2
+        final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
+        # Recalculate heights
+        total_text_height = 0
+        line_heights = []
+        for line in final_lines:
+            line_bbox = draw.textbbox((0, 0), line, font=final_font)
+            line_height = line_bbox[3] - line_bbox[1]
+            line_heights.append(line_height)
+            total_text_height += line_height
+        if len(final_lines) > 1:
+            total_text_height += (len(final_lines) - 1) * line_spacing
+    # Center vertically in the erase box
     # If the text is still taller than the box, y_start lies above the box's top edge.
+    y_start = erase_box[1] + (target_height - total_text_height) / 2
+    # 6. Draw the text (following working pattern)
     current_y = y_start
+    for i, line in enumerate(final_lines):
+        # Center horizontally
         # NOTE(review): line_width is computed but never used; centering is done by
         # passing the box centre together with anchor="mm" to draw.text below.
+        line_width = draw.textlength(line, font=final_font)
         x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
+        line_y_center = current_y + line_heights[i] / 2
+        # Draw shadow for visibility
+        draw.text(
+            (x_center + 1, line_y_center + 1),
+            line,
+            font=final_font,
+            fill=(0, 0, 0),  # Black shadow
+            anchor="mm"
+        )
+        # Draw main text
+        draw.text(
+            (x_center, line_y_center),
+            line,
+            font=final_font,
+            fill=(255, 255, 255),  # White text
+            anchor="mm"
+        )
+        current_y += line_heights[i] + line_spacing
     return image_copy
+# --- GRADIO INTERFACE ---
 with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 Quote Image Translator")
         if bbox is None:
             return extracted_text, "No text to translate.", None
         translated_text = translate_text_gemini(extracted_text)
         if "Error" in translated_text:
             return extracted_text, translated_text, None
         final_image = overlay_text_on_image(image, translated_text, bbox)
         return extracted_text, translated_text, final_image
     image_input.change(