kavehtaheri committed on
Commit
62937d0
·
verified ·
1 Parent(s): d527072

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -92
app.py CHANGED
@@ -3,22 +3,36 @@ import easyocr
3
  from PIL import Image, ImageDraw, ImageFont
4
  import numpy as np
5
  import google.generativeai as genai
 
6
  import arabic_reshaper
7
  from bidi.algorithm import get_display
8
 
9
# --- CONFIGURATION ---
# The Gemini API key is a secret, so it is read from the environment (for
# example a Hugging Face Space secret named GEMINI_API_KEY) instead of being
# committed to the repository. Any key that was previously committed here must
# be treated as compromised and rotated.
import os  # local to this block so the change is self-contained

api_key = os.environ.get("GEMINI_API_KEY")  # None when the secret is not set

# Font file used to render the Persian overlay text.
PERSIAN_FONT_PATH = "vazir.ttf"

# Lazily created EasyOCR reader; populated on first use by initialize_reader().
reader = None
14
 
15
  def initialize_reader():
 
16
  global reader
17
  if reader is None:
 
18
  reader = easyocr.Reader(['en'], gpu=False, verbose=False)
 
19
  return reader
20
 
 
 
21
  def extract_text_and_bbox(image):
 
 
 
22
  if image is None:
23
  return "Please upload an image first.", None
24
 
@@ -32,7 +46,7 @@ def extract_text_and_bbox(image):
32
 
33
  min_x, min_y = float('inf'), float('inf')
34
  max_x, max_y = float('-inf'), float('-inf')
35
-
36
  text_parts = []
37
  for (bbox, text, prob) in results:
38
  text_parts.append(text)
@@ -41,62 +55,48 @@ def extract_text_and_bbox(image):
41
  min_y = min(min_y, tl[1], tr[1])
42
  max_x = max(max_x, tr[0], br[0])
43
  max_y = max(max_y, bl[1], br[1])
44
-
45
  extracted_text = ' '.join(text_parts)
46
  consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
 
47
  return extracted_text, consolidated_bbox
48
 
49
  except Exception as e:
50
  return f"Error processing image: {str(e)}", None
51
 
 
52
  def translate_text_gemini(text):
 
53
  if not text or "No text" in text or "Error" in text or "Please upload" in text:
54
  return "No valid text to translate."
 
55
  try:
56
  genai.configure(api_key=api_key)
57
  model = genai.GenerativeModel('gemini-1.5-flash')
58
- prompt = (
59
- "Translate the following English text to Persian. Your translation should be natural, touching, and relatable, "
60
- "like casual chats with a friend—short and heartfelt. Use colloquial Persian words and contractions where appropriate. "
61
- "Do not add any extra explanations, greetings, or emojis. Output ONLY the Persian translation. "
62
- f"English text: [{text}]")
63
  response = model.generate_content(prompt)
64
  return response.text.strip()
65
  except Exception as e:
66
  return f"Error during translation: {str(e)}"
67
 
68
- #########################################
69
- # ----------- RTL OVERLAY FIX --------- #
70
- #########################################
71
 
72
def wrap_rtl_text(text, font, max_width, draw):
    """Greedily wrap ``text`` at single spaces to fit within ``max_width``.

    Args:
        text: Text to wrap; the original docstring says it is expected to be
            already shaped and bidi-processed RTL text.
        font: Font passed through to ``draw.textlength`` for measuring.
        max_width: Maximum allowed measured width of a line.
        draw: Object providing ``textlength(text, font=...)``.

    Returns:
        A list of line strings. A single word wider than ``max_width`` is
        kept on a line of its own rather than being split.
    """
    wrapped = []
    pending = ""
    for token in text.split(' '):
        candidate = f"{pending} {token}".strip() if pending else token
        if draw.textlength(candidate, font=font) <= max_width:
            pending = candidate
            continue
        # The candidate is too wide: flush what we have and start a new line.
        if pending:
            wrapped.append(pending)
        pending = token
    if pending:
        wrapped.append(pending)
    return wrapped
92
-
93
- def overlay_text_on_image(original_image, text_to_overlay, bbox):
94
  image_copy = original_image.copy()
95
  draw = ImageDraw.Draw(image_copy)
96
 
97
- # 1. Erase the old text
98
  padding = 10
99
  erase_box = (bbox[0] - padding, bbox[1] - padding, bbox[2] + padding, bbox[3] + padding)
 
100
  try:
101
  sample_x = max(0, int(erase_box[0]) - 5)
102
  sample_y = int((erase_box[1] + erase_box[3]) / 2)
@@ -105,78 +105,148 @@ def overlay_text_on_image(original_image, text_to_overlay, bbox):
105
  bg_color = image_copy.getpixel((sample_x, sample_y))
106
  except (ValueError, IndexError):
107
  bg_color = (0, 0, 0)
 
108
  draw.rectangle(erase_box, fill=bg_color)
109
 
110
- # 2. Split into logical lines before reshaping (LTR split)
111
- # Use max box width to wrap at word boundaries
112
- target_width = (erase_box[2] - erase_box[0]) * 0.95
113
  target_height = erase_box[3] - erase_box[1]
114
-
115
- font_size = 90
116
- selected_font = "VAZIR.TTF"
117
- while font_size > 10:
118
- font = ImageFont.truetype(selected_font, font_size)
119
- words = text_to_overlay.split()
120
- lines = []
121
- current_line = ""
122
- for word in words:
123
- test_line = (current_line + " " + word).strip()
124
- # Only reshape for measuring
125
- reshaped_test_line = arabic_reshaper.reshape(test_line)
126
- line_width = draw.textlength(reshaped_test_line, font=font)
127
- if line_width <= target_width:
128
- current_line = test_line
129
- else:
130
- if current_line:
131
- lines.append(current_line)
132
- current_line = word
133
- if current_line:
134
- lines.append(current_line)
135
- # Calculate total height
136
- total_height = sum(
137
- draw.textbbox((0,0), arabic_reshaper.reshape(line), font=font)[3] -
138
- draw.textbbox((0,0), arabic_reshaper.reshape(line), font=font)[1]
139
- for line in lines
140
- ) + (len(lines) - 1) * int(font_size * 0.35)
141
- if total_height <= target_height:
142
- break
143
- font_size -= 2
144
-
145
- final_font = ImageFont.truetype(selected_font, font_size)
146
- line_spacing = int(final_font.size * 0.35)
147
- total_text_height = 0
148
- line_heights = []
149
  reshaped_lines = []
150
  for line in lines:
151
  reshaped = arabic_reshaper.reshape(line)
152
- bbox_line = draw.textbbox((0, 0), reshaped, font=final_font)
153
- line_height = bbox_line[3] - bbox_line[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  line_heights.append(line_height)
155
  total_text_height += line_height
156
- reshaped_lines.append(reshaped)
157
- total_text_height += (len(reshaped_lines) - 1) * line_spacing
158
-
159
- # Start vertical centering
160
- y_start = erase_box[1] + ((erase_box[3] - erase_box[1]) - total_text_height) // 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  current_y = y_start
162
-
163
- for i, line in enumerate(reshaped_lines):
164
- line_height = line_heights[i]
165
  x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
166
- line_y_center = current_y + line_height / 2
167
-
168
- # Shadow for visibility (optional)
169
- draw.text((x_center + 2, line_y_center + 2), line, font=final_font, fill=(0, 0, 0), anchor="mm")
170
- # Main text
171
- draw.text((x_center, line_y_center), line, font=final_font, fill=(255, 255, 255), anchor="mm")
172
- current_y += line_height + line_spacing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  return image_copy
175
 
176
-
177
- #########################################
178
- # --------- Gradio Interface ----------#
179
- #########################################
180
 
181
  with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
182
  gr.Markdown("# 📝 Quote Image Translator")
@@ -216,13 +286,14 @@ with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
216
 
217
  if bbox is None:
218
  return extracted_text, "No text to translate.", None
219
-
220
  translated_text = translate_text_gemini(extracted_text)
221
 
222
  if "Error" in translated_text:
223
  return extracted_text, translated_text, None
224
 
225
  final_image = overlay_text_on_image(image, translated_text, bbox)
 
226
  return extracted_text, translated_text, final_image
227
 
228
  image_input.change(
 
3
  from PIL import Image, ImageDraw, ImageFont
4
  import numpy as np
5
  import google.generativeai as genai
6
+ import time
7
  import arabic_reshaper
8
  from bidi.algorithm import get_display
9
 
10
# --- CONFIGURATION ---
# The Gemini API key is a secret, so it is read from the environment (for
# example a Hugging Face Space secret named GEMINI_API_KEY) instead of being
# committed to the repository. Any key that was previously committed here must
# be treated as compromised and rotated.
import os  # local to this block so the change is self-contained

api_key = os.environ.get("GEMINI_API_KEY")  # None when the secret is not set

# Font file used to render the Persian overlay text.
PERSIAN_FONT_PATH = "vazir.ttf"

# --- GLOBAL INITIALIZATION ---
# Lazily created EasyOCR reader; populated on first use by initialize_reader().
reader = None
20
 
21
  def initialize_reader():
22
+ """Initialize EasyOCR reader if it hasn't been already."""
23
  global reader
24
  if reader is None:
25
+ print("Loading EasyOCR model...")
26
  reader = easyocr.Reader(['en'], gpu=False, verbose=False)
27
+ print("EasyOCR model loaded successfully!")
28
  return reader
29
 
30
+ # --- CORE FUNCTIONS ---
31
+
32
  def extract_text_and_bbox(image):
33
+ """
34
+ Extracts text and calculates a single consolidated bounding box for all text found.
35
+ """
36
  if image is None:
37
  return "Please upload an image first.", None
38
 
 
46
 
47
  min_x, min_y = float('inf'), float('inf')
48
  max_x, max_y = float('-inf'), float('-inf')
49
+
50
  text_parts = []
51
  for (bbox, text, prob) in results:
52
  text_parts.append(text)
 
55
  min_y = min(min_y, tl[1], tr[1])
56
  max_x = max(max_x, tr[0], br[0])
57
  max_y = max(max_y, bl[1], br[1])
58
+
59
  extracted_text = ' '.join(text_parts)
60
  consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
61
+
62
  return extracted_text, consolidated_bbox
63
 
64
  except Exception as e:
65
  return f"Error processing image: {str(e)}", None
66
 
67
+
68
  def translate_text_gemini(text):
69
+ """Translates text using Gemini API."""
70
  if not text or "No text" in text or "Error" in text or "Please upload" in text:
71
  return "No valid text to translate."
72
+
73
  try:
74
  genai.configure(api_key=api_key)
75
  model = genai.GenerativeModel('gemini-1.5-flash')
76
+ prompt = (f"Translate the following English text to Persian. Your translation should be natural, touching, and relatable, "
77
+ f"like casual chats with a friend—short and heartfelt. Use colloquial Persian words and contractions where appropriate. "
78
+ f"Do not add any extra explanations, greetings, or emojis. Output ONLY the Persian translation. "
79
+ f"English text: [{text}]")
80
+
81
  response = model.generate_content(prompt)
82
  return response.text.strip()
83
  except Exception as e:
84
  return f"Error during translation: {str(e)}"
85
 
86
+ # --- CORRECTED IMAGE OVERLAY FUNCTION ---
 
 
87
 
88
+ def overlay_text_on_image(original_image, text_to_overlay, bbox):
89
  """
90
+ Overlays Persian text onto an image, erasing the content within the given bounding box.
91
+ Fixed to properly handle RTL text rendering like the working example.
92
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  image_copy = original_image.copy()
94
  draw = ImageDraw.Draw(image_copy)
95
 
96
+ # 1. Erase the old text (Inpainting)
97
  padding = 10
98
  erase_box = (bbox[0] - padding, bbox[1] - padding, bbox[2] + padding, bbox[3] + padding)
99
+
100
  try:
101
  sample_x = max(0, int(erase_box[0]) - 5)
102
  sample_y = int((erase_box[1] + erase_box[3]) / 2)
 
105
  bg_color = image_copy.getpixel((sample_x, sample_y))
106
  except (ValueError, IndexError):
107
  bg_color = (0, 0, 0)
108
+
109
  draw.rectangle(erase_box, fill=bg_color)
110
 
111
+ # 2. Text processing following the working pattern
112
+ target_width = (erase_box[2] - erase_box[0]) * 0.90 # 90% like in working code
 
113
  target_height = erase_box[3] - erase_box[1]
114
+
115
+ # Split text into lines (or words if needed for wrapping)
116
+ lines = [line.strip() for line in text_to_overlay.split('\n') if line.strip()]
117
+ if not lines:
118
+ lines = [text_to_overlay] # Single line if no newlines
119
+
120
+ # **KEY FIX**: Reshape ALL lines first, then apply get_display()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  reshaped_lines = []
122
  for line in lines:
123
  reshaped = arabic_reshaper.reshape(line)
124
+ display_text = get_display(reshaped) # This was missing!
125
+ reshaped_lines.append(display_text)
126
+
127
+ # 3. Find optimal font size
128
+ font_size = 100
129
+ final_font = None
130
+
131
+ # Find the longest line for font sizing (like in working code)
132
+ if reshaped_lines:
133
+ temp_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
134
+ longest_line = max(reshaped_lines, key=lambda line: draw.textlength(line, font=temp_font))
135
+
136
+ # Reduce font size until longest line fits
137
+ while font_size > 10:
138
+ font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
139
+ if draw.textlength(longest_line, font=font) <= target_width:
140
+ final_font = font
141
+ break
142
+ font_size -= 2
143
+
144
+ if final_font is None:
145
+ final_font = ImageFont.truetype(PERSIAN_FONT_PATH, 12)
146
+
147
+ # 4. Handle line wrapping if text is too wide
148
+ final_lines = []
149
+ for line in reshaped_lines:
150
+ if draw.textlength(line, font=final_font) <= target_width:
151
+ final_lines.append(line)
152
+ else:
153
+ # Need to wrap this line - split by words and rewrap
154
+ original_line = lines[reshaped_lines.index(line)] # Get original before reshaping
155
+ words = original_line.split()
156
+
157
+ current_line_words = []
158
+ for word in words:
159
+ test_words = current_line_words + [word]
160
+ test_text = ' '.join(test_words)
161
+
162
+ # Process the test text properly
163
+ test_reshaped = arabic_reshaper.reshape(test_text)
164
+ test_display = get_display(test_reshaped)
165
+
166
+ if draw.textlength(test_display, font=final_font) <= target_width:
167
+ current_line_words = test_words
168
+ else:
169
+ # Line is full, save current line and start new one
170
+ if current_line_words:
171
+ line_text = ' '.join(current_line_words)
172
+ line_reshaped = arabic_reshaper.reshape(line_text)
173
+ line_display = get_display(line_reshaped)
174
+ final_lines.append(line_display)
175
+ current_line_words = [word]
176
+
177
+ # Add remaining words
178
+ if current_line_words:
179
+ line_text = ' '.join(current_line_words)
180
+ line_reshaped = arabic_reshaper.reshape(line_text)
181
+ line_display = get_display(line_reshaped)
182
+ final_lines.append(line_display)
183
+
184
+ # 5. Calculate total height and center text (following working pattern)
185
+ line_spacing = 20 # Same as working code
186
+ total_text_height = 0
187
+ line_heights = []
188
+
189
+ for line in final_lines:
190
+ line_bbox = draw.textbbox((0, 0), line, font=final_font)
191
+ line_height = line_bbox[3] - line_bbox[1]
192
  line_heights.append(line_height)
193
  total_text_height += line_height
194
+
195
+ # Add spacing between lines
196
+ if len(final_lines) > 1:
197
+ total_text_height += (len(final_lines) - 1) * line_spacing
198
+
199
+ # Check if total height fits, if not reduce font size
200
+ while total_text_height > target_height and font_size > 10:
201
+ font_size -= 2
202
+ final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
203
+
204
+ # Recalculate heights
205
+ total_text_height = 0
206
+ line_heights = []
207
+ for line in final_lines:
208
+ line_bbox = draw.textbbox((0, 0), line, font=final_font)
209
+ line_height = line_bbox[3] - line_bbox[1]
210
+ line_heights.append(line_height)
211
+ total_text_height += line_height
212
+
213
+ if len(final_lines) > 1:
214
+ total_text_height += (len(final_lines) - 1) * line_spacing
215
+
216
+ # Center vertically in the erase box
217
+ y_start = erase_box[1] + (target_height - total_text_height) / 2
218
+
219
+ # 6. Draw the text (following working pattern)
220
  current_y = y_start
221
+ for i, line in enumerate(final_lines):
222
+ # Center horizontally
223
+ line_width = draw.textlength(line, font=final_font)
224
  x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
225
+ line_y_center = current_y + line_heights[i] / 2
226
+
227
+ # Draw shadow for visibility
228
+ draw.text(
229
+ (x_center + 1, line_y_center + 1),
230
+ line,
231
+ font=final_font,
232
+ fill=(0, 0, 0), # Black shadow
233
+ anchor="mm"
234
+ )
235
+
236
+ # Draw main text
237
+ draw.text(
238
+ (x_center, line_y_center),
239
+ line,
240
+ font=final_font,
241
+ fill=(255, 255, 255), # White text
242
+ anchor="mm"
243
+ )
244
+
245
+ current_y += line_heights[i] + line_spacing
246
 
247
  return image_copy
248
 
249
+ # --- GRADIO INTERFACE ---
 
 
 
250
 
251
  with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
252
  gr.Markdown("# 📝 Quote Image Translator")
 
286
 
287
  if bbox is None:
288
  return extracted_text, "No text to translate.", None
289
+
290
  translated_text = translate_text_gemini(extracted_text)
291
 
292
  if "Error" in translated_text:
293
  return extracted_text, translated_text, None
294
 
295
  final_image = overlay_text_on_image(image, translated_text, bbox)
296
+
297
  return extracted_text, translated_text, final_image
298
 
299
  image_input.change(