# Quote OCR Translator — Hugging Face Space app.
# Detects English text in an uploaded image, translates it to Persian,
# and renders the translation back onto the image.
import os
import time

import arabic_reshaper
import easyocr
import google.generativeai as genai
import gradio as gr
import numpy as np
from bidi.algorithm import get_display
from PIL import Image, ImageDraw, ImageFont
# --- CONFIGURATION ---
# SECURITY: never commit API keys to source control. A previous revision
# shipped a literal Gemini key here — that key must be considered leaked and
# revoked. Load the secret from the environment instead (on Hugging Face
# Spaces: Settings -> Repository secrets -> GEMINI_API_KEY).
api_key = os.environ.get("GEMINI_API_KEY", "")

# TTF font with Persian/Arabic glyph coverage, bundled alongside this file.
PERSIAN_FONT_PATH = "vazir.ttf"
# --- GLOBAL INITIALIZATION ---
reader = None  # lazily constructed EasyOCR reader (model load is slow)


def initialize_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    Loading the OCR model is expensive, so the instance is cached in the
    module-level ``reader`` global and reused across all requests.
    """
    global reader
    if reader is not None:
        return reader
    print("Loading EasyOCR model...")
    reader = easyocr.Reader(['en'], gpu=False, verbose=False)
    print("EasyOCR model loaded successfully!")
    return reader
# --- CORE FUNCTIONS ---
def extract_text_and_bbox(image):
    """
    Extracts text and calculates a single consolidated bounding box for all text found.

    Args:
        image: A PIL image, or None.

    Returns:
        (text, bbox) where text is all detected fragments joined by spaces and
        bbox is (min_x, min_y, max_x, max_y) in pixels; or (message, None)
        when there is no image, no detected text, or an OCR error.
    """
    if image is None:
        return "Please upload an image first.", None
    try:
        reader = initialize_reader()
        img_array = np.array(image)
        results = reader.readtext(img_array)
        if not results:
            return "No text detected in the image.", None
        min_x = min_y = float('inf')
        max_x = max_y = float('-inf')
        text_parts = []
        for (bbox, text, prob) in results:
            text_parts.append(text)
            # EasyOCR boxes are arbitrary quadrilaterals. Take the extremes
            # over ALL four corners — the previous version sampled only two
            # corners per edge, which under-covers rotated/skewed text.
            for (x, y) in bbox:
                min_x = min(min_x, x)
                min_y = min(min_y, y)
                max_x = max(max_x, x)
                max_y = max(max_y, y)
        extracted_text = ' '.join(text_parts)
        consolidated_bbox = (int(min_x), int(min_y), int(max_x), int(max_y))
        return extracted_text, consolidated_bbox
    except Exception as e:
        # Surface OCR failures to the UI as a message rather than crashing.
        return f"Error processing image: {str(e)}", None
def translate_text_gemini(text):
    """Translate English *text* into colloquial Persian via the Gemini API.

    Upstream sentinel messages ("No text...", "Error...", "Please upload...")
    are short-circuited rather than sent to the model. Returns the translated
    string, or an error message on API failure.
    """
    skip_markers = ("No text", "Error", "Please upload")
    if not text or any(marker in text for marker in skip_markers):
        return "No valid text to translate."
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        prompt = (
            f"Translate the following English text to Persian. Your translation should be natural, touching, and relatable, "
            f"like casual chats with a friend—short and heartfelt. Use colloquial Persian words and contractions where appropriate. "
            f"Do not add any extra explanations, greetings, or emojis. Output ONLY the Persian translation. "
            f"English text: [{text}]"
        )
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        return f"Error during translation: {str(e)}"
# --- CORRECTED IMAGE OVERLAY FUNCTION ---
def overlay_text_on_image(original_image, text_to_overlay, bbox):
    """
    Overlays Persian text onto an image, erasing the content within the given bounding box.
    Fixed to properly handle RTL text rendering like the working example.

    Args:
        original_image: PIL image to draw over (a copy is modified and returned).
        text_to_overlay: Persian text; explicit line breaks split it into lines.
        bbox: (min_x, min_y, max_x, max_y) pixel box of the original text.

    Returns:
        A new PIL image with the box erased and the Persian text centered in it.
    """
    image_copy = original_image.copy()
    draw = ImageDraw.Draw(image_copy)
    # 1. Erase the old text (Inpainting)
    padding = 10
    erase_box = (bbox[0] - padding, bbox[1] - padding, bbox[2] + padding, bbox[3] + padding)
    try:
        # Approximate the background color by sampling one pixel just left of
        # the erase box, vertically centered; clamp the point into bounds.
        sample_x = max(0, int(erase_box[0]) - 5)
        sample_y = int((erase_box[1] + erase_box[3]) / 2)
        sample_x = min(sample_x, image_copy.width - 1)
        sample_y = min(sample_y, image_copy.height - 1)
        bg_color = image_copy.getpixel((sample_x, sample_y))
    except (ValueError, IndexError):
        bg_color = (0, 0, 0)  # sampling failed; fall back to black
    draw.rectangle(erase_box, fill=bg_color)
    # 2. Text processing following the working pattern
    target_width = (erase_box[2] - erase_box[0]) * 0.90  # 90% like in working code
    target_height = erase_box[3] - erase_box[1]
    # Split text into lines (or words if needed for wrapping)
    lines = [line.strip() for line in text_to_overlay.split('\n') if line.strip()]
    if not lines:
        lines = [text_to_overlay]  # Single line if no newlines
    # **KEY FIX**: Reshape ALL lines first, then apply get_display()
    # (arabic_reshaper joins letters into contextual glyph forms; get_display
    # reorders them for right-to-left rendering).
    reshaped_lines = []
    for line in lines:
        reshaped = arabic_reshaper.reshape(line)
        display_text = get_display(reshaped)  # This was missing!
        reshaped_lines.append(display_text)
    # 3. Find optimal font size
    font_size = 100
    final_font = None
    # Find the longest line for font sizing (like in working code)
    if reshaped_lines:
        temp_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
        longest_line = max(reshaped_lines, key=lambda line: draw.textlength(line, font=temp_font))
        # Reduce font size until longest line fits
        while font_size > 10:
            font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
            if draw.textlength(longest_line, font=font) <= target_width:
                final_font = font
                break
            font_size -= 2
    if final_font is None:
        final_font = ImageFont.truetype(PERSIAN_FONT_PATH, 12)  # minimum usable size
    # 4. Handle line wrapping if text is too wide
    final_lines = []
    for line in reshaped_lines:
        if draw.textlength(line, font=final_font) <= target_width:
            final_lines.append(line)
        else:
            # Need to wrap this line - split by words and rewrap.
            # NOTE(review): .index() returns the FIRST match, so a duplicated
            # line would map back to the wrong original — confirm inputs never
            # repeat a line before relying on this.
            original_line = lines[reshaped_lines.index(line)]  # Get original before reshaping
            words = original_line.split()
            current_line_words = []
            for word in words:
                test_words = current_line_words + [word]
                test_text = ' '.join(test_words)
                # Process the test text properly (reshape + bidi each attempt,
                # since glyph joining changes with word boundaries)
                test_reshaped = arabic_reshaper.reshape(test_text)
                test_display = get_display(test_reshaped)
                if draw.textlength(test_display, font=final_font) <= target_width:
                    current_line_words = test_words
                else:
                    # Line is full, save current line and start new one
                    if current_line_words:
                        line_text = ' '.join(current_line_words)
                        line_reshaped = arabic_reshaper.reshape(line_text)
                        line_display = get_display(line_reshaped)
                        final_lines.append(line_display)
                    current_line_words = [word]
            # Add remaining words
            if current_line_words:
                line_text = ' '.join(current_line_words)
                line_reshaped = arabic_reshaper.reshape(line_text)
                line_display = get_display(line_reshaped)
                final_lines.append(line_display)
    # 5. Calculate total height and center text (following working pattern)
    line_spacing = 20  # Same as working code
    total_text_height = 0
    line_heights = []
    for line in final_lines:
        line_bbox = draw.textbbox((0, 0), line, font=final_font)
        line_height = line_bbox[3] - line_bbox[1]
        line_heights.append(line_height)
        total_text_height += line_height
    # Add spacing between lines
    if len(final_lines) > 1:
        total_text_height += (len(final_lines) - 1) * line_spacing
    # Check if total height fits, if not reduce font size
    while total_text_height > target_height and font_size > 10:
        font_size -= 2
        final_font = ImageFont.truetype(PERSIAN_FONT_PATH, font_size)
        # Recalculate heights at the smaller size
        total_text_height = 0
        line_heights = []
        for line in final_lines:
            line_bbox = draw.textbbox((0, 0), line, font=final_font)
            line_height = line_bbox[3] - line_bbox[1]
            line_heights.append(line_height)
            total_text_height += line_height
        if len(final_lines) > 1:
            total_text_height += (len(final_lines) - 1) * line_spacing
    # Center vertically in the erase box
    y_start = erase_box[1] + (target_height - total_text_height) / 2
    # 6. Draw the text (following working pattern)
    current_y = y_start
    for i, line in enumerate(final_lines):
        # Center horizontally
        line_width = draw.textlength(line, font=final_font)  # (unused; anchor="mm" centers)
        x_center = erase_box[0] + (erase_box[2] - erase_box[0]) / 2
        line_y_center = current_y + line_heights[i] / 2
        # Draw shadow for visibility (1px offset behind the main text)
        draw.text(
            (x_center + 1, line_y_center + 1),
            line,
            font=final_font,
            fill=(0, 0, 0),  # Black shadow
            anchor="mm"
        )
        # Draw main text
        draw.text(
            (x_center, line_y_center),
            line,
            font=final_font,
            fill=(255, 255, 255),  # White text
            anchor="mm"
        )
        current_y += line_heights[i] + line_spacing
    return image_copy
# --- GRADIO INTERFACE ---
with gr.Blocks(title="Quote OCR Translator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📝 Quote Image Translator")
    gr.Markdown("Upload an image with English text. See the Persian translation overlaid directly on the image!")
    with gr.Row():
        # Left column: input image plus the two text panes.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Quote Image",
                type="pil",
                sources=["upload", "clipboard"]
            )
            text_output = gr.Textbox(
                label="Extracted Quote Text (English)",
                placeholder="The detected English text will appear here...",
                lines=4,
                show_copy_button=True
            )
            translated_output = gr.Textbox(
                label="Translated Quote (Persian)",
                placeholder="The Persian translation will appear here...",
                lines=4,
                show_copy_button=True
            )
        # Right column: the final rendered image.
        with gr.Column(scale=1):
            image_output = gr.Image(
                label="Translated Image Output",
                type="pil"
            )

    def process_and_overlay(image):
        """End-to-end pipeline: OCR -> translate -> overlay, with early exits
        on a missing image, no detected text, or a translation error."""
        if image is None:
            return "Please upload an image.", "Translation will appear here.", None
        extracted_text, bbox = extract_text_and_bbox(image)
        if bbox is None:
            # bbox is None for both "no text found" and OCR errors;
            # extracted_text carries the human-readable message either way.
            return extracted_text, "No text to translate.", None
        translated_text = translate_text_gemini(extracted_text)
        if "Error" in translated_text:
            return extracted_text, translated_text, None
        final_image = overlay_text_on_image(image, translated_text, bbox)
        return extracted_text, translated_text, final_image

    # Re-run the whole pipeline whenever the uploaded image changes.
    image_input.change(
        fn=process_and_overlay,
        inputs=[image_input],
        outputs=[text_output, translated_output, image_output]
    )
    gr.Markdown("""
    ### 💡 How It Works:
    1. **Text Detection:** The app uses OCR to find English text and its location.
    2. **Inpainting:** It digitally "erases" the original text.
    3. **Translation:** The text is sent to an AI for Persian translation.
    4. **Overlay:** The Persian text is rendered and placed back onto the image.
    """)

if __name__ == "__main__":
    demo.launch()
|