OCRLIGHT2 / app.py
kavehtaheri's picture
Update app.py
2f2842f verified
import os
import time  # For retry delays

import easyocr
import google.generativeai as genai
import gradio as gr
import numpy as np
from PIL import Image
# Gemini API key, read from the environment so the secret never lives in source
# control. GEMINI_API_KEY is checked first, then GOOGLE_API_KEY; an empty string
# means "not configured" and translation requests are expected to fail until
# one of them is set (on Hugging Face Spaces, add it as a Space secret).
# NOTE(review): a literal key used to be committed on this line. Treat that key
# as compromised and revoke/rotate it.
api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY", "")

# Global EasyOCR reader - starts as None and is created once, on first use, by
# initialize_reader().
reader = None
def initialize_reader():
    """Return the shared EasyOCR reader, building it on the first call.

    The instance is kept in the module-level ``reader`` variable, so the
    English model (``gpu=False``, ``verbose=False``) is constructed only when
    that variable is still ``None``.
    """
    global reader
    if reader is not None:
        # Already built by an earlier call - reuse it.
        return reader

    print("Loading EasyOCR model...")
    reader = easyocr.Reader(['en'], gpu=False, verbose=False)
    print("EasyOCR model loaded successfully!")
    return reader
def extract_text_from_quote(image):
    """Run EasyOCR on a quote image and return its text plus a word count.

    Args:
        image: The image to read (converted with ``np.array``), or ``None``.

    Returns:
        A ``(text, word_count)`` pair of strings. ``text`` is the detected
        fragments joined by single spaces, or a status message when no image
        was supplied, nothing was detected, or an exception was raised.
        ``word_count`` is formatted as ``"Words: <n>"``.
    """
    if image is None:
        return "Please upload an image first.", "Words: 0"

    try:
        ocr = initialize_reader()
        pixels = np.array(image)
        detections = ocr.readtext(pixels, paragraph=True)

        # Every detection is a sequence whose second item is the recognised
        # text; ignore entries that are too short or whitespace-only.
        fragments = [
            item[1].strip()
            for item in (detections or [])
            if len(item) >= 2 and item[1].strip()
        ]
        if not fragments:
            return "No text detected in the image.", "Words: 0"

        quote = ' '.join(fragments)
        return quote, f"Words: {len(quote.split())}"
    except Exception as e:
        # Any failure is reported to the UI as text instead of being raised.
        return f"Error processing image: {str(e)}", "Words: 0"
def translate_extracted(text, lang):
    """Translate extracted English text into ``lang`` using the Gemini API.

    Args:
        text: English text to translate. Empty/blank text and the status or
            placeholder messages that can appear in the extraction textbox
            are rejected.
        lang: Target language name as selected in the UI (for example
            ``"Spanish"`` or ``"persian(farsi)"``). An empty value or ``None``
            falls back to Persian, which is what this function produced for
            every input before it honoured ``lang``.

    Returns:
        The translated text; ``"No valid text to translate."`` when there is
        nothing to translate; or an ``"Error translating: ..."`` message when
        all attempts failed. Exceptions are reported as text, not raised.
    """
    # Messages that may sit in the textbox without being OCR output. They are
    # matched exactly (or by prefix for the error message) rather than by
    # substring, so a genuine quote that merely contains "Error" or "No text"
    # is still translated.
    non_text_messages = (
        "Please upload an image first.",
        "No text detected in the image.",
        "Your extracted quote will appear here...",
    )
    cleaned = (text or "").strip()
    if (
        not cleaned
        or cleaned in non_text_messages
        or cleaned.startswith("Error processing image:")
    ):
        return "No valid text to translate."

    target = str(lang or "").strip()
    wants_persian = (
        not target
        or "persian" in target.lower()
        or "farsi" in target.lower()
    )
    if wants_persian:
        # Persian keeps the original, style-specific prompt unchanged.
        prompt = f"Translate the following English text accurately to persian,You are a cool, chill translator with a fun and warm personality, inspired by Persian Twitter (توویتر فارسی) style. Your translations should be natural, slangy, touching, and relatable—like casual chats with a friend: short, heartfelt, with colloquial Persian words (e.g. for cultural fit, “فک می‌کنی” for “think,” contractions like “اونی‌ام”). No emojis, keep it RTL-friendly for Persian text. Be concise but preserve the emotional depth and humor.Translate the following English fact/quote into Persian in this style , Maintain correct grammar, even when using slang. Avoid direct, literal translations that sound unnatural in Persian.. Make it feel like a real tweet: warm, fun, and human.English fact: [{text}]Output only the Persian whit Format for image overlay: Break the Persian text into short, visually appealing lines.donot use any emojies at all Ensure the output is RTL-friendly and the text should be out any self represent or emojies just translated text "
    else:
        # Every other dropdown choice gets a language-neutral prompt so the
        # selected target language is actually used.
        prompt = (
            f"Translate the following English text accurately to {target}. "
            "Keep the tone and emotional depth of the original, prefer natural "
            "phrasing over a literal word-for-word rendering, and keep the "
            "grammar correct. Do not use emojis and do not add any commentary "
            "or explanation. Output only the translated text.\n\n"
            f"English text: [{text}]"
        )

    try:
        # Never log any part of the API key.
        print(f"DEBUG: Starting translation to {target or 'persian(farsi)'}")
        genai.configure(api_key=api_key)

        # The model name lives outside the loop so that a fallback chosen
        # after a failure is still in effect on the next attempt.
        model_name = 'gemini-1.5-flash'  # Primary model
        max_attempts = 3
        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                model = genai.GenerativeModel(model_name)
                response = model.generate_content(prompt)
                translated = response.text.strip()
                print(f"DEBUG: Translation successful on attempt {attempt}: {translated[:50]}...")
                return translated
            except Exception as inner_e:
                last_error = inner_e
                print(f"DEBUG: Attempt {attempt} failed: {str(inner_e)}.")
                if 'flash' in str(inner_e):  # Fallback to pro if flash unavailable
                    model_name = 'gemini-pro'
                if attempt < max_attempts:
                    print("DEBUG: Retrying...")
                    time.sleep(2)  # Delay between retries (skipped after the last one)
        raise Exception(f"All retries failed. Last error: {last_error}")
    except Exception as e:
        error_msg = f"Error translating: {str(e)}. Check HF logs for details. (If on HF, ensure network allows Google API access.)"
        print(f"DEBUG: Translation failed: {str(e)}")
        return error_msg
def clear_all():
    """Reset the UI to its empty state.

    Returns:
        A 5-tuple matching the Clear button's outputs, in order: image
        (``None``), extracted text, word count, translated text, and the
        auto-translate checkbox (unchecked).
    """
    # Return empty strings for the two text boxes so their configured
    # placeholders are displayed. Returning the placeholder wording as a
    # *value* would turn it into real content: the copy button would copy it
    # and the Translate button would pass it on as text to translate.
    return None, "", "Words: 0", "", False  # Include auto-translate checkbox
# Create Gradio interface
# NOTE(review): the nesting of rows/columns below was reconstructed from the
# order of the statements; confirm it matches the intended layout.
with gr.Blocks(title="Quote OCR Extractor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📝 Quote Text Extractor")
    gr.Markdown("Upload a quote image and extract the text instantly!")

    with gr.Row():
        # Left column: image upload and the main action buttons.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Upload Quote Image",
                type="pil",
                sources=["upload", "clipboard"],
            )
            with gr.Row():
                clear_btn = gr.Button("Clear", variant="secondary")
                extract_btn = gr.Button("Extract Text", variant="primary")

        # Right column: OCR result, translation controls and translation.
        with gr.Column(scale=1):
            text_output = gr.Textbox(
                label="Extracted Quote Text (English)",
                placeholder="Your extracted quote will appear here...",
                lines=8,
                max_lines=15,
                show_copy_button=True,
            )
            word_count = gr.Textbox(
                label="Word Count",
                interactive=False,
                max_lines=1,
            )
            target_lang = gr.Dropdown(
                label="Select Target Language for Translation",
                choices=["Spanish", "French", "German", "Chinese", "Arabic", "persian(farsi)"],
                value="persian(farsi)",  # Default
            )
            auto_translate = gr.Checkbox(
                label="Auto-Translate After Extraction",
                value=True,
            )
            translate_btn = gr.Button("Translate Extracted Text", variant="primary")
            translated_output = gr.Textbox(
                label="Translated Quote",
                placeholder="Translation will appear here...",
                lines=8,
                max_lines=15,
                show_copy_button=True,
            )

    # Event handlers
    def extract_and_translate(image, lang, auto):
        """Extract text from ``image`` and, when ``auto`` is set, translate it.

        Returns a ``(text, word_count, translation)`` triple for the three
        output textboxes.
        """
        text, wc = extract_text_from_quote(image)
        trans = "Translation will appear here..."  # Default

        # Skip translation only for the exact status messages produced by
        # extract_text_from_quote. A substring test would also skip genuine
        # quotes that happen to contain "Error" or "No text".
        is_status_message = (
            text in ("Please upload an image first.", "No text detected in the image.")
            or text.startswith("Error processing image:")
        )
        if auto and not is_status_message:
            trans = translate_extracted(text, lang)
        return text, wc, trans

    # Extraction runs both on the button and whenever the image changes.
    extract_btn.click(
        fn=extract_and_translate,
        inputs=[image_input, target_lang, auto_translate],
        outputs=[text_output, word_count, translated_output],
    )
    image_input.change(
        fn=extract_and_translate,
        inputs=[image_input, target_lang, auto_translate],
        outputs=[text_output, word_count, translated_output],
    )
    translate_btn.click(
        fn=translate_extracted,
        inputs=[text_output, target_lang],
        outputs=translated_output,
    )
    clear_btn.click(
        fn=clear_all,
        outputs=[image_input, text_output, word_count, translated_output, auto_translate],
    )

    gr.Markdown("""
### 💡 Tips for Best Results:
- Upload clear images with good contrast
- Works best with simple backgrounds
- Supports PNG, JPG, and other common formats
- Text will be automatically extracted and formatted
- Translation assumes extracted text is English; select a target language and click Translate
- If translation fails on HF, check your API key and console logs!
""")

if __name__ == "__main__":
    demo.launch()