# ocrlight3 / app.py
# kavehtaheri's picture
# Update app.py
# a6b4441 verified
import io
import os
import time
from urllib.parse import urlparse

import easyocr
import google.generativeai as genai
import gradio as gr
import numpy as np
import requests
from gradio_client import Client, handle_file # <-- ADDED IMPORT
from PIL import Image
# --- Configuration ---
# Gemini API key, read from the GEMINI_API_KEY environment variable so the
# credential is never committed to the repository. When the variable is unset
# this is an empty string; translation is then expected to fail, and
# translate_extracted reports that failure as an error message.
# NOTE(review): a key used to be hard-coded on this line and is still in the
# repository history - treat it as leaked and rotate it.
api_key = os.environ.get("GEMINI_API_KEY", "")
# Background image passed to the text-overlay Space. Despite the name, the
# current value is a bare file name rather than a URL: the file '1.jpg' must
# be in the root of the Space's repository.
BACKGROUND_IMAGE_URL = "1.jpg"
# Shared EasyOCR reader; stays None until the first call to initialize_reader().
reader = None


def initialize_reader():
    """Return the module-wide EasyOCR reader, building it on first use.

    The reader is created for English, with GPU use and verbose output
    disabled, and is cached in the module-level ``reader`` variable so later
    calls reuse the same instance.
    """
    global reader
    if reader is not None:
        # Already built by an earlier call.
        return reader
    print("Loading EasyOCR model...")
    reader = easyocr.Reader(['en'], gpu=False, verbose=False)
    print("EasyOCR model loaded successfully!")
    return reader
def extract_text_from_quote(image):
    """Run EasyOCR on a quote image and return its text plus a word count.

    Args:
        image: The image to read; it is converted with ``np.array`` before
            being passed to the reader. ``None`` means nothing was supplied.

    Returns:
        A ``(text, word_count_label)`` tuple. ``text`` is either the detected
        text joined with single spaces or a status message (no upload, no
        text detected, or an error description); ``word_count_label`` has the
        form ``"Words: N"``.
    """
    if image is None:
        return "Please upload an image first.", "Words: 0"
    try:
        ocr_reader = initialize_reader()
        pixels = np.array(image)
        detections = ocr_reader.readtext(pixels, paragraph=True)

        # Keep the non-blank text of every detection that carries a text field.
        fragments = []
        if detections:
            for detection in detections:
                if len(detection) < 2:
                    continue
                cleaned = detection[1].strip()
                if cleaned:
                    fragments.append(cleaned)

        if not fragments:
            return "No text detected in the image.", "Words: 0"

        extracted_text = ' '.join(fragments)
        total_words = len(extracted_text.split())
        return extracted_text, f"Words: {total_words}"
    except Exception as e:
        return f"Error processing image: {str(e)}", "Words: 0"
def translate_extracted(text, lang):
    """Translate extracted English text into casual Persian via the Gemini API.

    Args:
        text: English text produced by the OCR step. Empty text and the OCR
            step's own status messages are rejected without calling the API.
        lang: Target-language label. It is only used in the debug log; the
            prompt itself is written specifically for Persian.

    Returns:
        The translated text on success; ``"No valid text to translate."``
        when the input is unusable; otherwise a message starting with
        ``"Error translating:"`` describing the failure.
    """
    # Recognise the OCR status messages by prefix. A plain substring test
    # would also reject genuine quotes that merely contain a word such as
    # "Error".
    status_prefixes = (
        "Please upload an image first.",
        "No text detected in the image.",
        "Error processing image:",
    )
    if not text or text.startswith(status_prefixes):
        return "No valid text to translate."

    max_attempts = 3
    retry_delay_seconds = 2
    try:
        # Log only whether a key is present - never any part of the key.
        print(f"DEBUG: API key configured: {bool(api_key)}. Starting translation to {lang}")
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')
        # The prompt text is kept exactly as-is so the model's behaviour does
        # not change.
        prompt = (
            "\n"
            "You are a cool, chill translator with a fun and warm personality, inspired by Persian Twitter style.\n"
            "Your translations should be natural, slangy, and relatable. Use colloquial words and contractions.\n"
            "No emojis, keep it RTL-friendly. Be concise but preserve the emotional depth.\n"
            "Maintain correct grammar, even with slang. Avoid literal translations.\n"
            "Translate the following English quote into this style.\n"
            f'English Quote: "{text}"\n'
            "Format your output for an image overlay: Break the Persian text into short, visually appealing lines.\n"
            "dont use emojies at all\n"
            "Output ONLY the translated Persian text.\n"
        )

        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                response = model.generate_content(prompt)
                translated = response.text.strip()
                print(f"DEBUG: Translation successful on attempt {attempt}: {translated[:50]}...")
                return translated
            except Exception as inner_e:
                last_error = inner_e
                if attempt < max_attempts:
                    print(f"DEBUG: Attempt {attempt} failed: {inner_e}. Retrying in {retry_delay_seconds}s...")
                    # Sleep only when another attempt will actually follow.
                    time.sleep(retry_delay_seconds)
                else:
                    print(f"DEBUG: Attempt {attempt} failed: {inner_e}. No retries left.")
        # Keep the underlying cause attached so the final message is useful.
        raise RuntimeError(f"All translation retries failed ({last_error})") from last_error
    except Exception as e:
        error_msg = f"Error translating: {str(e)}. Check network access to Google API."
        print(f"DEBUG: Translation failed: {str(e)}")
        return error_msg
# Bridge to the second Hugging Face Space that renders text onto an image.
def overlay_text_on_image(translated_text):
    """Ask the 'kavehtaheri/textoverimage1' Space to render text onto the background.

    Args:
        translated_text: Text to draw. Empty values, and values containing
            "Error" or "No valid text", are treated as a failed translation.

    Returns:
        Whatever the Space's '/overlay_text_on_image' endpoint returns (logged
        as an image path), or ``None`` when the input is unusable or the call
        fails. ``None`` clears the image output in the UI.
    """
    failure_markers = ("Error", "No valid text")
    if not translated_text or any(marker in translated_text for marker in failure_markers):
        print("DEBUG: Skipping image overlay due to invalid translated text.")
        return None

    try:
        print("DEBUG: Initializing client for 'kavehtaheri/textoverimage1'")
        overlay_client = Client("kavehtaheri/textoverimage1")
        print("DEBUG: Sending data to API endpoint '/overlay_text_on_image'")
        print(f"DEBUG: Persian Text: {translated_text}")
        print(f"DEBUG: Image URL: {BACKGROUND_IMAGE_URL}")
        request_arguments = {
            "persian_text": translated_text,
            # The background is supplied through 'upload', so 'url' stays empty.
            "url": "",
            "upload": handle_file(BACKGROUND_IMAGE_URL),
            "username": "aramnevis",
            "text_color": "Black",
            "api_name": "/overlay_text_on_image",
        }
        result_image_path = overlay_client.predict(**request_arguments)
        print(f"DEBUG: Received image path from API: {result_image_path}")
    except Exception as e:
        # Any failure is logged and reported to the caller as "no image".
        print(f"ERROR: Could not get image from 'textoverimage1' space. Error: {e}")
        return None
    return result_image_path
# Instagram image retrieval through the One-API service.

# Seconds to wait on each outbound HTTP request before giving up, so a stalled
# server cannot block the app indefinitely.
_HTTP_TIMEOUT_SECONDS = 15

# Path segments that directly precede the shortcode in Instagram post links.
_POST_PATH_MARKERS = ("p", "reel", "reels", "tv")


def _extract_instagram_shortcode(ig_url):
    """Return the post shortcode found in *ig_url*, or None if there is none."""
    segments = [part for part in urlparse(ig_url.strip()).path.split("/") if part]
    for position, segment in enumerate(segments[:-1]):
        if segment in _POST_PATH_MARKERS:
            return segments[position + 1]
    # No marker found: fall back to the last path segment (e.g. a bare shortcode).
    if segments and segments[-1] not in _POST_PATH_MARKERS:
        return segments[-1]
    return None


def _download_image(url):
    """Download *url* and open the response body as a PIL image."""
    response = requests.get(url, timeout=_HTTP_TIMEOUT_SECONDS)
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))


def get_instagram_image(ig_url):
    """Fetch the image of an Instagram post using One-API.

    The post's main media URL is tried first; if it cannot be downloaded or
    opened as an image, the media's cover URL is used instead.

    The One-API token is read from the ONE_API_TOKEN environment variable so
    that no credential lives in the source code.
    NOTE(review): a token used to be hard-coded in this function and is still
    in the repository history - treat it as leaked and rotate it.

    Args:
        ig_url: Link to an Instagram post. Query strings and a missing
            trailing slash are tolerated.

    Returns:
        A PIL image, or ``None`` when *ig_url* is empty or anything fails
        (the failure is printed).
    """
    if not ig_url:
        return None
    try:
        one_api_key = os.environ.get("ONE_API_TOKEN")
        if not one_api_key:
            raise ValueError("ONE_API_TOKEN environment variable is not set.")

        shortcode = _extract_instagram_shortcode(ig_url)
        if not shortcode:
            raise ValueError(f"Could not find a post shortcode in: {ig_url}")

        headers = {
            "accept": "application/json",
            "one-api-token": one_api_key,
            "Content-Type": "application/json"
        }
        # 'params' lets requests URL-encode the shortcode.
        response = requests.get(
            "https://api.one-api.ir/instagram/v1/post/",
            params={"shortcode": shortcode},
            headers=headers,
            timeout=_HTTP_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            raise ValueError(f"API error: {response.status_code}, {response.text}")

        payload = response.json()
        # Tolerate a null 'result' and a missing or empty 'media' list.
        media_items = (payload.get("result") or {}).get("media") or [{}]
        first_media = media_items[0] or {}

        main_url = first_media.get("url")
        if main_url:
            try:
                return _download_image(main_url)
            except Exception as e:
                print(f"DEBUG: Main URL not an image: {str(e)}. Trying cover...")

        cover_url = first_media.get("cover")
        if cover_url:
            return _download_image(cover_url)

        raise ValueError("No valid image URL found in API response.")
    except Exception as e:
        print(f"ERROR fetching Instagram image: {str(e)}")
        return None
def clear_all():
    """Return the reset value for each component wired to the Clear All button.

    The tuple order follows the button's ``outputs`` list: uploaded image,
    Instagram link, extracted text, word count, translation, final image and
    the auto-process checkbox.
    """
    reset_image = None
    reset_ig_link = None
    reset_extracted_text = "Your extracted quote will appear here..."
    reset_word_count = "Words: 0"
    reset_translation = "Translation will appear here..."
    reset_final_image = None
    reset_auto_process = True
    return (
        reset_image,
        reset_ig_link,
        reset_extracted_text,
        reset_word_count,
        reset_translation,
        reset_final_image,
        reset_auto_process,
    )
# --- Gradio Interface ---
with gr.Blocks(title="Quote OCR & Overlay", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📝 Quote Text Extractor & Image Generator")
    gr.Markdown("Upload an image to extract text, translate it, and overlay it onto a new background.")
    with gr.Row():
        # --- INPUT COLUMN ---
        with gr.Column(scale=1):
            image_input = gr.Image(label="1. Upload Quote Image", type="pil", sources=["upload", "clipboard"])
            ig_input = gr.Textbox(
                label="Or Instagram Link",
                placeholder="e.g., https://www.instagram.com/p/C-ODQjyy4N3/",
                info="Press Enter or click 'Extract & Translate' to process the link.",
            )
            with gr.Row():
                clear_btn = gr.Button("Clear All", variant="secondary")
                extract_btn = gr.Button("Extract & Translate", variant="primary")
            target_lang = gr.Dropdown(
                label="Target Language",
                choices=["persian(farsi)"],  # Locked to Persian as per the logic
                value="persian(farsi)",
                interactive=False  # Not changeable since the prompt is hardcoded for Persian
            )
            auto_translate = gr.Checkbox(label="Auto-Process After Upload", value=True)
        # --- OUTPUTS COLUMN ---
        with gr.Column(scale=2):
            text_output = gr.Textbox(label="2. Extracted English Text", placeholder="Extracted text appears here...", lines=4, show_copy_button=True)
            word_count = gr.Textbox(label="Word Count", interactive=False, max_lines=1)
            translated_output = gr.Textbox(label="3. Translated Persian Text", placeholder="Persian translation appears here...", lines=4, show_copy_button=True)
            gr.Markdown("---")  # Separator
            final_image_output = gr.Image(label="4. Final Image with Text Overlay", type="filepath")

    # --- Event Handlers ---
    def process_everything(image, ig_url, lang, auto_process_enabled):
        """Run the OCR -> translation -> overlay pipeline.

        Args:
            image: Uploaded image, or None.
            ig_url: Instagram post link; used only when no image was uploaded.
            lang: Target-language label forwarded to the translator.
            auto_process_enabled: When falsy, only OCR is run on the uploaded
                image and the translation/overlay steps are skipped.

        Returns:
            ``(extracted_text, word_count_label, translation, final_image)``,
            matching the four pipeline output components.
        """
        if not auto_process_enabled:
            # Auto-process is off: still run extraction, but nothing after it.
            text, wc = extract_text_from_quote(image)
            return text, wc, "Translation will appear here...", None

        # Pick the source image: an uploaded image wins over the Instagram link.
        if image is not None:
            source_image = image
        elif ig_url:
            source_image = get_instagram_image(ig_url)
            if source_image is None:
                return "Error fetching image from Instagram link.", "Words: 0", "Translation failed: No image fetched.", None
        else:
            return "Please upload an image or provide an Instagram link.", "Words: 0", "Translation failed: No input provided.", None

        text, wc = extract_text_from_quote(source_image)
        # Recognise the OCR status messages by prefix; a substring test would
        # also reject genuine quotes that merely contain the word "Error".
        ocr_failure_prefixes = ("No text detected in the image.", "Error processing image:")
        if text.startswith(ocr_failure_prefixes):
            return text, wc, "Translation failed: No text extracted.", None

        translated = translate_extracted(text, lang)
        final_image = overlay_text_on_image(translated)
        return text, wc, translated, final_image

    def process_image_change(image, ig_url, lang, auto_process_enabled):
        """Handle a change of the image input.

        A change event also fires when the image is cleared (for example by
        the Clear All button). In that case the outputs are left untouched,
        so freshly reset fields are not overwritten with an error message.
        """
        if image is None:
            return gr.update(), gr.update(), gr.update(), gr.update()
        return process_everything(image, ig_url, lang, auto_process_enabled)

    pipeline_inputs = [image_input, ig_input, target_lang]
    pipeline_outputs = [text_output, word_count, translated_output, final_image_output]
    # Constant True used by explicit user actions to force the full pipeline.
    force_processing = gr.State(True)

    # The main button triggers the full pipeline
    extract_btn.click(
        fn=process_everything,
        inputs=pipeline_inputs + [force_processing],
        outputs=pipeline_outputs
    )
    # A new image triggers the pipeline; the translation and overlay steps run
    # only if 'auto-translate' is checked
    image_input.change(
        fn=process_image_change,
        inputs=pipeline_inputs + [auto_translate],
        outputs=pipeline_outputs
    )
    # The Instagram link is processed when the user presses Enter. A 'change'
    # listener would fire on every keystroke and start a network fetch for
    # each partially typed URL.
    ig_input.submit(
        fn=process_everything,
        inputs=pipeline_inputs + [force_processing],
        outputs=pipeline_outputs
    )
    # Clear button action
    clear_btn.click(
        fn=clear_all,
        outputs=[image_input, ig_input, text_output, word_count, translated_output, final_image_output, auto_translate]
    )
    gr.Markdown("### 💡 How It Works:\n1. Upload a clear image containing English text OR provide an Instagram post link.\n2. The app automatically extracts the text using OCR.\n3. The text is translated to a casual, modern Persian.\n4. The Persian text is sent to a second app which overlays it on a background image.\n5. The final image is displayed.")
# Packages to list in requirements.txt for this app:
#   gradio, easyocr, pillow, numpy, google-generativeai, gradio_client, requests
if __name__ == "__main__":
    # Start the Gradio app only when this file is run as a script.
    demo.launch()