Spaces:

kavehtaheri
/

ocrlight3

Sleeping

File size: 12,565 Bytes

d8c4f81
 
 
 
 
 
2523ada
 
 
 
d8c4f81
 
2523ada
 
d8c4f81
 
2523ada
 
d8c4f81
 
2523ada
d8c4f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0824ca5
d8c4f81
 
 
 
 
 
 
 
 
2523ada
 
d8c4f81
2523ada
d8c4f81
a6b4441
2523ada
d8c4f81
 
 
 
 
 
 
 
 
 
763a3ff
d8c4f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2523ada
d8c4f81
 
 
 
2523ada
d8c4f81
 
2523ada
d8c4f81
 
 
 
 
 
 
 
 
2523ada
d8c4f81
 
2523ada
d8c4f81
0fdf29c
2523ada
d8c4f81
 
 
 
 
 
 
 
2523ada
 
d8c4f81
 
35beac2
2523ada
 
 
 
d8c4f81
2523ada
35beac2
 
 
2523ada
 
35beac2
 
2523ada
35beac2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8c4f81
2523ada
 
 
d8c4f81
2523ada
 
 
d8c4f81
 
 
 
 
2523ada
d8c4f81
 
 
 
2523ada
 
 
 
 
 
d8c4f81
 
2523ada
d8c4f81
2523ada
d8c4f81
2523ada
 
d8c4f81
 
 
 
 
2523ada
 
 
d8c4f81
 
 
 
2523ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8c4f81
 
2523ada
d8c4f81
 
 
2523ada
d8c4f81
2523ada
 
d8c4f81
 
 
2523ada
 
 
 
 
d8c4f81
 
2523ada
d8c4f81
 
2523ada
d8c4f81
 
2523ada
d8c4f81
 
2523ada
d8c4f81
 
 
 
 
 
 
2523ada

import io
import os
import time
from urllib.parse import urlparse

import easyocr
import google.generativeai as genai
import gradio as gr
import numpy as np
import requests
from gradio_client import Client, handle_file
from PIL import Image

# --- Configuration ---
# Gemini API key. Set the GEMINI_API_KEY environment variable (for example as a
# Space secret) to override the value below.
# NOTE(review): the fallback literal is a credential committed to source
# control; it is kept only so existing deployments keep working. Rotate it and
# remove the fallback once the environment variable is configured.
api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA")

# Background image that is uploaded to the text-overlay Space.
# Despite the name this is a relative file path, not a URL: '1.jpg' is expected
# to sit in the root of the Space's repository.
BACKGROUND_IMAGE_URL = "1.jpg"

# Shared EasyOCR reader; stays None until initialize_reader() creates it.
reader = None

def initialize_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The instance is cached in the module-level ``reader`` variable, so the
    model is only loaded the first time this function runs. The reader is
    created for English text with GPU use and verbose output disabled.
    """
    global reader
    if reader is not None:
        return reader

    print("Loading EasyOCR model...")
    reader = easyocr.Reader(['en'], gpu=False, verbose=False)
    print("EasyOCR model loaded successfully!")
    return reader

def extract_text_from_quote(image):
    """Run OCR on a quote image and report the text with its word count.

    Returns a ``(text, word_count_label)`` tuple. When no image is supplied,
    nothing is detected, or OCR raises, ``text`` is a human-readable status
    message and the label is ``"Words: 0"``.
    """
    zero_words = "Words: 0"
    if image is None:
        return "Please upload an image first.", zero_words

    try:
        ocr = initialize_reader()
        detections = ocr.readtext(np.array(image), paragraph=True)

        # Each detection carries its text at index 1; entries that are too
        # short or contain only whitespace are skipped.
        fragments = []
        if detections:
            for detection in detections:
                if len(detection) < 2:
                    continue
                cleaned = detection[1].strip()
                if cleaned:
                    fragments.append(cleaned)

        if fragments:
            joined = ' '.join(fragments)
            return joined, f"Words: {len(joined.split())}"

        return "No text detected in the image.", zero_words

    except Exception as e:
        return f"Error processing image: {str(e)}", zero_words

def translate_extracted(text, lang):
    """Translate extracted English text into casual Persian using Gemini.

    Args:
        text: English text produced by the OCR step.
        lang: Target-language label. It is only used in the debug log; the
            prompt itself is written for Persian.

    Returns:
        The translated text on success; ``"No valid text to translate."``
        when *text* is empty, blank, or one of the pipeline's own status
        messages; or a string starting with ``"Error translating:"`` when
        every attempt fails.
    """
    # Status messages emitted elsewhere in this file. They are matched by
    # prefix rather than by searching for "Error"/"No text" anywhere in the
    # text, so a genuine quote such as "Trial and Error" is still translated.
    status_prefixes = (
        "No text detected in the image",
        "Error processing image:",
        "Error fetching image",
        "Please upload an image",
    )
    stripped = text.strip() if text else ""
    if not stripped or stripped.startswith(status_prefixes):
        return "No valid text to translate."

    max_attempts = 3
    retry_delay_seconds = 2

    try:
        # The API key is deliberately not echoed to the logs, not even partially.
        print(f"DEBUG: Starting translation to {lang}")
        genai.configure(api_key=api_key)

        # The model handle and prompt do not change between attempts, so they
        # are built once instead of on every retry.
        model = genai.GenerativeModel('gemini-2.0-flash')
        prompt = f"""
                You are a cool, chill translator with a fun and warm personality, inspired by Persian Twitter style.
                Your translations should be natural, slangy, and relatable. Use colloquial words and contractions.
                No emojis, keep it RTL-friendly. Be concise but preserve the emotional depth.
                Maintain correct grammar, even with slang. Avoid literal translations.
                Translate the following English quote into this style.
                
                English Quote: "{text}"
                
                Format your output for an image overlay: Break the Persian text into short, visually appealing lines.
                dont use emojies at all
                Output ONLY the translated Persian text.
                """

        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                response = model.generate_content(prompt)
                translated = response.text.strip()
                print(f"DEBUG: Translation successful on attempt {attempt}: {translated[:50]}...")
                return translated
            except Exception as inner_e:
                last_error = inner_e
                if attempt < max_attempts:
                    print(f"DEBUG: Attempt {attempt} failed: {str(inner_e)}. Retrying in 2s...")
                    time.sleep(retry_delay_seconds)
                else:
                    # Sleeping after the final attempt would only delay the
                    # error that is about to be reported.
                    print(f"DEBUG: Attempt {attempt} failed: {str(inner_e)}. No retries left.")

        raise RuntimeError("All translation retries failed.") from last_error

    except Exception as e:
        error_msg = f"Error translating: {str(e)}. Check network access to Google API."
        print(f"DEBUG: Translation failed: {str(e)}")
        return error_msg

# NEW FUNCTION TO CALL THE SECOND HF SPACE
def overlay_text_on_image(translated_text):
    """Render the translated text onto the background image via a remote Space.

    Calls the ``/overlay_text_on_image`` endpoint of the
    ``kavehtaheri/textoverimage1`` Space through ``gradio_client`` and uploads
    ``BACKGROUND_IMAGE_URL`` as the background.

    Returns:
        Whatever the endpoint returns (logged and used as an image path), or
        ``None`` when *translated_text* is empty or looks like a failure
        message, or when the remote call raises.
    """
    # Skip the remote call when the translation step did not yield usable text.
    failure_markers = ("Error", "No valid text")
    if not translated_text or any(marker in translated_text for marker in failure_markers):
        print("DEBUG: Skipping image overlay due to invalid translated text.")
        return None  # None clears the image output in the UI

    try:
        print("DEBUG: Initializing client for 'kavehtaheri/textoverimage1'")
        overlay_client = Client("kavehtaheri/textoverimage1")

        print("DEBUG: Sending data to API endpoint '/overlay_text_on_image'")
        print(f"DEBUG: Persian Text: {translated_text}")
        print(f"DEBUG: Image URL: {BACKGROUND_IMAGE_URL}")

        request_fields = {
            "persian_text": translated_text,
            # The background is supplied through 'upload', so 'url' stays empty.
            "url": "",
            # handle_file prepares the background file for upload to the Space.
            "upload": handle_file(BACKGROUND_IMAGE_URL),
            "username": "aramnevis",
            "text_color": "Black",
        }
        output_path = overlay_client.predict(
            api_name="/overlay_text_on_image",
            **request_fields,
        )

        print(f"DEBUG: Received image path from API: {output_path}")
        return output_path

    except Exception as e:
        # Failures are logged rather than raised; None clears the image output.
        print(f"ERROR: Could not get image from 'textoverimage1' space. Error: {e}")
        return None

# Fetch a post image from Instagram through the One-API service.
_INSTAGRAM_POST_MARKERS = ("p", "reel", "reels", "tv")
_ONE_API_POST_ENDPOINT = "https://api.one-api.ir/instagram/v1/post/"
_HTTP_TIMEOUT_SECONDS = 20


def _extract_instagram_shortcode(ig_url):
    """Return the post shortcode embedded in an Instagram link.

    Only the URL path is inspected, so a missing trailing slash or a query
    string (``?igshid=...``) does not shift the result. The shortcode is the
    segment following ``/p/``, ``/reel/``, ``/reels/`` or ``/tv/``; if none of
    those markers is present, the last path segment is used.

    Raises:
        ValueError: If the link has no path segments at all.
    """
    segments = [part for part in urlparse(ig_url.strip()).path.split("/") if part]
    for position, part in enumerate(segments[:-1]):
        if part in _INSTAGRAM_POST_MARKERS:
            return segments[position + 1]
    if segments:
        return segments[-1]
    raise ValueError(f"Could not find a shortcode in Instagram URL: {ig_url!r}")


def _download_image(image_url):
    """Download *image_url* and open the response body as a PIL image."""
    img_response = requests.get(image_url, timeout=_HTTP_TIMEOUT_SECONDS)
    img_response.raise_for_status()
    return Image.open(io.BytesIO(img_response.content))


def get_instagram_image(ig_url):
    """Fetch the image of an Instagram post using One-API.

    Args:
        ig_url: Link to an Instagram post, e.g.
            ``https://www.instagram.com/p/C-ODQjyy4N3/``.

    Returns:
        A PIL image built from the first media entry's ``url`` (falling back
        to its ``cover``), or ``None`` when *ig_url* is empty or any step
        fails. Failures are logged, never raised.
    """
    if not ig_url:
        return None

    try:
        # NOTE(review): the fallback token is a credential committed to source
        # control; set ONE_API_TOKEN instead, then rotate and remove it.
        one_api_key = os.environ.get("ONE_API_TOKEN", "268976:66f4f58a2a905")
        shortcode = _extract_instagram_shortcode(ig_url)
        headers = {
            "accept": "application/json",
            "one-api-token": one_api_key,
            "Content-Type": "application/json"
        }
        # 'params' lets requests URL-encode the shortcode; the timeout keeps a
        # stalled connection from blocking the UI handler indefinitely.
        response = requests.get(
            _ONE_API_POST_ENDPOINT,
            params={"shortcode": shortcode},
            headers=headers,
            timeout=_HTTP_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            raise ValueError(f"API error: {response.status_code}, {response.text}")

        payload = response.json()
        # Expected shape: {"result": {"media": [{"url": ..., "cover": ...}]}}.
        # A null 'result' or an empty 'media' list falls through to the
        # "No valid image URL" error below.
        media_items = (payload.get("result") or {}).get("media") or [{}]
        first_media = media_items[0]

        # Try main media URL first
        main_url = first_media.get("url")
        if main_url:
            try:
                return _download_image(main_url)
            except Exception as e:
                print(f"DEBUG: Main URL not an image: {str(e)}. Trying cover...")

        # Fallback to cover URL
        cover_url = first_media.get("cover")
        if cover_url:
            return _download_image(cover_url)

        raise ValueError("No valid image URL found in API response.")

    except Exception as e:
        print(f"ERROR fetching Instagram image: {str(e)}")
        return None

def clear_all():
    """Return reset values for every component wired to the Clear button.

    The tuple order is: uploaded image, Instagram link, extracted text,
    word count, translated text, final image, auto-process checkbox.
    """
    cleared_image = None
    cleared_link = None
    extracted_text = "Your extracted quote will appear here..."
    word_count_label = "Words: 0"
    translation_text = "Translation will appear here..."
    cleared_final_image = None
    auto_process_checked = True
    return (
        cleared_image,
        cleared_link,
        extracted_text,
        word_count_label,
        translation_text,
        cleared_final_image,
        auto_process_checked,
    )

# --- Gradio Interface ---
# Build the UI. Components render in the order they are created, so the
# statement order below defines the page layout.
with gr.Blocks(title="Quote OCR & Overlay", theme=gr.themes.Soft()) as demo:
    
    # Page heading and one-line description.
    gr.Markdown("# 📝 Quote Text Extractor & Image Generator")
    gr.Markdown("Upload an image to extract text, translate it, and overlay it onto a new background.")
    
    with gr.Row():
        # --- INPUT COLUMN ---
        with gr.Column(scale=1):
            # type="pil" makes the handlers receive the upload as a PIL image.
            image_input = gr.Image(label="1. Upload Quote Image", type="pil", sources=["upload", "clipboard"])
            # Alternative input: a link to an Instagram post.
            ig_input = gr.Textbox(label="Or Instagram Link", placeholder="e.g., https://www.instagram.com/p/C-ODQjyy4N3/")
            with gr.Row():
                clear_btn = gr.Button("Clear All", variant="secondary")
                extract_btn = gr.Button("Extract & Translate", variant="primary")
            
            # Single-choice, read-only dropdown; its value is passed to the
            # handlers as the target language.
            target_lang = gr.Dropdown(
                label="Target Language",
                choices=["persian(farsi)"], # Locked to Persian as per the logic
                value="persian(farsi)",
                interactive=False # Not changeable since the prompt is hardcoded for Persian
            )
            # Passed to process_everything by the change events; ticked by default.
            auto_translate = gr.Checkbox(label="Auto-Process After Upload", value=True)

        # --- OUTPUTS COLUMN ---
        with gr.Column(scale=2):
            text_output = gr.Textbox(label="2. Extracted English Text", placeholder="Extracted text appears here...", lines=4, show_copy_button=True)
            word_count = gr.Textbox(label="Word Count", interactive=False, max_lines=1)
            translated_output = gr.Textbox(label="3. Translated Persian Text", placeholder="Persian translation appears here...", lines=4, show_copy_button=True)
            
            gr.Markdown("---") # Separator
            
            # type="filepath": the component is given a path to the image file.
            final_image_output = gr.Image(label="4. Final Image with Text Overlay", type="filepath")

    # --- Event Handlers ---
    
    # Combined function for the main button and auto-processing
    def process_everything(image, ig_url, lang, auto_process_enabled):
        """Run the OCR -> translate -> overlay pipeline for the UI.

        Args:
            image: PIL image from the upload component, or None.
            ig_url: Instagram post link; consulted only when no image is given.
            lang: Target-language label, forwarded to ``translate_extracted``.
            auto_process_enabled: When falsy, only OCR of the uploaded image
                runs; the Instagram link, translation and overlay are skipped.

        Returns:
            ``(extracted_text, word_count_label, translated_text, final_image)``
            in the order of the four output components. ``final_image`` is
            None whenever the overlay step did not run or failed.
        """
        # Status messages produced by extract_text_from_quote. Matching them by
        # prefix (instead of looking for "Error"/"No text" anywhere in the text)
        # keeps genuine quotes containing those words from being rejected.
        ocr_status_prefixes = (
            "No text detected in the image.",
            "Error processing image:",
            "Please upload an image first.",
        )

        if not auto_process_enabled:
            # If auto-process is off, we still run extract but not the rest
            text, wc = extract_text_from_quote(image)
            return text, wc, "Translation will appear here...", None

        # A link made only of whitespace counts as "no link".
        ig_url = ig_url.strip() if isinstance(ig_url, str) else ig_url

        # Determine source image: prioritize uploaded image if provided, else IG link
        if image is not None:
            source_image = image
        elif ig_url:
            source_image = get_instagram_image(ig_url)
            if source_image is None:
                return "Error fetching image from Instagram link.", "Words: 0", "Translation failed: No image fetched.", None
        else:
            return "Please upload an image or provide an Instagram link.", "Words: 0", "Translation failed: No input provided.", None

        text, wc = extract_text_from_quote(source_image)
        # Proceed only if text was found
        if text.startswith(ocr_status_prefixes):
            return text, wc, "Translation failed: No text extracted.", None

        translated = translate_extracted(text, lang)
        final_image = overlay_text_on_image(translated)

        return text, wc, translated, final_image

    # The main button always runs the full pipeline: the constant
    # gr.State(True) takes the place of the auto-process checkbox value.
    extract_btn.click(
        fn=process_everything,
        inputs=[image_input, ig_input, target_lang, gr.State(True)], # Pass True to force processing
        outputs=[text_output, word_count, translated_output, final_image_output]
    )

    # Changing the image calls process_everything with the checkbox value:
    # the full pipeline runs when it is checked, OCR only when it is not.
    image_input.change(
        fn=process_everything,
        inputs=[image_input, ig_input, target_lang, auto_translate],
        outputs=[text_output, word_count, translated_output, final_image_output]
    )
    
    # Changing the IG link calls process_everything the same way.
    # NOTE(review): 'change' presumably fires on every edit of the textbox, so
    # partially typed links may trigger lookups - confirm, and consider 'submit'.
    ig_input.change(
        fn=process_everything,
        inputs=[image_input, ig_input, target_lang, auto_translate],
        outputs=[text_output, word_count, translated_output, final_image_output]
    )
    
    # Clear button action: clear_all returns one value per listed output, in this order.
    clear_btn.click(
        fn=clear_all,
        outputs=[image_input, ig_input, text_output, word_count, translated_output, final_image_output, auto_translate]
    )
    
    # Usage notes rendered at the bottom of the page.
    gr.Markdown("### 💡 How It Works:\n1. Upload a clear image containing English text OR provide an Instagram post link.\n2. The app automatically extracts the text using OCR.\n3. The text is translated to a casual, modern Persian.\n4. The Persian text is sent to a second app which overlays it on a background image.\n5. The final image is displayed.")

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # Packages to list in requirements.txt:
    # gradio
    # easyocr
    # pillow
    # numpy
    # google-generativeai
    # gradio_client
    # requests
    # Launch the Gradio app defined by the `demo` Blocks object.
    demo.launch()