Spaces:

kavehtaheri
/

ocrlight3

Sleeping

App Files Files Community

kavehtaheri committed on Jul 23, 2025

Commit

2523ada

verified ·

1 Parent(s): f903719

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -178

app.py CHANGED Viewed

@@ -4,24 +4,21 @@ from PIL import Image
 import numpy as np
 import google.generativeai as genai
 import time
-from gradio_client import Client, handle_file
-import requests # Used for API calls
-import io       # Used to handle image bytes from download
-import re       # Used for regex parsing of the URL shortcode
 # --- Configuration ---
-# Gemini API key
-# IMPORTANT: Replace with your actual Gemini API Key for translation
-GEMINI_API_KEY = "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA"
-# One-API Token for Instagram Downloader
-# IMPORTANT: Replace with your token from one-api.ir
-ONE_API_TOKEN = "268976:66f4f58a2a905"
 # URL to your background image hosted on your Hugging Face Space.
 BACKGROUND_IMAGE_URL = "1.jpg"
-# Global reader - initialize once to save time
 reader = None
 def initialize_reader():
@@ -33,89 +30,6 @@ def initialize_reader():
         print("EasyOCR model loaded successfully!")
     return reader
-# --- MODIFIED FUNCTION TO DOWNLOAD FROM INSTAGRAM USING SHORTCODE ---
-def download_instagram_image(url):
-    """
-    Downloads the first image from an Instagram post URL using One-API by extracting the shortcode.
-    Returns a PIL Image object on success or an error message string on failure.
-    """
-    if not url or not url.strip():
-        return "Please enter an Instagram URL."
-    # Regex to extract the shortcode from various Instagram URL formats
-    # It looks for patterns like /p/..., /reel/..., or /tv/...
-    # Example: https://www.instagram.com/p/DMaqqN_RuqQ/ -> extracts 'DMaqqN_RuqQ'
-    match = re.search(r"/(?:p|reel|tv)/([a-zA-Z0-9_-]+)", url.strip())
-    if not match:
-        print(f"ERROR: Could not extract shortcode from URL: {url.strip()}")
-        return "Invalid Instagram URL format. Please provide a valid post or reel URL."
-    shortcode = match.group(1)
-    print(f"DEBUG: Extracted shortcode: {shortcode}")
-    # Use the new API endpoint with the 'shortcode' parameter, as per your curl example
-    api_endpoint = f"https://api.one-api.ir/instagram/v1/post/?shortcode={shortcode}"
-    print(f"DEBUG: Calling One-API with endpoint: {api_endpoint}")
-    try:
-        # Set headers as specified in the curl command, especially the token
-        headers = {
-            'one-api-token': ONE_API_TOKEN,
-            'accept': 'application/json'
-        }
-        response = requests.get(api_endpoint, headers=headers, timeout=30)
-        response.raise_for_status()  # This will raise an HTTPError for 4xx or 5xx status codes
-        data = response.json()
-        # Check for success and find the image URL in the response
-        if data.get("ok") and data.get("result"):
-            media_list = data["result"].get("media", [])
-            image_url = None
-            # Find the first item that is an image
-            for item in media_list:
-                if item.get("type") == "image":
-                    image_url = item.get("url")
-                    break
-            if not image_url:
-                return "API Error: No image found in the post media."
-        else:
-            error_message = data.get("message", "Unknown API error.")
-            print(f"ERROR: One-API call failed. Message: {error_message}")
-            return f"API Error: {error_message}"
-        print(f"DEBUG: Found image URL: {image_url[:60]}...")
-        # Download the actual image content from the URL found
-        image_response = requests.get(image_url, timeout=30)
-        image_response.raise_for_status()
-        # Open the image from the downloaded bytes and return it as a PIL Image object
-        image = Image.open(io.BytesIO(image_response.content))
-        print("DEBUG: Instagram image downloaded and converted to PIL Image successfully.")
-        return image
-    except requests.exceptions.HTTPError as e:
-        # This will catch errors like 403 Forbidden, 404 Not Found, etc.
-        error_details = f"API Error ({e.response.status_code}): {e.response.reason}."
-        try:
-            # Try to get more specific error message from the API's JSON response
-            error_json = e.response.json()
-            error_details += f" Details: {error_json.get('message', 'No additional details.')}"
-        except requests.exceptions.JSONDecodeError:
-            error_details += f" Raw response: {e.response.text}"
-        print(f"ERROR: HTTP error occurred: {error_details}")
-        return error_details
-    except requests.exceptions.RequestException as e:
-        print(f"ERROR: Network error while contacting API or downloading image: {e}")
-        return f"Network Error: Could not retrieve data. Please check the connection. Details: {e}"
-    except Exception as e:
-        print(f"ERROR: An unexpected error occurred in download_instagram_image: {e}")
-        return f"An unexpected error occurred: {str(e)}"
 def extract_text_from_quote(image):
     """Extract text from quote image using EasyOCR"""
     if image is None:
@@ -124,11 +38,9 @@ def extract_text_from_quote(image):
     try:
         reader = initialize_reader()
         img_array = np.array(image)
-        # Using paragraph=True helps group related lines of text
         results = reader.readtext(img_array, paragraph=True)
         if results:
-            # Join all detected text blocks with a space
             text_parts = [result[1].strip() for result in results if len(result) >= 2 and result[1].strip()]
             if text_parts:
                 extracted_text = ' '.join(text_parts)
@@ -146,12 +58,13 @@ def translate_extracted(text, lang):
         return "No valid text to translate."
     try:
-        print(f"DEBUG: API Key loaded. Starting translation to {lang}")
-        genai.configure(api_key=GEMINI_API_KEY)
-        for attempt in range(3): # Simple retry logic
             try:
                 model = genai.GenerativeModel('gemini-1.5-flash')
                 prompt = f"""
                 You are a cool, chill translator with a fun and warm personality, inspired by Persian Twitter style.
                 Your translations should be natural, slangy, and relatable. Use colloquial words and contractions.
@@ -179,13 +92,15 @@ def translate_extracted(text, lang):
         print(f"DEBUG: Translation failed: {str(e)}")
         return error_msg
 def overlay_text_on_image(translated_text):
     """
     Sends translated text to the 'textoverimage1' Space and gets the resulting image.
     """
     if not translated_text or "Error" in translated_text or "No valid text" in translated_text:
         print("DEBUG: Skipping image overlay due to invalid translated text.")
-        return None
     try:
         print("DEBUG: Initializing client for 'kavehtaheri/textoverimage1'")
@@ -195,12 +110,13 @@ def overlay_text_on_image(translated_text):
         print(f"DEBUG: Persian Text: {translated_text}")
         print(f"DEBUG: Image URL: {BACKGROUND_IMAGE_URL}")
         result_image_path = client.predict(
             persian_text=translated_text,
-            url="",
             upload=handle_file(BACKGROUND_IMAGE_URL),
-            username="",
-            text_color="Black",
             api_name="/overlay_text_on_image"
         )
@@ -209,123 +125,156 @@ def overlay_text_on_image(translated_text):
     except Exception as e:
         print(f"ERROR: Could not get image from 'textoverimage1' space. Error: {e}")
         return None
-def clear_all():
-    """Clear all inputs and outputs"""
-    return None, "", "Your extracted quote will appear here...", "Words: 0", "Translation will appear here...", None, True
-# --- Main Processing Functions ---
-def process_everything(image, lang):
-    """The core pipeline: OCR -> Translate -> Overlay. Accepts a PIL image."""
-    if image is None:
-        return "Please provide an image first.", "Words: 0", "Translation failed: No image provided.", None
-    text, wc = extract_text_from_quote(image)
-    if "No text" in text or "Error" in text:
-        return text, wc, "Translation failed: No text extracted.", None
-    translated = translate_extracted(text, lang)
-    final_image = overlay_text_on_image(translated)
-    return text, wc, translated, final_image
-def process_from_url(url, lang):
-    """Handler for the Instagram URL workflow."""
-    gr.Info("Downloading image from Instagram...")
-    download_result = download_instagram_image(url)
-    # Check if the download function returned an error string
-    if isinstance(download_result, str):
-        # The download failed, show an error and stop the process
-        gr.Error(f"Download Failed: {download_result}")
-        return download_result, "Words: 0", "Process failed.", None, None
-    # If successful, it returned a PIL image. Now process it.
-    gr.Info("Image downloaded! Starting OCR and Translation...")
-    text, wc, translated, final_image = process_everything(download_result, lang)
-    # Return the downloaded image to populate the input image box for user reference
-    return text, wc, translated, final_image, download_result
 # --- Gradio Interface ---
 with gr.Blocks(title="Quote OCR & Overlay", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 Quote Text Extractor & Image Generator")
-    gr.Markdown("Extract text from an image, translate it, and overlay it onto a new background. Choose your input method below.")
     with gr.Row():
         # --- INPUT COLUMN ---
         with gr.Column(scale=1):
-            with gr.Tabs():
-                with gr.Tab("1. Upload Image"):
-                    image_input = gr.Image(label="Upload Quote Image", type="pil", sources=["upload", "clipboard"])
-                    auto_process_cb = gr.Checkbox(label="Auto-Process After Upload", value=True)
-                with gr.Tab("1. Download from Instagram URL"):
-                    insta_url_input = gr.Textbox(label="Instagram Post URL", placeholder="Paste a link like https://www.instagram.com/p/C0123ABCD.../")
-                    insta_process_btn = gr.Button("Download & Process", variant="primary")
             target_lang = gr.Dropdown(
                 label="Target Language",
-                choices=["persian(farsi)"],
                 value="persian(farsi)",
-                interactive=False
             )
-            with gr.Row():
-                clear_btn = gr.Button("Clear All", variant="secondary")
-                extract_btn = gr.Button("Process Uploaded Image", variant="primary")
         # --- OUTPUTS COLUMN ---
         with gr.Column(scale=2):
             text_output = gr.Textbox(label="2. Extracted English Text", placeholder="Extracted text appears here...", lines=4, show_copy_button=True)
             word_count = gr.Textbox(label="Word Count", interactive=False, max_lines=1)
             translated_output = gr.Textbox(label="3. Translated Persian Text", placeholder="Persian translation appears here...", lines=4, show_copy_button=True)
-            gr.Markdown("---")
             final_image_output = gr.Image(label="4. Final Image with Text Overlay", type="filepath")
     # --- Event Handlers ---
-    # 1. For the "Process Uploaded Image" button
     extract_btn.click(
         fn=process_everything,
-        inputs=[image_input, target_lang],
         outputs=[text_output, word_count, translated_output, final_image_output]
     )
-    # 2. For auto-processing when an image is uploaded
-    def auto_process_wrapper(image, lang, is_enabled):
-        if is_enabled:
-            return process_everything(image, lang)
-        # If auto-process is off, just do OCR and stop
-        text, wc = extract_text_from_quote(image)
-        return text, wc, "Translation will appear here...", None
     image_input.change(
-        fn=auto_process_wrapper,
-        inputs=[image_input, target_lang, auto_process_cb],
         outputs=[text_output, word_count, translated_output, final_image_output]
     )
-    # 3. For the "Download & Process" button in the Instagram tab
-    insta_process_btn.click(
-        fn=process_from_url,
-        inputs=[insta_url_input, target_lang],
-        # The outputs now include the image_input to show the downloaded image
-        outputs=[text_output, word_count, translated_output, final_image_output, image_input]
     )
-    # 4. For the Clear button
     clear_btn.click(
         fn=clear_all,
-        outputs=[image_input, insta_url_input, text_output, word_count, translated_output, final_image_output, auto_process_cb]
     )
-    gr.Markdown("### 💡 How It Works:\n1. **Upload an image** OR **paste an Instagram URL**.\n2. The app automatically extracts the English text using OCR.\n3. The text is translated to a casual, modern Persian.\n4. The Persian text is sent to a second app which overlays it on a background image.\n5. The final image is displayed.")
 if __name__ == "__main__":
-    # Ensure these packages are in your requirements.txt:
     # gradio
     # easyocr
     # pillow
@@ -333,4 +282,4 @@ if __name__ == "__main__":
     # google-generativeai
     # gradio_client
     # requests
-    demo.launch(debug=True)

 import numpy as np
 import google.generativeai as genai
 import time
+from gradio_client import Client, handle_file  # <-- ADDED IMPORT
+import requests
+from urllib.parse import urlparse
+import io
 # --- Configuration ---
+# Gemini API key. SECURITY: this key is committed in plain text; rotate it and load it from an environment variable (e.g. a Space secret) instead of hard-coding it.
+api_key = "AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA"
 # URL to your background image hosted on your Hugging Face Space.
+# NOTE: despite the constant's name, the value below is a bare file name (a relative path), not a URL.
+# The file '1.jpg' must be in the root of your Space's repository.
 BACKGROUND_IMAGE_URL = "1.jpg"
+# Global reader - initialize once
 reader = None
 def initialize_reader():
         print("EasyOCR model loaded successfully!")
     return reader
 def extract_text_from_quote(image):
     """Extract text from quote image using EasyOCR"""
     if image is None:
     try:
         reader = initialize_reader()
         img_array = np.array(image)
         results = reader.readtext(img_array, paragraph=True)
         if results:
             text_parts = [result[1].strip() for result in results if len(result) >= 2 and result[1].strip()]
             if text_parts:
                 extracted_text = ' '.join(text_parts)
         return "No valid text to translate."
     try:
+        print(f"DEBUG: API Key loaded (first 5 chars: {api_key[:5]}...). Starting translation to {lang}")
+        genai.configure(api_key=api_key)
+        for attempt in range(3):
             try:
                 model = genai.GenerativeModel('gemini-1.5-flash')
+                # Updated prompt for clarity and conciseness
                 prompt = f"""
                 You are a cool, chill translator with a fun and warm personality, inspired by Persian Twitter style.
                 Your translations should be natural, slangy, and relatable. Use colloquial words and contractions.
         print(f"DEBUG: Translation failed: {str(e)}")
         return error_msg
+# NEW FUNCTION TO CALL THE SECOND HF SPACE
 def overlay_text_on_image(translated_text):
     """
     Sends translated text to the 'textoverimage1' Space and gets the resulting image.
     """
+    # Don't proceed if translation failed or is empty
     if not translated_text or "Error" in translated_text or "No valid text" in translated_text:
         print("DEBUG: Skipping image overlay due to invalid translated text.")
+        return None # Return None to clear the image output
     try:
         print("DEBUG: Initializing client for 'kavehtaheri/textoverimage1'")
         print(f"DEBUG: Persian Text: {translated_text}")
         print(f"DEBUG: Image URL: {BACKGROUND_IMAGE_URL}")
+        # The handle_file function downloads the URL to a temporary file for upload
         result_image_path = client.predict(
             persian_text=translated_text,
+            url="", # Pass an empty string for URL if upload is used
             upload=handle_file(BACKGROUND_IMAGE_URL),
+            username="", # Not needed based on the API, can be empty
+            text_color="Black", # As specified in the screenshot
             api_name="/overlay_text_on_image"
         )
     except Exception as e:
         print(f"ERROR: Could not get image from 'textoverimage1' space. Error: {e}")
+        # Return a placeholder or raise an error in Gradio UI
+        # For now, we return None which will clear the output
         return None
+# NEW FUNCTION TO FETCH IMAGE FROM INSTAGRAM VIA ONE-API
+def get_instagram_image(ig_url):
+    """Fetch the image of an Instagram post through One-API.
+
+    Args:
+        ig_url: URL of an Instagram post, reel or TV item. The shortcode is
+            taken from the path segment that follows ``/p/``, ``/reel/`` or
+            ``/tv/``.
+
+    Returns:
+        A PIL ``Image`` on success, or ``None`` when ``ig_url`` is empty or
+        any step fails (bad URL, HTTP error, timeout, unexpected payload).
+        Failures are printed rather than raised so the Gradio callback that
+        calls this can show its own message.
+    """
+    import os  # local import: only needed for the optional token override
+
+    if not ig_url:
+        return None
+    try:
+        shortcode = _extract_instagram_shortcode(ig_url)
+        # NOTE(security): prefer the ONE_API_TOKEN environment variable. The
+        # literal fallback keeps existing deployments working, but the token
+        # is public in the repository and should be rotated and removed.
+        api_token = os.environ.get("ONE_API_TOKEN", "268976:66f4f58a2a905")
+        api_url = f"https://api.one-api.ir/instagram/v1/post/?shortcode={shortcode}"
+        headers = {
+            "accept": "application/json",
+            "one-api-token": api_token
+        }
+        # A timeout keeps a stalled upstream from blocking the worker forever.
+        response = requests.get(api_url, headers=headers, timeout=30)
+        response.raise_for_status()  # Raise on any 4xx / 5xx status
+        image_url = _pick_image_url(response.json())
+        if not image_url:
+            raise ValueError("No image URL found in API response.")
+        # Download the image bytes and decode them with PIL
+        img_response = requests.get(image_url, timeout=30)
+        img_response.raise_for_status()
+        return Image.open(io.BytesIO(img_response.content))
+    except Exception as e:
+        # Deliberate catch-all: this is the boundary to the UI callback.
+        print(f"ERROR fetching Instagram image: {str(e)}")
+        return None
+def _extract_instagram_shortcode(ig_url):
+    """Return the path segment following /p/, /reel/ or /tv/ in ``ig_url``."""
+    segments = [part for part in urlparse(ig_url.strip()).path.split('/') if part]
+    for idx, segment in enumerate(segments[:-1]):
+        if segment in ('p', 'reel', 'tv'):
+            return segments[idx + 1]
+    raise ValueError("Invalid Instagram URL: No shortcode found.")
+def _pick_image_url(data):
+    """Return the first usable image URL found in a One-API payload, or None."""
+    # Nested layout: result -> media[] -> {type, url}. Assumed from the
+    # earlier version of this downloader; confirm against the One-API docs.
+    result = data.get('result')
+    if isinstance(result, dict):
+        for item in result.get('media') or []:
+            if isinstance(item, dict) and item.get('type') == 'image' and item.get('url'):
+                return item['url']
+    # Flat layout guessed by the previous revision of this function.
+    return data.get('url') or data.get('thumbnail_src') or data.get('display_url')
+def clear_all():
+    """Return the reset value for each of the seven components the Clear button updates.
+
+    In order: image, Instagram link, extracted text, word count,
+    translation, final image, auto-process checkbox.
+    """
+    cleared_image = None
+    cleared_link = None
+    text_placeholder = "Your extracted quote will appear here..."
+    count_placeholder = "Words: 0"
+    translation_placeholder = "Translation will appear here..."
+    cleared_result = None
+    auto_process_default = True
+    return (
+        cleared_image,
+        cleared_link,
+        text_placeholder,
+        count_placeholder,
+        translation_placeholder,
+        cleared_result,
+        auto_process_default,
+    )
 # --- Gradio Interface ---
 with gr.Blocks(title="Quote OCR & Overlay", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 Quote Text Extractor & Image Generator")
+    gr.Markdown("Upload an image to extract text, translate it, and overlay it onto a new background.")
     with gr.Row():
         # --- INPUT COLUMN ---
         with gr.Column(scale=1):
+            image_input = gr.Image(label="1. Upload Quote Image", type="pil", sources=["upload", "clipboard"])
+            ig_input = gr.Textbox(label="Or Instagram Link", placeholder="e.g., https://www.instagram.com/p/C-ODQjyy4N3/")
+            with gr.Row():
+                clear_btn = gr.Button("Clear All", variant="secondary")
+                extract_btn = gr.Button("Extract & Translate", variant="primary")
             target_lang = gr.Dropdown(
                 label="Target Language",
+                choices=["persian(farsi)"], # Locked to Persian as per the logic
                 value="persian(farsi)",
+                interactive=False # Not changeable since the prompt is hardcoded for Persian
             )
+            auto_translate = gr.Checkbox(label="Auto-Process After Upload", value=True)
         # --- OUTPUTS COLUMN ---
         with gr.Column(scale=2):
             text_output = gr.Textbox(label="2. Extracted English Text", placeholder="Extracted text appears here...", lines=4, show_copy_button=True)
             word_count = gr.Textbox(label="Word Count", interactive=False, max_lines=1)
             translated_output = gr.Textbox(label="3. Translated Persian Text", placeholder="Persian translation appears here...", lines=4, show_copy_button=True)
+            gr.Markdown("---") # Separator
             final_image_output = gr.Image(label="4. Final Image with Text Overlay", type="filepath")
     # --- Event Handlers ---
+    # Combined function for the main button and auto-processing
+    def process_everything(image, ig_url, lang, auto_process_enabled):
+        """Run OCR -> translation -> overlay and return the four output values.
+
+        Returns a 4-tuple: (extracted text, word count, translated text,
+        final image). When ``auto_process_enabled`` is falsy only OCR is run,
+        on ``image`` as given, and the last two slots are a placeholder
+        string and ``None``.
+        """
+        # OCR-only mode: skip source resolution, translation and overlay.
+        if not auto_process_enabled:
+            ocr_text, ocr_count = extract_text_from_quote(image)
+            return ocr_text, ocr_count, "Translation will appear here...", None
+        # An uploaded image wins; the Instagram link is only the fallback.
+        source_image = image
+        if source_image is None:
+            if not ig_url:
+                return "Please upload an image or provide an Instagram link.", "Words: 0", "Translation failed: No input provided.", None
+            source_image = get_instagram_image(ig_url)
+            if source_image is None:
+                return "Error fetching image from Instagram link.", "Words: 0", "Translation failed: No image fetched.", None
+        ocr_text, ocr_count = extract_text_from_quote(source_image)
+        # OCR reports failure through its message text; stop before translating.
+        ocr_failed = "No text" in ocr_text or "Error" in ocr_text
+        if ocr_failed:
+            return ocr_text, ocr_count, "Translation failed: No text extracted.", None
+        persian_text = translate_extracted(ocr_text, lang)
+        overlay_path = overlay_text_on_image(persian_text)
+        return ocr_text, ocr_count, persian_text, overlay_path
+    # The main button triggers the full pipeline
     extract_btn.click(
         fn=process_everything,
+        inputs=[image_input, ig_input, target_lang, gr.State(True)], # Pass True to force processing
         outputs=[text_output, word_count, translated_output, final_image_output]
     )
+    # Changing the image triggers the pipeline only if 'auto-translate' is checked
     image_input.change(
+        fn=process_everything,
+        inputs=[image_input, ig_input, target_lang, auto_translate],
         outputs=[text_output, word_count, translated_output, final_image_output]
     )
+    # Pressing Enter in the IG link box triggers the pipeline (full run only if
+    # 'auto-translate' is checked). `.submit` is used instead of `.change`
+    # because `.change` fires on every keystroke and on programmatic updates
+    # such as the Clear button resetting the box, which would hit the
+    # Instagram API once per typed character and overwrite cleared outputs.
+    ig_input.submit(
+        fn=process_everything,
+        inputs=[image_input, ig_input, target_lang, auto_translate],
+        outputs=[text_output, word_count, translated_output, final_image_output]
     )
+    # Clear button action
     clear_btn.click(
         fn=clear_all,
+        outputs=[image_input, ig_input, text_output, word_count, translated_output, final_image_output, auto_translate]
     )
+    gr.Markdown("### 💡 How It Works:\n1. Upload a clear image containing English text OR provide an Instagram post link.\n2. The app automatically extracts the text using OCR.\n3. The text is translated to a casual, modern Persian.\n4. The Persian text is sent to a second app which overlays it on a background image.\n5. The final image is displayed.")
 if __name__ == "__main__":
+    # Add requirements to your requirements.txt:
     # gradio
     # easyocr
     # pillow
     # google-generativeai
     # gradio_client
     # requests
+    demo.launch()