import gradio as gr
import numpy as np
import os
from io import BytesIO

from PIL import Image, ImageDraw

# --- Existing utils ---
from utils.azure_translate import translate_text_azure
from utils.image_utils import encode_image_to_html, load_and_split_image
from utils.ocr_utils import extract_and_translate_chunk
from utils.polygon_utils import (
    draw_translated_text_convex,
    render_translated_chunk,
    shrink_or_expand_polygon,
)

# --- Nano Banana ---
from utils.nano_banana import nano_banana_translate_inpaint
from utils.bubble_utils import bubble_pipeline_single
from utils.bubble_detect import detect_speech_bubbles


def compute_auto_chunks(width, height, ratio=2.5, max_chunks=10):
    """Compute a safe number of vertical chunks from the image aspect ratio.

    Formula: chunks ~= height / (width * ratio), rounded half-up to the
    nearest integer and clamped to the range [1, max_chunks].

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        ratio: Target per-chunk height-to-width ratio.
        max_chunks: Upper bound on the returned chunk count.

    Returns:
        int: Number of chunks; always >= 1, even for degenerate dimensions.
    """
    if width <= 0 or height <= 0:
        return 1  # fallback for invalid/unknown dimensions

    raw_value = height / (width * ratio)
    # Round half-up: 0.4 -> 0, 0.5 -> 1, 1.6 -> 2, 2.5 -> 3.
    # NOTE: the previous int(round(...)) used banker's rounding, so
    # round(0.5) == 0 and round(2.5) == 2, contradicting the documented
    # "0.5 -> 1" intent. raw_value is non-negative here, so +0.5/truncate
    # is exact half-up rounding.
    chunks = int(raw_value + 0.5)

    # Clamp into [1, max_chunks].
    return max(1, min(chunks, max_chunks))


# -----------------------------
# Pipeline Method (OCR → Translate → Inpaint)
# -----------------------------
def pipeline(file_obj, num_chunks):
    """OCR each image chunk, translate the detected text, and re-render it.

    Args:
        file_obj: Path to the uploaded image (Gradio `type="filepath"`).
        num_chunks: Number of vertical chunks to split the page into.

    Returns:
        Tuple matching the Gradio output signature:
        (filename, original_html, translated_html, table_rows,
         per-chunk translation state, debug_files) — debug_files is always
        empty for this mode.
    """
    filename, image, chunks = load_and_split_image(file_obj, num_chunks)

    all_translations = []  # per-chunk translation dicts (kept for manual edits)
    all_tables = []        # flat [original, translated] rows for the Dataframe
    translated_images = []

    for chunk in chunks:
        translations = extract_and_translate_chunk(chunk)
        all_tables.extend([t["original"], t["translated"]] for t in translations)
        all_translations.append(translations)
        translated_images.append(render_translated_chunk(chunk, translations))

    original_html = "".join(encode_image_to_html(c) for c in chunks)
    translated_html = "".join(encode_image_to_html(t) for t in translated_images)

    return filename, original_html, translated_html, all_tables, all_translations, []


# -----------------------------
# Nano Banana
# LLM Method
# -----------------------------
def nano_pipeline(file_obj, num_chunks):
    """Translate the whole page in one shot via the Nano Banana inpainter.

    Args:
        file_obj: Path to the uploaded image.
        num_chunks: Accepted for signature parity with pipeline(); the image
            is loaded through the same helper but translated un-chunked.

    Returns:
        Same 6-tuple shape as pipeline(); table/state/debug slots are empty
        because this mode produces no editable per-line translations.
    """
    filename, image, _ = load_and_split_image(file_obj, num_chunks)
    translated_img = nano_banana_translate_inpaint(image)

    original_html = encode_image_to_html(image)
    translated_html = encode_image_to_html(translated_img)
    return filename, original_html, translated_html, [], [], []


# -----------------------------
# Manual Edit Rendering
# -----------------------------
def update_and_render(file_obj, translations, updated_table, num_chunks, font_scale=1.0):
    """Re-render all chunks using manually edited text from the Dataframe.

    Args:
        file_obj: Path to the originally uploaded image.
        translations: Per-chunk translation state produced by pipeline().
        updated_table: Edited Dataframe; column 1 holds the translated text,
            rows are in the same flat order the table was built in.
        num_chunks: Chunk count used for the original split (must match).
        font_scale: Multiplier applied to the rendered font size.

    Returns:
        str: Concatenated HTML previews of the re-rendered chunks.
    """
    _, image, chunks = load_and_split_image(file_obj, num_chunks)

    new_images = []
    row = 0  # flat row index into the edited table
    for chunk_translations, chunk in zip(translations, chunks):
        updated_translations = []
        for source in chunk_translations:
            updated_translations.append({
                "translated": updated_table.iloc[row, 1],
                "polygon": source["polygon"],
            })
            row += 1

        new_images.append(render_translated_chunk(
            chunk,
            updated_translations,
            font_path="NotoSansSC-Regular.ttf",
            font_scale=font_scale,
        ))

    return "".join(encode_image_to_html(t) for t in new_images)


# -----------------------------
# Dispatcher Function
# -----------------------------
def dispatch_pipeline(file_obj, num_chunks, mode):
    """Route a translation request to the backend selected in the UI.

    The slider value is overridden (best-effort) by an aspect-ratio-based
    auto chunk count; on any failure the user-chosen value is kept.

    Args:
        file_obj: Path to the uploaded image.
        num_chunks: Slider value; may be overridden by compute_auto_chunks().
        mode: One of "Pipeline", "NanoBanana", "Bubble".

    Returns:
        The selected backend's 6-tuple of Gradio outputs.

    Raises:
        ValueError: If mode is not a recognized option.
    """
    try:
        # Only the dimensions are needed, so skip the RGB conversion the old
        # code did, and use a context manager so the file handle is closed
        # instead of leaked.
        with Image.open(file_obj) as img:
            w, h = img.size
        auto_chunks = compute_auto_chunks(w, h)
        print(f"⚙️ Auto chunks override: {auto_chunks}")
        num_chunks = auto_chunks
    except Exception as e:
        # Best-effort override: fall back to the slider value on any failure.
        print(f"⚠️ Failed computing auto chunks: {e}")

    if mode == "Pipeline":
        return pipeline(file_obj, num_chunks)
    if mode == "NanoBanana":
        return nano_pipeline(file_obj, num_chunks)
    if mode == "Bubble":
        return bubble_pipeline_single(file_obj, num_chunks)
    raise ValueError(f"Unknown mode: {mode}")


# ===========================================================
# GRADIO UI
# ===========================================================
with gr.Blocks() as demo:
    gr.Markdown("# 🄹 Manga Translator with OCR, Azure, NanoBanana & Bubble Mode")

    with gr.Row():
        file_input = gr.File(
            label="Upload Manga Image",
            type="filepath",
            file_types=[".jpg", ".png"],
        )
        chunk_slider = gr.Slider(1, 10, value=4, step=1, label="Split into X Chunks")
        font_scale_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="Font Size Scale")

        # NEW: Translation Mode Selector
        mode_selector = gr.Dropdown(
            ["Pipeline", "NanoBanana", "Bubble"],
            value="Bubble",
            label="Translation Mode",
        )

    filename_display = gr.Textbox(label="📂 Uploaded File", interactive=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📅 Original Preview (Scrollable)")
            original_html = gr.HTML()
        with gr.Column():
            gr.Markdown("### 🄯 Translated Preview (Scrollable)")
            translated_html = gr.HTML()

    translation_table = gr.Dataframe(headers=["Original", "Translated"], interactive=True)
    translations_state = gr.State()

    with gr.Row():
        translate_btn = gr.Button("Extract & Translate")
        render_btn = gr.Button("Apply Manual Edits")

    debug_files_output = gr.Files(label="Debug Images")

    # Dispatcher: runs the selected backend and fills every output slot.
    translate_btn.click(
        fn=dispatch_pipeline,
        inputs=[file_input, chunk_slider, mode_selector],
        outputs=[
            filename_display,
            original_html,
            translated_html,
            translation_table,
            translations_state,
            debug_files_output,
        ],
    )

    # Manual updates (not for Bubble or Nano — those modes leave the
    # translations state empty).
    render_btn.click(
        fn=update_and_render,
        inputs=[file_input, translations_state, translation_table, chunk_slider, font_scale_slider],
        outputs=translated_html,
    )

demo.launch()