# app.py — Manga translator Gradio Space (upstream commit f4b6dc2)
import gradio as gr
import numpy as np
import os
from io import BytesIO
from PIL import Image, ImageDraw
# --- Existing utils ---
from utils.azure_translate import translate_text_azure
from utils.image_utils import encode_image_to_html, load_and_split_image
from utils.ocr_utils import extract_and_translate_chunk
from utils.polygon_utils import draw_translated_text_convex, render_translated_chunk, shrink_or_expand_polygon
# --- Nano Banana ---
from utils.nano_banana import nano_banana_translate_inpaint
from utils.bubble_utils import bubble_pipeline_single
from utils.bubble_detect import detect_speech_bubbles
def compute_auto_chunks(width, height, ratio=2.5, max_chunks=10):
    """Compute how many vertical chunks to split an image into.

    Formula: chunks ≈ height / (width * ratio), rounded half-up,
    then clamped to the range [1, max_chunks].

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        ratio: Target height/width ratio per chunk (default 2.5).
        max_chunks: Upper bound on the returned chunk count.

    Returns:
        An int in [1, max_chunks]; 1 when width/height are non-positive.
    """
    if width <= 0 or height <= 0:
        return 1  # fallback for degenerate/unreadable dimensions
    raw_value = height / (width * ratio)
    # Half-up rounding: 0.4→0, 0.5→1, 1.6→2, 2.5→3.
    # NOTE: builtin round() uses banker's rounding (round(2.5) == 2),
    # which contradicted the intended behavior; int(x + 0.5) is correct
    # here because raw_value is always non-negative.
    chunks = int(raw_value + 0.5)
    # Clamp into the valid range.
    return max(1, min(chunks, max_chunks))
# -----------------------------
# Pipeline Method (OCR β†’ Translate β†’ Inpaint)
# -----------------------------
def pipeline(file_obj, num_chunks):
    """OCR -> Azure translate -> re-render pipeline over image chunks.

    Splits the uploaded image into `num_chunks` pieces, extracts and
    translates the text of each piece, and renders the translations back
    onto the artwork.

    Returns the 6-tuple the Gradio dispatcher expects:
    (filename, original_html, translated_html, table_rows,
     per_chunk_translations, debug_files).
    """
    filename, image, chunks = load_and_split_image(file_obj, num_chunks)
    # One OCR+translation pass per chunk, kept chunk-aligned so manual
    # edits can later be mapped back to the right polygon.
    per_chunk = [extract_and_translate_chunk(chunk) for chunk in chunks]
    # Flat [original, translated] rows feeding the editable Dataframe.
    all_tables = [
        [entry["original"], entry["translated"]]
        for entries in per_chunk
        for entry in entries
    ]
    rendered = [
        render_translated_chunk(chunk, entries)
        for chunk, entries in zip(chunks, per_chunk)
    ]
    original_html = "".join(encode_image_to_html(chunk) for chunk in chunks)
    translated_html = "".join(encode_image_to_html(img) for img in rendered)
    return filename, original_html, translated_html, all_tables, per_chunk, []
# -----------------------------
# Nano Banana LLM Method
# -----------------------------
def nano_pipeline(file_obj, num_chunks):
    """Whole-page translation via the Nano Banana inpainting model.

    Ignores chunking (the model works on the full page) and returns the
    same 6-tuple shape as `pipeline`, with empty table/state/debug slots.
    """
    filename, source_image, _unused_chunks = load_and_split_image(file_obj, num_chunks)
    inpainted = nano_banana_translate_inpaint(source_image)
    # Empty placeholders keep the output arity identical to `pipeline`.
    return (
        filename,
        encode_image_to_html(source_image),
        encode_image_to_html(inpainted),
        [],
        [],
        [],
    )
# -----------------------------
# Manual Edit Rendering
# -----------------------------
def update_and_render(file_obj, translations, updated_table, num_chunks, font_scale=1.0):
    """Re-render chunks using manually edited translations from the table.

    `updated_table` is the edited Dataframe (column 1 holds the corrected
    translation text, rows in the same flat order the table was built in);
    `translations` holds the original per-chunk OCR results, from which the
    polygons are reused. Returns the concatenated translated-preview HTML.
    """
    _, _image, chunks = load_and_split_image(file_obj, num_chunks)
    rendered_chunks = []
    row = 0  # flat cursor into the edited table, advancing across chunks
    for i, chunk in enumerate(chunks):
        merged = []
        for source in translations[i]:
            # Pair each original polygon with the user's edited text.
            merged.append({
                "translated": updated_table.iloc[row, 1],
                "polygon": source["polygon"],
            })
            row += 1
        rendered_chunks.append(
            render_translated_chunk(
                chunk,
                merged,
                font_path="NotoSansSC-Regular.ttf",
                font_scale=font_scale,
            )
        )
    return "".join(encode_image_to_html(img) for img in rendered_chunks)
# -----------------------------
# Dispatcher Function
# -----------------------------
def dispatch_pipeline(file_obj, num_chunks, mode):
    """Route a translation request to the selected backend.

    First replaces the slider-provided `num_chunks` with an aspect-ratio
    based value (best effort: falls back to the slider value on any error),
    then dispatches to one of the three translation modes.

    Args:
        file_obj: Path to the uploaded image file.
        num_chunks: Slider value; overridden by the auto computation when
            the image can be opened.
        mode: "Pipeline", "NanoBanana", or "Bubble".

    Raises:
        ValueError: If `mode` is not one of the known modes.
    """
    try:
        # Context manager releases the file handle PIL keeps open lazily;
        # only the dimensions are needed, so no RGB conversion is done.
        with Image.open(file_obj) as img:
            w, h = img.size
        auto_chunks = compute_auto_chunks(w, h)
        print(f"βš™οΈ Auto chunks override: {auto_chunks}")
        num_chunks = auto_chunks
    except Exception as e:
        # Best effort: keep the slider-provided num_chunks on failure.
        print(f"⚠️ Failed computing auto chunks: {e}")
    if mode == "Pipeline":
        return pipeline(file_obj, num_chunks)
    if mode == "NanoBanana":
        return nano_pipeline(file_obj, num_chunks)
    if mode == "Bubble":
        return bubble_pipeline_single(file_obj, num_chunks)
    raise ValueError(f"Unknown mode: {mode}")
# ===========================================================
# GRADIO UI
# ===========================================================
# Build the Gradio UI. Component creation order inside each context
# manager defines the on-screen layout, and the variables are wired into
# the click handlers below — do not reorder casually.
with gr.Blocks() as demo:
    gr.Markdown("# πŸ„Ή Manga Translator with OCR, Azure, NanoBanana & Bubble Mode")
    with gr.Row():
        # Upload control; `type="filepath"` means handlers receive a path string.
        file_input = gr.File(
            label="Upload Manga Image",
            type="filepath",
            file_types=[".jpg", ".png"]
        )
        # NOTE(review): dispatch_pipeline overrides this slider value with an
        # aspect-ratio auto computation whenever the image can be opened.
        chunk_slider = gr.Slider(1, 10, value=4, step=1, label="Split into X Chunks")
        # Only used by the manual-edit re-render path (update_and_render).
        font_scale_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="Font Size Scale")
    # NEW: Translation Mode Selector
    # Placement after the Row is inferred — the paste lost indentation;
    # confirm against the deployed layout.
    mode_selector = gr.Dropdown(
        ["Pipeline", "NanoBanana", "Bubble"],
        value="Bubble",
        label="Translation Mode"
    )
    filename_display = gr.Textbox(label="πŸ“‚ Uploaded File", interactive=False)
    with gr.Row():
        with gr.Column():
            gr.Markdown("### πŸ“… Original Preview (Scrollable)")
            original_html = gr.HTML()
        with gr.Column():
            gr.Markdown("### πŸ„― Translated Preview (Scrollable)")
            translated_html = gr.HTML()
    # Editable table of [original, translated] rows; edits are applied via
    # the "Apply Manual Edits" button.
    translation_table = gr.Dataframe(headers=["Original", "Translated"], interactive=True)
    # Holds the per-chunk OCR results (with polygons) between callbacks.
    translations_state = gr.State()
    with gr.Row():
        translate_btn = gr.Button("Extract & Translate")
        render_btn = gr.Button("Apply Manual Edits")
    debug_files_output = gr.Files(label="Debug Images")
    # Dispatcher
    # Outputs must match dispatch_pipeline's 6-tuple, in order.
    translate_btn.click(
        fn=dispatch_pipeline,
        inputs=[file_input, chunk_slider, mode_selector],
        outputs=[
            filename_display,
            original_html,
            translated_html,
            translation_table,
            translations_state,
            debug_files_output
        ],
    )
    # Manual updates (not for Bubble or Nano)
    # Re-renders only the translated preview from edited table text.
    render_btn.click(
        fn=update_and_render,
        inputs=[file_input, translations_state, translation_table, chunk_slider, font_scale_slider],
        outputs=translated_html,
    )
demo.launch()