qqwjq1981's picture
Update app.py
b588863 verified
import gradio as gr
import numpy as np
import os
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
from utils.azure_translate import translate_text_azure
from utils.image_utils import split_image, encode_image_to_html
from utils.ocr_utils import extract_and_translate_chunk
from utils.polygon_utils import draw_translated_text_convex, render_translated_chunk
def load_and_split_image(file_obj, num_chunks):
    """Open the uploaded image (or the fallback sample) and split it into chunks.

    Args:
        file_obj: Upload value coming from the UI. Either a filesystem path
            or an object exposing that path as ``.name``. ``None`` selects
            the fallback file ``00_sample.jpg`` in the working directory.
        num_chunks: Requested number of chunks; forwarded unchanged to
            ``split_image``.

    Returns:
        A ``(filename, image, chunks)`` tuple: the base name of the file that
        was opened, the image converted to RGB, and the value returned by
        ``split_image(image, num_chunks)``.
    """
    if file_obj is None:
        image_path = "00_sample.jpg"
    else:
        # Accept both plain paths and file-like wrappers that carry ``.name``.
        image_path = getattr(file_obj, "name", file_obj)
    filename = os.path.basename(image_path)

    # ``convert`` returns an independent in-memory copy, so the underlying
    # file can be closed deterministically as soon as the copy exists.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    chunks = split_image(image, num_chunks)
    return filename, image, chunks
def pipeline(file_obj, num_chunks):
    """Translate every chunk of the uploaded image and build the UI payloads.

    Each chunk is passed to ``extract_and_translate_chunk``; the entries it
    returns are read through their ``"original"`` and ``"translated"`` keys
    to build table rows, and the chunk is re-rendered with
    ``render_translated_chunk``.

    Args:
        file_obj: Upload value, handed to ``load_and_split_image``.
        num_chunks: Number of chunks to split the image into.

    Returns:
        A 5-tuple ``(filename, original_html, translated_html, rows,
        per_chunk)`` where ``rows`` is a flat list of
        ``[original, translated]`` pairs in chunk order and ``per_chunk``
        holds the raw translation entries grouped by chunk.
    """
    filename, _image, pieces = load_and_split_image(file_obj, num_chunks)

    per_chunk = []
    rows = []
    rendered = []
    for piece in pieces:
        entries = extract_and_translate_chunk(piece)
        pairs = [[entry["original"], entry["translated"]] for entry in entries]
        per_chunk.append(entries)
        rows += pairs
        rendered.append(render_translated_chunk(piece, entries))

    # Both previews are the per-chunk HTML snippets concatenated in order.
    source_markup = "".join(encode_image_to_html(piece) for piece in pieces)
    result_markup = "".join(encode_image_to_html(picture) for picture in rendered)
    return filename, source_markup, result_markup, rows, per_chunk
def update_and_render(file_obj, translations, updated_table, num_chunks, font_scale=1.0):
    """Re-render the translated chunks using the user's edited table text.

    Args:
        file_obj: Upload value, handed to ``load_and_split_image``.
        translations: Per-chunk lists of translation entries (each entry must
            provide a ``"polygon"`` key). ``None`` means no extraction has
            been run yet.
        updated_table: Edited table. Its length is the row count and cells
            are read with ``.iloc[row, 1]``, i.e. the second column holds the
            translated text. Rows are consumed in chunk order, one per entry.
        num_chunks: Number of chunks to split the image into.
        font_scale: Scale factor forwarded to ``render_translated_chunk``.

    Returns:
        The concatenated HTML of all re-rendered chunks.

    Raises:
        ValueError: If there are no stored translations, if the table has
            fewer rows than stored translation entries, or if the image no
            longer splits into the same number of chunks that were translated.
    """
    # Validate the cheap preconditions first so that a mis-ordered click
    # fails with a clear message before any image is loaded.
    if translations is None:
        raise ValueError(
            "No translations available: run 'Extract & Translate' before "
            "applying manual edits."
        )

    expected_rows = sum(len(group) for group in translations)
    if len(updated_table) < expected_rows:
        raise ValueError(
            f"The translation table has {len(updated_table)} rows but "
            f"{expected_rows} are required; do not delete rows."
        )

    _, _image, chunks = load_and_split_image(file_obj, num_chunks)
    chunks = list(chunks)
    # Stored polygons belong to the chunk geometry used at extraction time;
    # a different chunk count means they no longer line up with the image.
    if len(chunks) != len(translations):
        raise ValueError(
            f"The image was split into {len(chunks)} chunks but translations "
            f"exist for {len(translations)}; run 'Extract & Translate' again."
        )

    new_images = []
    row = 0  # running row index into the flat table across all chunks
    for chunk, chunk_translations in zip(chunks, translations):
        updated_translations = []
        for entry in chunk_translations:
            updated_translations.append({
                "translated": updated_table.iloc[row, 1],
                "polygon": entry["polygon"],
            })
            row += 1
        new_images.append(
            render_translated_chunk(
                chunk,
                updated_translations,
                font_path="NotoSansSC-Regular.ttf",
                font_scale=font_scale,
            )
        )

    return "".join(encode_image_to_html(picture) for picture in new_images)
# Gradio UI definition: inputs on top, side-by-side previews, an editable
# translation table, and two buttons wired to ``pipeline`` and
# ``update_and_render``.
with gr.Blocks() as demo:
    gr.Markdown("# 🄹 Manga Translator with OCR & Azure")

    with gr.Row():
        file_input = gr.File(
            label="Upload Manga Image",
            type="filepath",
            file_types=[".jpg", ".png"],
        )
        chunk_slider = gr.Slider(1, 10, value=4, step=1, label="Split into X Chunks")
        font_scale_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="Font Size Scale")

    filename_display = gr.Textbox(label="📂 Uploaded File", interactive=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📅 Original Preview (Scrollable, Fixed Height)")
            original_html = gr.HTML()
        with gr.Column():
            gr.Markdown("### 🄯 Translated Preview (Scrollable, Fixed Height)")
            translated_html = gr.HTML()

    translation_table = gr.Dataframe(headers=["Original", "Translated"], interactive=True)
    # Holds the per-chunk translation entries between the two button clicks.
    translations_state = gr.State()

    with gr.Row():
        translate_btn = gr.Button("Extract & Translate")
        render_btn = gr.Button("Apply Manual Edits")

    # Output order must match the 5-tuple returned by ``pipeline``.
    translate_btn.click(
        fn=pipeline,
        inputs=[file_input, chunk_slider],
        outputs=[filename_display, original_html, translated_html, translation_table, translations_state],
    )
    # Input order must match the positional parameters of ``update_and_render``.
    render_btn.click(
        fn=update_and_render,
        inputs=[file_input, translations_state, translation_table, chunk_slider, font_scale_slider],
        outputs=translated_html,
    )

# Only start the server when executed as a script, so importing this module
# (for example to reuse ``demo`` or the callbacks) has no side effects.
if __name__ == "__main__":
    demo.launch()