qqwjq1981's picture
Update app.py
0438fe3 verified
raw
history blame
3.86 kB
import gradio as gr
import numpy as np
import os
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
from utils.azure_translate import translate_text_azure
from utils.image_utils import split_image, encode_image_to_html
from utils.ocr_utils import extract_and_translate_chunk
from utils.polygon_utils import draw_translated_text_convex, render_translated_chunk
def load_and_split_image(file_obj, num_chunks):
    """Open the selected image (or the bundled sample) and split it into chunks.

    Args:
        file_obj: A path, or an object whose ``name`` attribute holds the
            path. ``None`` selects the bundled ``00_sample.jpg``.
        num_chunks: Passed through unchanged to ``split_image``.

    Returns:
        A ``(filename, image, chunks)`` tuple: the base name of the file that
        was opened, the whole image converted to RGB, and the value returned
        by ``split_image`` for that image.
    """
    if file_obj is not None:
        # Accept both a bare path and a wrapper object exposing ``.name``.
        image_path = getattr(file_obj, "name", file_obj)
    else:
        image_path = "00_sample.jpg"
    filename = os.path.basename(image_path)

    # ``Image.open`` is lazy and keeps the underlying file open. ``convert``
    # reads the pixels into a new, independent image, so the source handle
    # can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    chunks = split_image(image, num_chunks)
    return filename, image, chunks
def pipeline(file_obj, num_chunks):
    """Extract, translate and re-render every chunk of the selected image.

    Args:
        file_obj: Forwarded to ``load_and_split_image``.
        num_chunks: Forwarded to ``load_and_split_image``.

    Returns:
        A 5-tuple of
        ``(filename, original_html, translated_html, table_rows, translations)``:
        the image file name, the concatenated HTML of the original chunks,
        the concatenated HTML of the rendered chunks, a flat list of
        ``[original, translated]`` rows across all chunks, and the list of
        per-chunk translation lists.
    """
    filename, _image, chunks = load_and_split_image(file_obj, num_chunks)

    per_chunk_translations = []
    table_rows = []
    rendered_chunks = []

    for piece in chunks:
        entries = extract_and_translate_chunk(piece)
        rows = [[entry["original"], entry["translated"]] for entry in entries]
        per_chunk_translations.append(entries)
        table_rows.extend(rows)
        rendered_chunks.append(render_translated_chunk(piece, entries))

    # Encode the originals first, then the rendered translations.
    original_html = "".join(map(encode_image_to_html, chunks))
    translated_html = "".join(map(encode_image_to_html, rendered_chunks))

    return (
        filename,
        original_html,
        translated_html,
        table_rows,
        per_chunk_translations,
    )
def update_and_render(file_obj, translations, updated_table, num_chunks, font_scale=1.0):
    """Re-render the translated chunks using the manually edited table.

    Args:
        file_obj: Forwarded to ``load_and_split_image``.
        translations: Per-chunk lists of translation dicts; only the
            ``"polygon"`` entry of each dict is read here.
        updated_table: Table accessed via ``.iloc``; column 1 supplies the
            edited text, one row per translation entry in chunk order.
        num_chunks: Forwarded to ``load_and_split_image``.
        font_scale: Forwarded to ``render_translated_chunk``.

    Returns:
        The concatenated HTML of all re-rendered chunks.

    Raises:
        gr.Error: If there are no stored translations yet, if the image now
            splits into a different number of chunks than the stored
            translations cover, or if the table has fewer rows than there
            are translation entries.
    """
    if translations is None:
        # The state is empty until a translation run has stored its result.
        raise gr.Error(
            "No translations available yet - click 'Extract & Translate' first."
        )

    _, _image, chunks = load_and_split_image(file_obj, num_chunks)
    chunks = list(chunks)

    # The stored polygons belong to the split they were extracted from; a
    # different chunk count would pair them with the wrong image regions.
    if len(chunks) != len(translations):
        raise gr.Error(
            "The number of chunks changed since the last translation - "
            "click 'Extract & Translate' again."
        )

    expected_rows = sum(len(group) for group in translations)
    if len(updated_table) < expected_rows:
        raise gr.Error(
            f"The table has {len(updated_table)} rows but {expected_rows} "
            "translations are expected - do not delete rows."
        )

    rendered = []
    row = 0  # running row index into the flat table, across all chunks
    for chunk, chunk_translations in zip(chunks, translations):
        edited = []
        for entry in chunk_translations:
            edited.append({
                "translated": updated_table.iloc[row, 1],
                "polygon": entry["polygon"],
            })
            row += 1
        rendered.append(
            render_translated_chunk(
                chunk,
                edited,
                font_path="NotoSansSC-Regular.ttf",
                font_scale=font_scale,
            )
        )

    return "".join(encode_image_to_html(img) for img in rendered)
# Gradio UI: upload an image, translate it, optionally edit the translations
# in the table and re-render.
# NOTE(review): the layout nesting below was reconstructed from statement
# order; confirm whether ``filename_display`` belongs inside the first row.
with gr.Blocks() as demo:
    gr.Markdown("# 🄹 Manga Translator with OCR & Azure")

    with gr.Row():
        file_input = gr.File(label="Upload Manga Image", type="filepath", file_types=[".jpg", ".png"])
        chunk_slider = gr.Slider(1, 10, value=4, step=1, label="Split into X Chunks")
        font_scale_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="Font Size Scale")

    filename_display = gr.Textbox(label="📂 Uploaded File", interactive=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📅 Original Preview (Scrollable, Fixed Height)")
            original_html = gr.HTML()
        with gr.Column():
            gr.Markdown("### 🄯 Translated Preview (Scrollable, Fixed Height)")
            translated_html = gr.HTML()

    translation_table = gr.Dataframe(headers=["Original", "Translated"], interactive=True)
    # Carries the per-chunk translations from ``pipeline`` over to
    # ``update_and_render`` between the two button clicks.
    translations_state = gr.State()

    with gr.Row():
        translate_btn = gr.Button("Extract & Translate")
        render_btn = gr.Button("Apply Manual Edits")

    translate_btn.click(
        fn=pipeline,
        inputs=[file_input, chunk_slider],
        outputs=[filename_display, original_html, translated_html, translation_table, translations_state],
    )

    render_btn.click(
        fn=update_and_render,
        inputs=[file_input, translations_state, translation_table, chunk_slider, font_scale_slider],
        outputs=translated_html,
    )

demo.launch()