djscanlation

Sleeping

App Files Files Community

djscanlation / app.py

maggidev

Update app.py

add03f2 verified 15 days ago

raw

history blame contribute delete

3.19 kB

	from detect_bubbles import detect_bubbles
	from process_bubble import extract_bubble_mask
	from add_text import add_text
	from translator import MangaTranslator
	from manga_ocr import MangaOcr

	from PIL import Image
	import numpy as np
	import cv2
	import gradio as gr

	# Model file handed to detect_bubbles() as its first argument on every
	# predict() call (presumably the bubble-detector weights -- confirm).
	MODEL = "model.pt"

	# Translator and OCR objects are created a single time, at import, and are
	# shared by every predict() call instead of being rebuilt per request.
	manga_translator = MangaTranslator()
	mocr = MangaOcr()


	def predict(img, translation_method, font):
	    """Translate the speech bubbles of a page and return the rendered page.

	    The work is done in three passes so that OCR always sees untouched pixels:

	    1. Detect bubbles, OCR each crop of the original page and merge each
	       bubble's mask into one page-wide mask (no pixels are modified here).
	    2. Paint every masked pixel of the render copy white, in one operation.
	    3. Translate each bubble's text and draw it back into the bubble's box.

	    Args:
	        img: Page delivered by the Gradio "image" input; converted with
	            ``np.array``. Presumably a 3-channel RGB array, because the
	            cleanup pass assigns a 3-component white -- confirm against the
	            component configuration.
	        translation_method: Forwarded as ``method`` to
	            ``manga_translator.translate``.
	        font: Forwarded unchanged to ``add_text``.

	    Returns:
	        A ``PIL.Image.Image`` built from the cleaned and re-lettered page.

	    Raises:
	        gr.Error: If ``img`` is ``None`` (form submitted without an image).
	    """
	    # Gradio passes None when nothing was uploaded; fail with a readable
	    # message instead of an opaque error from inside the detector.
	    if img is None:
	        raise gr.Error("Please upload an image before submitting.")

	    # =========================
	    # Separate images per purpose
	    # =========================
	    original_image = np.array(img)  # read ONLY (detection + OCR); np.array already copies
	    render_image = original_image.copy()  # cleaned and written on

	    page_height, page_width = original_image.shape[:2]

	    results = detect_bubbles(MODEL, original_image)

	    page_mask = np.zeros((page_height, page_width), dtype=np.uint8)
	    bubbles_data = []

	    # =========================
	    # 1) OCR + mask extraction (image is NOT modified)
	    # =========================
	    for x1, y1, x2, y2, _score, _class_id in results:
	        # Clamp to the page: a negative start would wrap around when slicing
	        # and silently select the wrong (or an empty) region.
	        x1, x2 = (min(max(int(v), 0), page_width) for v in (x1, x2))
	        y1, y2 = (min(max(int(v), 0), page_height) for v in (y1, y2))
	        if x2 <= x1 or y2 <= y1:
	            # Degenerate box: nothing to OCR, mask or draw into.
	            continue

	        crop_original = original_image[y1:y2, x1:x2]

	        # OCR ALWAYS runs on the original image.
	        # NOTE(review): bubbles skipped below still pay for OCR; it could move
	        # after the mask check if extract_bubble_mask leaves the crop
	        # untouched -- confirm before reordering.
	        pil_crop = Image.fromarray(crop_original.astype(np.uint8))
	        text = mocr(pil_crop)

	        mask, contour = extract_bubble_mask(crop_original)
	        if mask is None:
	            continue

	        # Merge this bubble's mask into the page mask through a view of the
	        # same box, so the write lands in page_mask itself.
	        page_mask[y1:y2, x1:x2][mask == 255] = 255

	        bubbles_data.append({
	            "box": (x1, y1, x2, y2),
	            "text": text,
	            "contour": contour,
	        })

	    # =========================
	    # 2) Global page cleanup (ONCE)
	    # =========================
	    render_image[page_mask == 255] = (255, 255, 255)

	    # =========================
	    # 3) Translation + lettering
	    # =========================
	    for bubble in bubbles_data:
	        x1, y1, x2, y2 = bubble["box"]

	        translated = manga_translator.translate(
	            bubble["text"],
	            method=translation_method,
	        )

	        # add_text receives the cleaned crop and its result replaces that crop.
	        render_image[y1:y2, x1:x2] = add_text(
	            render_image[y1:y2, x1:x2],
	            translated,
	            font,
	            bubble["contour"],
	        )

	    return Image.fromarray(render_image)


	# =========================
	# Gradio interface
	# =========================

	# Dropdown entries are (label, value) pairs; the value of the selected
	# entry is what predict() receives as translation_method.
	_translation_choices = [
	    ("Google", "google"),
	    ("HuggingFace", "hf"),
	    ("Bing", "bing"),
	    ("Sogou", "sogou"),
	]

	# Same (label, value) layout; the value is the font file path that
	# predict() forwards to add_text().
	_font_choices = [
	    ("Wild Words Roman", "fonts/WildWordsRoman.ttf"),
	    ("Anime Ace Italic", "fonts/animeace_i.ttf"),
	    ("Mangati", "fonts/mangati.ttf"),
	    ("Komika Axis", "fonts/KOMIKAX_.ttf"),
	]

	_method_dropdown = gr.Dropdown(
	    choices=_translation_choices,
	    value="google",
	    label="Translation Method",
	)

	_font_dropdown = gr.Dropdown(
	    choices=_font_choices,
	    value="fonts/WildWordsRoman.ttf",
	    label="Font",
	)

	# Inputs are listed in the order of predict()'s parameters:
	# image, translation method, font.
	demo = gr.Interface(
	    fn=predict,
	    inputs=["image", _method_dropdown, _font_dropdown],
	    outputs=gr.Image(),
	    title="Manga Translator",
	    description="Translate manga speech bubbles safely and correctly",
	)

	demo.launch(debug=False, share=False)