# app.py — ComicPanelsAndTextDetect Hugging Face Space (commit a6a15f8, verified)
import os
import tempfile

import gradio as gr
import numpy as np
from PIL import Image
from ultralytics import YOLO
# 1. Load the trained YOLO26n-seg checkpoint from the Space root.
def _load_model():
    """Return the loaded YOLO model, or None when the weights file is absent/broken."""
    try:
        return YOLO("comic-panels-and-text-detect.pt")
    except Exception as e:
        # Keep the app importable even without weights; the UI reports the
        # problem instead of the Space crashing at startup.
        print(f"Failed to load model. Please ensure 'comic-panels-and-text-detect.pt' is uploaded. Error: {e}")
        return None

model = _load_model()
# 2. Define AI Inference Logic
def predict_comic(input_image):
    """Run panel/text-bubble segmentation on one comic page.

    Parameters
    ----------
    input_image : PIL.Image.Image | None
        Page uploaded through the Gradio UI (``type="pil"``).

    Returns
    -------
    tuple
        ``(annotated_image, status_message)`` — the annotated PIL image
        (or ``None`` on error) and a human-readable summary string.
    """
    if model is None:
        return None, "Error: 'comic-panels-and-text-detect.pt' weights file not found. Please upload it to your Space root."
    if input_image is None:
        return None, "Please upload an image first."

    # Save to a lossless PNG temp file so Ultralytics decodes the exact
    # pixels (Gradio's in-memory conversion can alter the array).
    # tempfile.mkstemp gives each request its own file — the original's
    # fixed "tmp_input_raw.png" path raced between concurrent users.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        # 'quality' is meaningless for PNG (lossless format) — dropped.
        input_image.save(tmp_path, format="PNG")
        # Segmentation parameters mirror the local CLI used during development.
        results = model.predict(
            source=tmp_path,
            conf=0.25,
            iou=0.70,
            imgsz=1280,
        )
    finally:
        # Clean up even when predict() raises (original leaked the file on error).
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    # Primary (and only) image result.
    res = results[0]
    # Results.plot() returns a BGR numpy array (OpenCV convention); flip the
    # channel axis so PIL renders correct colors instead of swapped ones.
    annotated_bgr = res.plot(boxes=True, masks=True, labels=True)
    output_image = Image.fromarray(annotated_bgr[..., ::-1])

    # Count detected instances for the two classes of interest.
    counts = {"panel": 0, "text": 0}
    if res.boxes is not None:
        for c in res.boxes.cls:
            class_name = model.names[int(c)]
            if class_name in counts:
                counts[class_name] += 1
    status_report = f"Analysis Successful! Found {counts['panel']} Comic Panels and {counts['text']} Text Bubbles."
    return output_image, status_report
# 3. Build UI Layout using Gradio Blocks & Custom Theme
# Dark palette: lime accents over slate neutrals.
_base_theme = gr.themes.Soft(
    primary_hue="lime",
    neutral_hue="slate",
)
theme = _base_theme.set(
    body_background_fill="*neutral_950",
    block_background_fill="*neutral_900",
    block_label_text_color="*primary_400",
)
# Assemble the UI: header, two-column row (upload | results), footer notes.
with gr.Blocks(theme=theme, title="ComicPanelsAndTextDetect") as demo:
    gr.Markdown(
        """
# 🚀 ComicPanelsAndTextDetect
This interactive application showcases the core computer vision segmentation pipeline powering the **ebookcc** ecosystem.
Utilizing a specialized, fine-tuned **YOLO26n-seg** engine, it delivers high-fidelity layout analysis for Manga, Manhwa, Comics, and scanned books.
"""
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📥 Source Image Upload")
            input_img = gr.Image(type="pil", label="Input Graphic (Resolutions >= 1280px highly recommended)")
            btn = gr.Button("🔥 Run AI Layout Engine", variant="primary")
        with gr.Column():
            gr.Markdown("### 📤 Segmentation Output")
            output_img = gr.Image(type="pil", label="YOLO26n-seg Visual Overlay")
            status_output = gr.Textbox(label="Execution Summary Logs", interactive=False)
    # Bind click trigger to engine handler
    btn.click(
        fn=predict_comic,
        inputs=input_img,
        outputs=[output_img, status_output]
    )
    # NOTE: fixed duplicated word ("sandbox sandbox") in the rendered text below.
    gr.Markdown(
        """
---
### 💡 Integration Details
This operational instance operates inside an isolated cloud sandbox on Hugging Face Spaces.
To leverage this layout model within end-to-end processing environments, explore our live production environment: [ebookcc Web App Platform](https://ebookcc.cptd.workers.dev/).
"""
    )
# 4. Entry point — start the Gradio server only when run as a script,
#    so importing this module (e.g. for tests) has no side effects.
if __name__ == "__main__":
    demo.launch()