import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import os
# 1. Load the trained YOLO26n-seg checkpoint.
# The weights file 'comic-panels-and-text-detect.pt' is expected to sit
# next to this script (the Space root); on failure we keep the app alive
# and let predict_comic report the problem to the user.
try:
    model = YOLO("comic-panels-and-text-detect.pt")
except Exception as e:
    model = None
    print(f"Failed to load model. Please ensure 'comic-panels-and-text-detect.pt' is uploaded. Error: {e}")
# 2. Define AI Inference Logic
def predict_comic(input_image):
    """Run panel/text segmentation on an uploaded comic page.

    Args:
        input_image: PIL.Image from the Gradio upload widget, or None.

    Returns:
        tuple: (annotated PIL.Image or None, human-readable status string).
    """
    if model is None:
        return None, "Error: 'comic-panels-and-text-detect.pt' weights file not found. Please upload it to your Space root."
    if input_image is None:
        return None, "Please upload an image first."

    # Save to a lossless PNG temp file so Ultralytics decodes the pixels
    # natively instead of receiving a Gradio-altered array.
    # (PNG is always lossless; a 'quality' option does not apply.)
    tmp_path = "tmp_input_raw.png"
    input_image.save(tmp_path, format="PNG")
    try:
        # Execute segmentation mirroring the exact local CLI parameters.
        results = model.predict(
            source=tmp_path,
            conf=0.25,
            iou=0.70,
            imgsz=1280,
        )
    finally:
        # Clean up the temp file even if inference raises, so a failed
        # request does not leak files into the Space's working directory.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    # Primary (and only) image result.
    res = results[0]

    # Render boxes, masks, and labels. Ultralytics plot() returns a BGR
    # array (OpenCV convention), so reverse the channel axis before
    # handing it to PIL, which expects RGB — otherwise colors are swapped.
    annotated_bgr = res.plot(boxes=True, masks=True, labels=True)
    output_image = Image.fromarray(annotated_bgr[..., ::-1])

    # Quantify the detected object instances per class of interest.
    counts = {"panel": 0, "text": 0}
    if res.boxes is not None:
        for c in res.boxes.cls:
            class_name = model.names[int(c)]
            if class_name in counts:
                counts[class_name] += 1

    status_report = f"Analysis Successful! Found {counts['panel']} Comic Panels and {counts['text']} Text Bubbles."
    return output_image, status_report
# 3. Custom dark theme: Soft base with lime accents on a slate background.
_soft_base = gr.themes.Soft(
    primary_hue="lime",
    neutral_hue="slate",
)
theme = _soft_base.set(
    body_background_fill="*neutral_950",
    block_background_fill="*neutral_900",
    block_label_text_color="*primary_400",
)
# Build UI layout using Gradio Blocks with the custom theme.
with gr.Blocks(theme=theme, title="ComicPanelsAndTextDetect") as demo:
    # Header / project description.
    gr.Markdown(
        """
# π ComicPanelsAndTextDetect
This interactive application showcases the core computer vision segmentation pipeline powering the **ebookcc** ecosystem.
Utilizing a specialized, fine-tuned **YOLO26n-seg** engine, it delivers high-fidelity layout analysis for Manga, Manhwa, Comics, and scanned books.
"""
    )
    with gr.Row():
        # Left column: upload + trigger.
        with gr.Column():
            gr.Markdown("### π₯ Source Image Upload")
            input_img = gr.Image(type="pil", label="Input Graphic (Resolutions >= 1280px highly recommended)")
            btn = gr.Button("π₯ Run AI Layout Engine", variant="primary")
        # Right column: annotated result + status log.
        with gr.Column():
            gr.Markdown("### π€ Segmentation Output")
            output_img = gr.Image(type="pil", label="YOLO26n-seg Visual Overlay")
            status_output = gr.Textbox(label="Execution Summary Logs", interactive=False)
    # Bind click trigger to engine handler.
    btn.click(
        fn=predict_comic,
        inputs=input_img,
        outputs=[output_img, status_output],
    )
    # Footer: integration pointers. (Fixed duplicated word "sandbox sandbox".)
    gr.Markdown(
        """
---
### π‘ Integration Details
This operational instance operates inside an isolated cloud sandbox on Hugging Face Spaces.
To leverage this layout model within end-to-end processing environments, explore our live production environment: [ebookcc Web App Platform](https://ebookcc.cptd.workers.dev/).
"""
    )
# 4. Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()