# app.py — ComicPanelsAndTextDetect Hugging Face Space (commit a6a15f8, verified)
import os
import tempfile

import gradio as gr
import numpy as np
from PIL import Image
from ultralytics import YOLO
# 1. Load the trained YOLO26n-seg checkpoint from the Space root.
def _load_model():
    """Return the loaded YOLO model, or None when the weights file is absent/broken."""
    try:
        return YOLO("comic-panels-and-text-detect.pt")
    except Exception as e:
        # Keep the app importable even without weights; the UI reports the
        # problem instead of the Space crashing at startup.
        print(f"Failed to load model. Please ensure 'comic-panels-and-text-detect.pt' is uploaded. Error: {e}")
        return None

model = _load_model()
# 2. Define AI Inference Logic
def predict_comic(input_image):
    """Run panel/text-bubble segmentation on one comic page.

    Parameters
    ----------
    input_image : PIL.Image.Image | None
        Page uploaded through the Gradio UI (``type="pil"``).

    Returns
    -------
    tuple
        ``(annotated_image, status_message)`` — the annotated PIL image
        (or ``None`` on error) and a human-readable summary string.
    """
    if model is None:
        return None, "Error: 'comic-panels-and-text-detect.pt' weights file not found. Please upload it to your Space root."
    if input_image is None:
        return None, "Please upload an image first."

    # Save to a lossless PNG temp file so Ultralytics decodes the exact
    # pixels (Gradio's in-memory conversion can alter the array).
    # tempfile.mkstemp gives each request its own file — the original's
    # fixed "tmp_input_raw.png" path raced between concurrent users.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        # 'quality' is meaningless for PNG (lossless format) — dropped.
        input_image.save(tmp_path, format="PNG")
        # Segmentation parameters mirror the local CLI used during development.
        results = model.predict(
            source=tmp_path,
            conf=0.25,
            iou=0.70,
            imgsz=1280,
        )
    finally:
        # Clean up even when predict() raises (original leaked the file on error).
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    # Primary (and only) image result.
    res = results[0]
    # Results.plot() returns a BGR numpy array (OpenCV convention); flip the
    # channel axis so PIL renders correct colors instead of swapped ones.
    annotated_bgr = res.plot(boxes=True, masks=True, labels=True)
    output_image = Image.fromarray(annotated_bgr[..., ::-1])

    # Count detected instances for the two classes of interest.
    counts = {"panel": 0, "text": 0}
    if res.boxes is not None:
        for c in res.boxes.cls:
            class_name = model.names[int(c)]
            if class_name in counts:
                counts[class_name] += 1
    status_report = f"Analysis Successful! Found {counts['panel']} Comic Panels and {counts['text']} Text Bubbles."
    return output_image, status_report
# 3. Build UI Layout using Gradio Blocks & Custom Theme
# Dark palette: lime accents over slate neutrals.
_base_theme = gr.themes.Soft(
    primary_hue="lime",
    neutral_hue="slate",
)
theme = _base_theme.set(
    body_background_fill="*neutral_950",
    block_background_fill="*neutral_900",
    block_label_text_color="*primary_400",
)
# Assemble the UI: header, two-column row (upload | results), footer notes.
with gr.Blocks(theme=theme, title="ComicPanelsAndTextDetect") as demo:
    gr.Markdown(
        """
# 🚀 ComicPanelsAndTextDetect
This interactive application showcases the core computer vision segmentation pipeline powering the **ebookcc** ecosystem.
Utilizing a specialized, fine-tuned **YOLO26n-seg** engine, it delivers high-fidelity layout analysis for Manga, Manhwa, Comics, and scanned books.
"""
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📥 Source Image Upload")
            input_img = gr.Image(type="pil", label="Input Graphic (Resolutions >= 1280px highly recommended)")
            btn = gr.Button("🔥 Run AI Layout Engine", variant="primary")
        with gr.Column():
            gr.Markdown("### 📤 Segmentation Output")
            output_img = gr.Image(type="pil", label="YOLO26n-seg Visual Overlay")
            status_output = gr.Textbox(label="Execution Summary Logs", interactive=False)
    # Bind click trigger to engine handler
    btn.click(
        fn=predict_comic,
        inputs=input_img,
        outputs=[output_img, status_output]
    )
    # NOTE: fixed duplicated word ("sandbox sandbox") in the rendered text below.
    gr.Markdown(
        """
---
### 💡 Integration Details
This operational instance operates inside an isolated cloud sandbox on Hugging Face Spaces.
To leverage this layout model within end-to-end processing environments, explore our live production environment: [ebookcc Web App Platform](https://ebookcc.cptd.workers.dev/).
"""
    )
# 4. Entry point — start the Gradio server only when run as a script,
#    so importing this module (e.g. for tests) has no side effects.
if __name__ == "__main__":
    demo.launch()