import os
import tempfile

import gradio as gr
import numpy as np
from PIL import Image
from ultralytics import YOLO

# Load the detection weights once at import time. When loading fails the app
# still starts: ``model`` stays ``None`` and the failure is printed to the log.
model = None
try:
    model = YOLO("comic-panels-and-text-detect.pt")
except Exception as load_error:
    print(f"Failed to load model. Please ensure 'comic-panels-and-text-detect.pt' is uploaded. Error: {load_error}")

def predict_comic(input_image):
    """Run panel/text detection on an uploaded page and summarise the result.

    Args:
        input_image: The image handed over by the Gradio input component
            (an object exposing a PIL-style ``save`` method), or ``None``
            when nothing has been uploaded.

    Returns:
        A ``(image, message)`` tuple. ``image`` is the annotated PIL image,
        or ``None`` when the model is unavailable or no input was given.
        ``message`` is the status text shown to the user.
    """
    if model is None:
        return None, "Error: 'comic-panels-and-text-detect.pt' weights file not found. Please upload it to your Space root."

    if input_image is None:
        return None, "Please upload an image first."

    # Use a unique temporary file per call: a fixed filename would let
    # concurrent requests overwrite or delete each other's input.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        # PNG is lossless; Pillow's PNG writer ignores ``quality``, so it is
        # not passed.
        input_image.save(tmp_path, format="PNG")
        results = model.predict(
            source=tmp_path,
            conf=0.25,
            iou=0.70,
            imgsz=1280,
        )
    finally:
        # Best-effort cleanup that also runs when saving or inference raises.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    res = results[0]

    # Results.plot() returns the annotated image as a BGR array, while
    # Image.fromarray() interprets a 3-channel array as RGB, so the channel
    # axis is reversed to keep the colours correct.
    annotated_bgr = res.plot(boxes=True, masks=True, labels=True)
    output_image = Image.fromarray(annotated_bgr[..., ::-1])

    # Tally only the classes reported in the summary; any other class name
    # the model may emit is ignored.
    counts = {"panel": 0, "text": 0}
    if res.boxes is not None:
        for cls_id in res.boxes.cls:
            class_name = model.names[int(cls_id)]
            if class_name in counts:
                counts[class_name] += 1

    status_report = f"Analysis Successful! Found {counts['panel']} Comic Panels and {counts['text']} Text Bubbles."

    return output_image, status_report

# Soft base theme with lime as the primary hue and slate as the neutral hue.
theme = gr.themes.Soft(primary_hue="lime", neutral_hue="slate")

# Overrides: page and block backgrounds use the two darkest neutral shades,
# and block labels are drawn in a primary shade.
theme = theme.set(
    body_background_fill="*neutral_950",
    block_background_fill="*neutral_900",
    block_label_text_color="*primary_400",
)

# Gradio UI: an intro header, a two-column row (upload + button on the left,
# annotated image + status text on the right) and a footer with a link.
# NOTE(review): the leading glyphs in the headings and button label look like
# mis-encoded emoji; they are kept as-is here -- confirm the file encoding.
with gr.Blocks(theme=theme, title="ComicPanelsAndTextDetect") as demo:
    gr.Markdown(
        """
        # π ComicPanelsAndTextDetect
        This interactive application showcases the core computer vision segmentation pipeline powering the **ebookcc** ecosystem.
        Utilizing a specialized, fine-tuned **YOLO26n-seg** engine, it delivers high-fidelity layout analysis for Manga, Manhwa, Comics, and scanned books.
        """
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### π₯ Source Image Upload")
            input_img = gr.Image(type="pil", label="Input Graphic (Resolutions >= 1280px highly recommended)")
            btn = gr.Button("π₯ Run AI Layout Engine", variant="primary")

        with gr.Column():
            gr.Markdown("### π€ Segmentation Output")
            output_img = gr.Image(type="pil", label="YOLO26n-seg Visual Overlay")
            status_output = gr.Textbox(label="Execution Summary Logs", interactive=False)

    # Clicking the button sends the uploaded image to predict_comic and routes
    # its two return values to the output image and the status textbox.
    btn.click(
        fn=predict_comic,
        inputs=input_img,
        outputs=[output_img, status_output],
    )

    gr.Markdown(
        """
        ---
        ### π‘ Integration Details
        This operational instance operates inside an isolated cloud sandbox on Hugging Face Spaces.
        To leverage this layout model within end-to-end processing environments, explore our live production environment: [ebookcc Web App Platform](https://ebookcc.cptd.workers.dev/).
        """
    )


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()