File size: 3,988 Bytes
549fff1
 
 
 
a6a15f8
549fff1
 
4869c56
549fff1
 
 
 
50d8d31
549fff1
 
 
 
50d8d31
549fff1
 
 
 
a6a15f8
 
 
 
549fff1
a6a15f8
549fff1
a6a15f8
549fff1
 
 
 
 
a6a15f8
 
 
 
549fff1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4869c56
549fff1
 
 
 
 
 
 
 
 
 
 
4869c56
549fff1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import tempfile

import gradio as gr
import numpy as np
from PIL import Image
from ultralytics import YOLO

# 1. Load the trained YOLO26n-seg weights once at import time.
# 'comic-panels-and-text-detect.pt' is given as a relative path, so it is
# expected to sit next to the app (presumably resolved against the working
# directory - verify for your deployment).
# `model` stays None when loading fails so the UI can report it instead of crashing.
model = None
try:
    model = YOLO("comic-panels-and-text-detect.pt")
except Exception as e:
    print(f"Failed to load model. Please ensure 'comic-panels-and-text-detect.pt' is uploaded. Error: {e}")

# 2. Define AI Inference Logic
def predict_comic(input_image):
    """Run panel/text segmentation on an uploaded page and summarize the result.

    Args:
        input_image: PIL image handed over by the Gradio input component, or
            ``None`` when nothing has been uploaded yet.

    Returns:
        A ``(annotated_image, status_message)`` tuple. ``annotated_image`` is a
        PIL image with boxes, masks and labels drawn on it, or ``None`` when
        the model failed to load or no image was supplied. ``status_message``
        is a human-readable summary or error text.

    Raises:
        Any exception raised while saving the image or running the model is
        propagated to the caller; the temporary file is removed regardless.
    """
    if model is None:
        return None, "Error: 'comic-panels-and-text-detect.pt' weights file not found. Please upload it to your Space root."

    if input_image is None:
        return None, "Please upload an image first."

    # Round-trip through a lossless PNG so Ultralytics decodes the file itself,
    # mirroring a local CLI run. A unique temp file per call keeps concurrent
    # requests from overwriting or deleting each other's input.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # Pillow reopens the path itself; release our handle first.
    try:
        input_image.save(tmp_path, format="PNG")

        # Same parameters as the local CLI invocation.
        results = model.predict(
            source=tmp_path,
            conf=0.25,
            iou=0.70,
            imgsz=1280,
        )
    finally:
        # Remove the temp file even when saving or inference fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    # Only one image was submitted, so only the first result is relevant.
    res = results[0]

    # plot() draws boxes, masks and labels and returns a BGR array (OpenCV
    # channel order); reverse the channel axis so PIL shows correct colors.
    annotated_bgr = res.plot(boxes=True, masks=True, labels=True)
    output_image = Image.fromarray(annotated_bgr[..., ::-1])

    # Tally detections per class; classes other than these two are ignored.
    counts = {"panel": 0, "text": 0}
    if res.boxes is not None:
        for c in res.boxes.cls:
            class_name = model.names[int(c)]
            if class_name in counts:
                counts[class_name] += 1

    status_report = f"Analysis Successful! Found {counts['panel']} Comic Panels and {counts['text']} Text Bubbles."

    return output_image, status_report

# 3. Build UI Layout using Gradio Blocks & Custom Theme
# Start from the stock Soft theme with lime/slate hues, then override the
# page background, block background and block label color.
theme = gr.themes.Soft(primary_hue="lime", neutral_hue="slate")
theme = theme.set(
    block_label_text_color="*primary_400",
    block_background_fill="*neutral_900",
    body_background_fill="*neutral_950",
)

# Two-column layout: upload + run button on the left, annotated result and
# status text on the right, with intro and footer Markdown around them.
with gr.Blocks(theme=theme, title="ComicPanelsAndTextDetect") as demo:
    gr.Markdown(
        """
        # 🚀 ComicPanelsAndTextDetect
        This interactive application showcases the core computer vision segmentation pipeline powering the **ebookcc** ecosystem. 
        Utilizing a specialized, fine-tuned **YOLO26n-seg** engine, it delivers high-fidelity layout analysis for Manga, Manhwa, Comics, and scanned books.
        """
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📥 Source Image Upload")
            # type="pil" makes the handler receive a PIL image.
            input_img = gr.Image(type="pil", label="Input Graphic (Resolutions >= 1280px highly recommended)")
            btn = gr.Button("🔥 Run AI Layout Engine", variant="primary")

        with gr.Column():
            gr.Markdown("### 📤 Segmentation Output")
            output_img = gr.Image(type="pil", label="YOLO26n-seg Visual Overlay")
            status_output = gr.Textbox(label="Execution Summary Logs", interactive=False)

    # Bind click trigger to engine handler; its two return values map onto
    # the output image and the status textbox, in that order.
    btn.click(
        fn=predict_comic,
        inputs=input_img,
        outputs=[output_img, status_output]
    )

    gr.Markdown(
        """
        ---
        ### 💡 Integration Details
        This operational instance operates inside an isolated cloud sandbox on Hugging Face Spaces. 
        To leverage this layout model within end-to-end processing environments, explore our live production environment: [ebookcc Web App Platform](https://ebookcc.cptd.workers.dev/).
        """
    )

# 4. Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()