import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import os
import tempfile

# 1. Load the trained YOLO26n-seg model.
# The app looks for 'comic-panels-and-text-detect.pt' in the Space root;
# on failure we keep running and surface a friendly error from predict_comic.
try:
    model = YOLO("comic-panels-and-text-detect.pt")
except Exception as e:
    model = None
    print(f"Failed to load model. Please ensure 'comic-panels-and-text-detect.pt' is uploaded. Error: {e}")


# 2. Define AI Inference Logic
def predict_comic(input_image):
    """Run panel/text segmentation on one comic page image.

    Args:
        input_image: PIL.Image from the Gradio input component, or None.

    Returns:
        (annotated PIL.Image or None, status message string).
    """
    if model is None:
        return None, "Error: 'comic-panels-and-text-detect.pt' weights file not found. Please upload it to your Space root."
    if input_image is None:
        return None, "Please upload an image first."

    # Save to a lossless PNG temp file so Ultralytics decodes the exact pixels
    # (prevents Gradio from altering pixel arrays). Use tempfile.mkstemp for a
    # unique per-request path — a fixed filename would race between concurrent
    # Gradio requests — and clean up in `finally` so the file is removed even
    # if predict() raises. Note: 'quality' is a JPEG-only option and is
    # ignored by the PNG encoder, so it is not passed here.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # mkstemp opens the fd; PIL will reopen by path
    try:
        input_image.save(tmp_path, format="PNG")
        # Execute segmentation pipeline mirroring the local CLI parameters.
        results = model.predict(
            source=tmp_path,
            conf=0.25,
            iou=0.70,
            imgsz=1280
        )
    finally:
        # Safe cleanup of the temporary file, success or failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    # Primary (and only) image result.
    res = results[0]

    # Render bounding boxes, segmentation masks, and category labels.
    annotated_img_array = res.plot(boxes=True, masks=True, labels=True)

    # Convert the numpy matrix back to a displayable PIL image.
    output_image = Image.fromarray(annotated_img_array)

    # Quantify the detected object instances per class of interest.
    counts = {"panel": 0, "text": 0}
    if res.boxes is not None:
        for c in res.boxes.cls:
            class_name = model.names[int(c)]
            if class_name in counts:
                counts[class_name] += 1

    status_report = f"Analysis Successful! Found {counts['panel']} Comic Panels and {counts['text']} Text Bubbles."
    return output_image, status_report
# 3. Build UI Layout using Gradio Blocks & Custom Theme
theme = gr.themes.Soft(
    primary_hue="lime",
    neutral_hue="slate",
).set(
    body_background_fill="*neutral_950",
    block_background_fill="*neutral_900",
    block_label_text_color="*primary_400",
)

with gr.Blocks(theme=theme, title="ComicPanelsAndTextDetect") as demo:
    gr.Markdown(
        """
        # 🚀 ComicPanelsAndTextDetect
        This interactive application showcases the core computer vision segmentation pipeline powering the **ebookcc** ecosystem.
        Utilizing a specialized, fine-tuned **YOLO26n-seg** engine, it delivers high-fidelity layout analysis for Manga, Manhwa, Comics, and scanned books.
        """
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📥 Source Image Upload")
            input_img = gr.Image(type="pil", label="Input Graphic (Resolutions >= 1280px highly recommended)")
            btn = gr.Button("🔥 Run AI Layout Engine", variant="primary")
        with gr.Column():
            gr.Markdown("### 📤 Segmentation Output")
            output_img = gr.Image(type="pil", label="YOLO26n-seg Visual Overlay")
            status_output = gr.Textbox(label="Execution Summary Logs", interactive=False)

    # Bind click trigger to engine handler
    btn.click(
        fn=predict_comic,
        inputs=input_img,
        outputs=[output_img, status_output]
    )

    gr.Markdown(
        """
        ---
        ### 💡 Integration Details
        This operational instance operates inside an isolated cloud sandbox on Hugging Face Spaces.
        To leverage this layout model within end-to-end processing environments, explore our live production environment:
        [ebookcc Web App Platform](https://ebookcc.cptd.workers.dev/).
        """
    )

# 4. Initialize Framework Mainloop
if __name__ == "__main__":
    demo.launch()