Spaces:

jebin2
/

comic-panel-extractor

Running

App Files Files Community

jebin2 committed on Oct 16

Commit

951222c

1 Parent(s): e6e7958

add train support

Browse files

Files changed (12) hide show

.gitignore +3 -2
comic_panel_extractor/annorator_server.py +212 -19
comic_panel_extractor/config.py +29 -11
comic_panel_extractor/config.toml +4 -0
comic_panel_extractor/create_dataset.py +1 -1
comic_panel_extractor/image_labels/image labels generated here.info +0 -0
comic_panel_extractor/images/Place the images here.info +0 -0
comic_panel_extractor/server.py +12 -1
comic_panel_extractor/static/annotator.html +229 -5
comic_panel_extractor/static/index.html +4 -0
comic_panel_extractor/ws_manager.py +29 -0
requirements.txt +3 -1

.gitignore CHANGED Viewed

@@ -209,8 +209,9 @@ temp_dir
 input.jpg
 comic_panel_extractor/api_outputs/
 comic_panel_extractor/dataset/
-comic_panel_extractor/images/
-comic_panel_extractor/image_labels/
 comic_panel_extractor/runs/
 comic_panel_extractor/temp_dir/
 temp.py

 input.jpg
 comic_panel_extractor/api_outputs/
 comic_panel_extractor/dataset/
+comic_panel_extractor/images/*.png
+comic_panel_extractor/images/*.jpg
+comic_panel_extractor/image_labels/*.txt
 comic_panel_extractor/runs/
 comic_panel_extractor/temp_dir/
 temp.py

comic_panel_extractor/annorator_server.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from fastapi import APIRouter, HTTPException, UploadFile, File
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, field_validator
 from typing import List
-from PIL import Image
 import os
 import base64
 from io import BytesIO
@@ -12,12 +13,28 @@ from typing import List, Optional, Union, Dict, Any
 from . import utils
 import copy
 import traceback
 app = APIRouter()
 # === Configuration ===
-IMAGE_ROOT = os.path.join(Config.current_path, "dataset/images")
-LABEL_ROOT = os.path.join(Config.current_path, "dataset/labels")
 IMAGE_LABEL_ROOT = os.path.join(Config.current_path, "image_labels")
 CLASS_ID = 0
@@ -64,7 +81,7 @@ def get_image_path(image_name: str) -> str:
     return os.path.join(IMAGE_ROOT, image_name)
 def get_label_path(image_name: str) -> str:
-    return os.path.join(LABEL_ROOT, os.path.splitext(image_name)[0] + ".txt")
 # === Core Functions ===
 def load_yolo_annotations(image_path: str, label_path: str, detect: bool = False):
@@ -252,19 +269,23 @@ async def list_all_images():
     for root, _, files in os.walk(IMAGE_ROOT):
         for file in sorted(files):
             if file.lower().endswith((".jpg", ".jpeg", ".png")):
-                image_path = os.path.join(root, file)
-                rel_path = os.path.relpath(image_path, IMAGE_ROOT)
-                label_path = get_label_path(rel_path)
-                img = Image.open(image_path)
-                width, height = img.size
-                image_info_list.append(ImageInfo(
-                    name=rel_path.replace("\\", "/"),
-                    width=width,
-                    height=height,
-                    has_annotations=os.path.exists(label_path)
-                ))
     return image_info_list
 @app.get("/api/annotate/image/{image_name:path}")
@@ -354,4 +375,176 @@ async def upload_image(file: UploadFile = File(...)):
     with open(file_path, "wb") as f:
         f.write(await file.read())
     shutil.copy(file_path, f'{Config.IMAGE_SOURCE_PATH}/{file.filename}')
-    return {"message": f"Uploaded {file.filename} to train set"}

+from fastapi import APIRouter, HTTPException, UploadFile, File, WebSocket, WebSocketDisconnect
 from fastapi.responses import FileResponse
+from .ws_manager import manager
 from pydantic import BaseModel, field_validator
 from typing import List
+from PIL import Image, UnidentifiedImageError
 import os
 import base64
 from io import BytesIO
 from . import utils
 import copy
 import traceback
+import asyncio
+import sys, signal
+import psutil
+import subprocess
+from . import common
+import fcntl
 app = APIRouter()
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    await manager.connect(websocket)
+    try:
+        while True:
+            data = await websocket.receive_text()
+            # Handle any websocket messages if needed
+    except WebSocketDisconnect:
+        print("Client disconnected:", websocket.client)
+        manager.disconnect(websocket)
 # === Configuration ===
+IMAGE_ROOT = os.path.join(Config.current_path, "images")
 IMAGE_LABEL_ROOT = os.path.join(Config.current_path, "image_labels")
 CLASS_ID = 0
     return os.path.join(IMAGE_ROOT, image_name)
 def get_label_path(image_name: str) -> str:
+    return os.path.join(IMAGE_LABEL_ROOT, os.path.splitext(image_name)[0] + ".txt")
 # === Core Functions ===
 def load_yolo_annotations(image_path: str, label_path: str, detect: bool = False):
     for root, _, files in os.walk(IMAGE_ROOT):
         for file in sorted(files):
             if file.lower().endswith((".jpg", ".jpeg", ".png")):
+                try:
+                    image_path = os.path.join(root, file)
+                    rel_path = os.path.relpath(image_path, IMAGE_ROOT)
+                    label_path = get_label_path(rel_path)
+                    img = Image.open(image_path)
+                    width, height = img.size
+                    image_info_list.append(ImageInfo(
+                        name=rel_path.replace("\\", "/"),
+                        width=width,
+                        height=height,
+                        has_annotations=os.path.exists(label_path)
+                    ))
+                except UnidentifiedImageError:
+                    print(f"Cannot identify image file: {image_path}")
     return image_info_list
 @app.get("/api/annotate/image/{image_name:path}")
     with open(file_path, "wb") as f:
         f.write(await file.read())
     shutil.copy(file_path, f'{Config.IMAGE_SOURCE_PATH}/{file.filename}')
+    return {"message": f"Uploaded {file.filename} to train set"}
+####################### ----train---- #############################
+current_process = {}
+def reset_current_process():
+    global current_process
+    current_process = {
+        "process": None
+    }
+reset_current_process()
+# Define a function to handle cleanup
+def handle_exit(signal_received, frame):
+    if current_process["process"]:
+        os.killpg(os.getpgid(current_process['process'].pid), signal.SIGKILL)
+    sys.exit(0)
+# Register the signal handler for SIGINT
+signal.signal(signal.SIGINT, handle_exit)
+@app.get("/api/annotate/train")
+async def upload_image(recreate_dataset: bool = False):
+    os.environ['PYTHONUNBUFFERED'] = "1"
+    # Skip if the training process is already running
+    if is_process_running("comic_panel_extractor.train"):
+        return {"status": "ignored", "message": "Training already in progress."}
+    reset_current_process()
+    cmd_to_run=""
+    if recreate_dataset:
+        cmd_to_run = "python -m comic_panel_extractor.create_dataset && "
+    cmd_to_run += "python -m comic_panel_extractor.train"
+    async def run_and_stream_output():
+        process = None
+        try:
+            process = subprocess.Popen(
+                cmd_to_run,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                bufsize=1,
+                universal_newlines=True,
+                preexec_fn=os.setsid,
+		        env={**os.environ, 'PYTHONUNBUFFERED': '1', 'CUDA_LAUNCH_BLOCKING': '1', 'USE_CPU_IF_POSSIBLE': str(common.get_device() == "cpu")}
+            )
+            # Set non-blocking I/O
+            fd = process.stdout.fileno()
+            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
+            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
+            current_process['process'] = process
+            # Stream the output and send it via WebSocket in real-time
+            while True:
+                try:
+                    output = process.stdout.readline()
+                    if output:
+                        print(output.strip())
+                        print("Active connections:", len(manager.active_connections))
+                        asyncio.create_task(manager.broadcast({
+                            'type': 'command_output',
+                            'data': output.strip()
+                        }))
+                        sys.stdout.flush()
+                    if process.poll() is not None:
+                        break
+                    # Small delay to prevent CPU spinning
+                    await asyncio.sleep(0.01)
+                except Exception as e:
+                    print(f"Error reading process output: {e}")
+                    break
+            # Process finished
+            return_code = process.returncode if process else -1
+            asyncio.create_task(manager.broadcast({
+                'type': 'command_finished',
+                'return_code': return_code
+            }))
+        except Exception as e:
+            print(f"Error in run_and_stream_output: {e}")
+            asyncio.create_task(manager.broadcast({
+                'type': 'command_error',
+                'error': str(e)
+            }))
+        finally:
+            current_process['process'] = None
+    # Start the command execution in a separate task
+    asyncio.create_task(run_and_stream_output())
+    return {"message": "Command started!", "status": "started"}
+@app.get("/api/annotate/stopTrain")
+async def stop_train():
+    try:
+        # Check if there's actually a process to stop
+        if current_process['process'] is None:
+            return {'message': 'No command is currently running.', 'status': 'no_process'}
+        # Check if process has already terminated naturally
+        if current_process['process'].poll() is not None:
+            # Process already finished, just clean up
+            reset_current_process()
+            return {'message': 'Command has already finished.', 'status': 'already_finished'}
+        try:
+            # Get the process group ID before attempting to kill
+            pgid = os.getpgid(current_process['process'].pid)
+            # Kill the entire process group
+            os.killpg(pgid, signal.SIGTERM)  # Try SIGTERM first
+            # Wait a bit for graceful shutdown
+            await asyncio.sleep(1)
+            # If still running, force kill
+            if current_process['process'] and current_process['process'].poll() is None:
+                os.killpg(pgid, signal.SIGKILL)
+        except ProcessLookupError:
+            # Process already dead
+            print("Process already terminated")
+        except OSError as e:
+            # Handle permission errors or other OS-level issues
+            print(f"Error terminating process: {e}")
+            # Try to kill just the main process if group kill fails
+            try:
+                current_process['process'].terminate()
+                await asyncio.sleep(0.5)
+                if current_process['process'].poll() is None:
+                    current_process['process'].kill()
+            except:
+                pass
+        # Always reset the process state
+        reset_current_process()
+        # Notify connected clients
+        await manager.broadcast({
+            'type': 'command_stopped',
+            'message': 'Command terminated by user'
+        })
+        return {'message': 'Command terminated successfully.', 'status': 'terminated'}
+    except Exception as e:
+        print(f"Error in stop_command: {str(e)}")
+        # Force reset even if there was an error
+        reset_current_process()
+        raise HTTPException(status_code=500, detail=f'Error stopping command: {str(e)}')
+def is_process_running(name: str) -> bool:
+    """
+    Check if a process containing 'name' in its command line is running.
+    """
+    for proc in psutil.process_iter(['cmdline']):
+        try:
+            cmdline = " ".join(proc.info['cmdline']) if proc.info['cmdline'] else ""
+            if name in cmdline:
+                return True
+        except (psutil.NoSuchProcess, psutil.AccessDenied):
+            continue
+    return False

comic_panel_extractor/config.py CHANGED Viewed

@@ -1,20 +1,37 @@
 from dataclasses import dataclass
 import os
 from dotenv import load_dotenv
 load_dotenv()
 @dataclass
 class Config:
 	"""Configuration settings for the comic-to-video pipeline."""
 	org_input_path: str = ""
 	input_path: str = ""
-	current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))
-	EPOCH = int(os.getenv('EPOCH', '200'))
-	YOLO_BASE_MODEL_NAME = os.getenv('YOLO_BASE_MODEL_NAME', 'yolo11s-seg')
-	yolo_base_model_path: str = f'{current_path}/{YOLO_BASE_MODEL_NAME}.pt'
-	YOLO_MODEL_NAME = f"{os.getenv('YOLO_MODEL_NAME', 'comic_panel')}_{YOLO_BASE_MODEL_NAME}"
-	yolo_trained_model_path: str = f'{current_path}/{YOLO_MODEL_NAME}.pt'
 	black_overlay_input_path: str = ""
 	output_folder: str = "temp_dir"
 	distance_threshold: int = 70
@@ -24,13 +41,14 @@ class Config:
 	min_area_ratio: float = 0.05
 	min_width_ratio: float = 0.15
 	min_height_ratio: float = 0.15
 	# Additional parameters for BorderPanelExtractor
 	panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
-	"""Configuration class to manage environment variables and paths."""
-	DEFAULT_IMAGE_SIZE = 640
-	SUPPORTED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG']
 def get_text_cood_file_path(config: Config):
-	return f'{config.output_folder}/{config.text_cood_file_name}'

 from dataclasses import dataclass
 import os
+import toml
 from dotenv import load_dotenv
 load_dotenv()
+CURRENT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__)))
+CONFIG_FILE = f"{CURRENT_PATH}/config.toml"
+# Load TOML config
+if os.path.exists(CONFIG_FILE):
+	config_data = toml.load(CONFIG_FILE)
+else:
+	raise FileNotFoundError(f"Config file not found: {CONFIG_FILE}")
 @dataclass
 class Config:
 	"""Configuration settings for the comic-to-video pipeline."""
+	current_path: str = CURRENT_PATH
+	# Read from TOML config
+	EPOCH: int = int(config_data.get("EPOCH", 200))
+	YOLO_BASE_MODEL_NAME: str = config_data.get("YOLO_BASE_MODEL_NAME", "yolo11s-seg")
+	YOLO_MODEL_NAME: str = config_data.get("YOLO_MODEL_NAME", f"comic_panel_{YOLO_BASE_MODEL_NAME}")
+	IMAGE_SOURCE_PATH: str = config_data.get("IMAGE_SOURCE_PATH", "")
+	# Derived paths
+	yolo_base_model_path: str = f"{current_path}/{YOLO_BASE_MODEL_NAME}.pt"
+	yolo_trained_model_path: str = f"{current_path}/{YOLO_MODEL_NAME}.pt"
+	# Other parameters
 	org_input_path: str = ""
 	input_path: str = ""
 	black_overlay_input_path: str = ""
 	output_folder: str = "temp_dir"
 	distance_threshold: int = 70
 	min_area_ratio: float = 0.05
 	min_width_ratio: float = 0.15
 	min_height_ratio: float = 0.15
 	# Additional parameters for BorderPanelExtractor
 	panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
+	# Static constants
+	DEFAULT_IMAGE_SIZE: int = 640
+	SUPPORTED_EXTENSIONS: list = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')
 def get_text_cood_file_path(config: Config):
+	"""Return full path to text coordinate file."""
+	return f"{config.output_folder}/{config.text_cood_file_name}"

comic_panel_extractor/config.toml ADDED Viewed

	@@ -0,0 +1,4 @@

+EPOCH=200
+YOLO_BASE_MODEL_NAME="yolo11s-seg"
+YOLO_MODEL_NAME="comic_panel_yolo11s-seg"
+IMAGE_SOURCE_PATH="/home/jebin/git/comic-panel-extractor/comic_panel_extractor/images"

comic_panel_extractor/create_dataset.py CHANGED Viewed

@@ -7,7 +7,7 @@ from tqdm import tqdm
 from .config import Config
 load_dotenv()
-SOURCE_PATHS = os.getenv('SOURCE_PATH')
 if not SOURCE_PATHS:
     raise ValueError("SOURCE_PATH not set")

 from .config import Config
 load_dotenv()
+SOURCE_PATHS = Config.IMAGE_SOURCE_PATH
 if not SOURCE_PATHS:
     raise ValueError("SOURCE_PATH not set")

comic_panel_extractor/image_labels/image labels generated here.info ADDED Viewed

File without changes

comic_panel_extractor/images/Place the images here.info ADDED Viewed

File without changes

comic_panel_extractor/server.py CHANGED Viewed

@@ -1,8 +1,9 @@
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from .extractor_server import app as extractor_app, delete_folder_if_old_or_empty, output_folder
 from .annorator_server import app as annotator_app
-import os
 from .config import Config
 from fastapi import Request
@@ -20,6 +21,16 @@ fast_api.mount("/static", StaticFiles(directory=static_folder), name="static")
 fast_api.include_router(extractor_app)
 fast_api.include_router(annotator_app)
 # Templates
 template_dirs = [static_folder]
 env = Environment(

 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
 from .extractor_server import app as extractor_app, delete_folder_if_old_or_empty, output_folder
 from .annorator_server import app as annotator_app
+import os, json
 from .config import Config
 from fastapi import Request
 fast_api.include_router(extractor_app)
 fast_api.include_router(annotator_app)
+# Add CORS middleware
+# NOTE(review): every origin, method and header is allowed here together with
+# allow_credentials=True. FastAPI's CORS documentation says the wildcard values
+# must not be combined with credentials -- confirm whether credentials are
+# needed, and if so list the allowed origins explicitly.
+fast_api.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 # Templates
 template_dirs = [static_folder]
 env = Environment(

comic_panel_extractor/static/annotator.html CHANGED Viewed

@@ -5,6 +5,11 @@
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>📸 Comic Panel Annotator</title>
     <style>
         * {
             margin: 0;
@@ -241,6 +246,10 @@
             width: 100%;
         }
         /* Navigation Controls */
         .image-nav {
             display: flex;
@@ -512,6 +521,85 @@
             border-color: #ffc107 !important;
             background: #fff3cd !important;
         }
     </style>
 </head>
@@ -589,12 +677,15 @@
                 </div>
-                <div class="file-upload" style="display: none;">
                     <input type="file" id="uploadFile" accept="image/*">
                     <label for="uploadFile" class="file-upload-label">
                         📤 Drop or click to upload
                     </label>
                 </div>
             </div>
             <!-- Progress -->
@@ -760,7 +851,44 @@
     <!-- Alerts Container -->
     <div class="alerts" id="alerts"></div>
     <script>
         class ComicAnnotator {
             constructor() {
                 this.canvas = document.getElementById('annotationCanvas');
@@ -891,6 +1019,54 @@
                 // Make canvas focusable for keyboard events
                 this.canvas.tabIndex = 0;
             }
             updateCanvasCursor() {
@@ -1533,7 +1709,7 @@
                     saved: false
                 };
                 this.drawCanvas();
-                this.showModeIndicator('Segmentation Mode', 'segmentation');
             }
             startBboxDrawing(pos) {
@@ -2649,11 +2825,12 @@
                         await this.loadImages();
                         // Auto-select the uploaded image
-                        const index = this.images.findIndex(img => img.name === file.name);
                         if (index >= 0) {
                             this.currentImageIndex = index;
-                            document.getElementById('imageSelect').value = file.name;
-                            this.loadImage(file.name);
                         }
                     } else {
                         throw new Error('Upload failed');
@@ -2681,6 +2858,53 @@
                     }
                 }, 5000);
             }
         }
         // Initialize the application when the page loads

     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>📸 Comic Panel Annotator</title>
+    <!-- Xterm.js Files -->
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/xterm@5.3.0/css/xterm.min.css" />
+    <script src="https://cdn.jsdelivr.net/npm/xterm@5.3.0/lib/xterm.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/xterm-addon-fit@0.8.0/lib/xterm-addon-fit.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/xterm-addon-web-links@0.9.0/lib/xterm-addon-web-links.min.js"></script>
     <style>
         * {
             margin: 0;
             width: 100%;
         }
+        .trainBtn {
+            width: 100%;
+            margin-top: 10px;
+        }
         /* Navigation Controls */
         .image-nav {
             display: flex;
             border-color: #ffc107 !important;
             background: #fff3cd !important;
         }
+        .modal {
+            display: none;
+            /* Hidden by default */
+            position: fixed;
+            /* Stay in place */
+            z-index: 1000;
+            /* Sit on top */
+            left: 0;
+            top: 0;
+            width: 100%;
+            /* Full width */
+            height: 100%;
+            /* Full height */
+            overflow: auto;
+            /* Enable scroll if needed */
+            /* Black w/ opacity */
+            background-color: rgba(0, 0, 0, 0.8);
+        }
+        .modal-content {
+            background-color: #fefefe;
+            /* White background */
+            margin: 2% 0 0 28%;
+            /* 2% from the top, 28% from the left */
+            padding: 20px;
+            border: 1px solid #888;
+            /* Border */
+            width: 80%;
+            /* Could be more or less, depending on screen size */
+            max-width: 600px;
+            /* Maximum width */
+            border-radius: 8px;
+            /* Rounded corners */
+            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
+            /* Shadow */
+            background: linear-gradient(135deg, #fff, #f0f0f0);
+            border: 4px solid #000000;
+            border-radius: 20px;
+            box-shadow: 0 0 30px rgba(0, 0, 0, 0.3);
+            padding: 30px;
+        }
+        #outputModal .modal-content {
+            height: 85vh;
+            background: black;
+            overflow: hidden; /* Xterm handles its own scrollbar */
+            padding: 15px;
+            box-sizing: border-box;
+        }
+        .all-button {
+            position: fixed;
+            top: 50px;
+            right: 10%;
+            display: flex;
+            flex-direction: column;
+            gap: 10px;
+            z-index: 1000;
+        }
+        .clear-button,
+        .stop-button {
+            float: right;
+            font-size: 0.8em;
+            width: auto;
+            padding: 8px 12px;
+            margin: 2px;
+            margin-bottom: 10px;
+        }
+        .close {
+            color: #FF6B6B;
+            font-size: 40px;
+            transition: all 0.3s ease;
+            cursor: pointer;
+            position: fixed;
+            top: 22px;
+            z-index: 10000;
+        }
     </style>
 </head>
                 </div>
+                <div class="file-upload">
                     <input type="file" id="uploadFile" accept="image/*">
                     <label for="uploadFile" class="file-upload-label">
                         📤 Drop or click to upload
                     </label>
                 </div>
+                <button class="btn btn-primary btn-sm trainBtn" id="trainBtn">
+                    Train
+                </button>
             </div>
             <!-- Progress -->
     <!-- Alerts Container -->
     <div class="alerts" id="alerts"></div>
+    <div id="outputModal" class="modal">
+        <div class="modal-content" style="max-width: none; margin: auto;">
+            <span class="close" id="closeModal">×</span>
+            <div class="all-button">
+                <!-- REMOVED: Scroll control button -->
+                <button class="stop-button" id="stopTrain">Stop</button>
+                <button class="clear-button" id="clearOutput">Clear</button>
+            </div>
+            <!-- This div will host the xterm.js terminal -->
+            <div id="output"></div>
+        </div>
+    </div>
     <script>
+        // NEW: Xterm.js variables
+        let term;
+        let fitAddon;
+        function getWebSocketURL() {
+            const isLocal = window.location.hostname === 'localhost' || window.location.hostname === '127.0.0.1';
+            if (isLocal) {
+                return 'ws://localhost:' + window.location.port + '/ws';
+            } else {
+                // Use current domain for Spaces
+                const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
+                return `${protocol}//${window.location.host}/ws`;
+            }
+        }
+        const socket = new WebSocket(getWebSocketURL());
+        socket.onmessage = function (event) {
+            if (!term) return;
+            const data = JSON.parse(event.data);
+            term.write(data.data+"\n");
+        };
         class ComicAnnotator {
             constructor() {
                 this.canvas = document.getElementById('annotationCanvas');
                 // Make canvas focusable for keyboard events
                 this.canvas.tabIndex = 0;
+                document.getElementById('trainBtn').addEventListener('click', async (e) => {
+                    try {
+                        this.openXterm();
+                        const response = await fetch('/api/annotate/train?recreate_dataset=true');
+                        if (!response.ok) {
+                            throw new Error(`Server error: ${response.status}`);
+                        }
+                        const result = await response.json();
+                        this.showAlert(result.message, 'success');
+                    } catch (error) {
+                        if (term) {
+                            term.write(`\x1b[31m[Error starting command: ${error.message}]\x1b[0m\r\n`);
+                        } else {
+                            this.showAlert('Error starting command: ' + error.message, 'error');
+                        }
+                    }
+                    // Reset file input
+                    document.getElementById('uploadFile').value = '';
+                });
+                document.getElementById('stopTrain').addEventListener('click', async (e) => {
+                    this.stopTrain()
+                });
+                document.getElementById('clearOutput').addEventListener('click', async (e) => {
+                    this.clearOutput()
+                });
+                document.getElementById('closeModal').addEventListener('click', async (e) => {
+                    this.closeTrainModal()
+                });
+                // NEW: Add resize listener to refit terminal on window resize
+                window.addEventListener('resize', () => {
+                if (document.getElementById('outputModal').style.display === 'block' && fitAddon) {
+                    try {
+                        fitAddon.fit();
+                    } catch (e) {
+                        console.error("Error fitting terminal on resize:", e);
+                    }
+                }
+                });
             }
             updateCanvasCursor() {
                     saved: false
                 };
                 this.drawCanvas();
+                this.showModeIndicator('Segmentation Mode, After pointing three/more points press enter to release', 'segmentation');
             }
             startBboxDrawing(pos) {
                         await this.loadImages();
                         // Auto-select the uploaded image
+                        var new_file_name = "train/" + file.name
+                        const index = this.images.findIndex(img => img.name === new_file_name);
                         if (index >= 0) {
                             this.currentImageIndex = index;
+                            document.getElementById('imageSelect').value = new_file_name;
+                            this.loadImage(new_file_name);
                         }
                     } else {
                         throw new Error('Upload failed');
                     }
                 }, 5000);
             }
+            ////////////////////////// ----train---- //////////////////////////
+            openXterm() {
+                const modal = document.getElementById('outputModal');
+                modal.style.display = 'block';
+                // Initialize terminal on first run
+                if (!term) {
+                    term = new Terminal({
+                        cursorBlink: true,
+                        convertEol: true,
+                        theme: {
+                            background: '#000000',
+                            foreground: '#00FF7F', // SpringGreen
+                            cursor: 'rgba(255, 255, 255, 0.5)'
+                        }
+                    });
+                    fitAddon = new FitAddon.FitAddon();
+                    const webLinksAddon = new WebLinksAddon.WebLinksAddon();
+                    term.loadAddon(fitAddon);
+                    term.loadAddon(webLinksAddon);
+                    term.open(document.getElementById('output'));
+                }
+                // Use a short timeout to ensure the modal is visible before fitting
+                setTimeout(() => fitAddon.fit(), 50);
+                term.clear();
+                term.focus();
+                term.write('\x1b[33mRunning command...\x1b[0m\r\n');
+            }
+            clearOutput() {
+                if (term) {
+                    term.clear();
+                }
+            }
+            stopTrain() {
+                fetch('/api/annotate/stopTrain', {
+                    method: 'GET',
+                    headers: { 'Content-Type': 'application/json' }
+                })
+            }
+            closeTrainModal() {
+                const modal = document.getElementById('outputModal');
+                modal.style.display = 'none';
+            }
         }
         // Initialize the application when the page loads

comic_panel_extractor/static/index.html CHANGED Viewed

@@ -385,6 +385,10 @@
         </div>
         <div class="footer-note">
             Currently using pretrained model from
             <a href="https://huggingface.co/mosesb/best-comic-panel-detection" target="_blank">mosesb/best-comic-panel-detection</a>
             until custom training is complete.

         </div>
         <div class="footer-note">
+            <div>
+                To train your own model, open the
+                <a href="/annotate" target="_blank">annotator</a>.
+            </div>
             Currently using pretrained model from
             <a href="https://huggingface.co/mosesb/best-comic-panel-detection" target="_blank">mosesb/best-comic-panel-detection</a>
             until custom training is complete.

comic_panel_extractor/ws_manager.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from fastapi import WebSocket
+import json
+from typing import List
+# WebSocket connection manager
+class ConnectionManager:
+    def __init__(self):
+        self.active_connections: List[WebSocket] = []
+    async def connect(self, websocket: WebSocket):
+        await websocket.accept()
+        self.active_connections.append(websocket)
+    def disconnect(self, websocket: WebSocket):
+        self.active_connections.remove(websocket)
+    async def send_personal_message(self, message: str, websocket: WebSocket):
+        await websocket.send_text(message)
+    async def broadcast(self, message: dict):
+        for connection in self.active_connections:
+            try:
+                await connection.send_text(json.dumps(message, ensure_ascii=False))
+            except:
+                # Remove disconnected connections
+                if connection in self.active_connections:
+                    self.active_connections.remove(connection)
+manager = ConnectionManager()

requirements.txt CHANGED Viewed

@@ -4,6 +4,7 @@ opencv-python
 easyocr
 fastapi
 uvicorn
 python-multipart
 jinja2
 scikit-image
@@ -13,4 +14,5 @@ ultralytics
 Pillow
 opencv-contrib-python
 dotenv
-tqdm

 easyocr
 fastapi
 uvicorn
+websockets
 python-multipart
 jinja2
 scikit-image
 Pillow
 opencv-contrib-python
 dotenv
+tqdm
+toml