jebin2 committed on
Commit
951222c
·
1 Parent(s): e6e7958

add train support

Browse files
.gitignore CHANGED
@@ -209,8 +209,9 @@ temp_dir
209
  input.jpg
210
  comic_panel_extractor/api_outputs/
211
  comic_panel_extractor/dataset/
212
- comic_panel_extractor/images/
213
- comic_panel_extractor/image_labels/
 
214
  comic_panel_extractor/runs/
215
  comic_panel_extractor/temp_dir/
216
  temp.py
 
209
  input.jpg
210
  comic_panel_extractor/api_outputs/
211
  comic_panel_extractor/dataset/
212
+ comic_panel_extractor/images/*.png
213
+ comic_panel_extractor/images/*.jpg
214
+ comic_panel_extractor/image_labels/*.txt
215
  comic_panel_extractor/runs/
216
  comic_panel_extractor/temp_dir/
217
  temp.py
comic_panel_extractor/annorator_server.py CHANGED
@@ -1,8 +1,9 @@
1
- from fastapi import APIRouter, HTTPException, UploadFile, File
2
  from fastapi.responses import FileResponse
 
3
  from pydantic import BaseModel, field_validator
4
  from typing import List
5
- from PIL import Image
6
  import os
7
  import base64
8
  from io import BytesIO
@@ -12,12 +13,28 @@ from typing import List, Optional, Union, Dict, Any
12
  from . import utils
13
  import copy
14
  import traceback
 
 
 
 
 
 
15
 
16
  app = APIRouter()
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  # === Configuration ===
19
- IMAGE_ROOT = os.path.join(Config.current_path, "dataset/images")
20
- LABEL_ROOT = os.path.join(Config.current_path, "dataset/labels")
21
  IMAGE_LABEL_ROOT = os.path.join(Config.current_path, "image_labels")
22
 
23
  CLASS_ID = 0
@@ -64,7 +81,7 @@ def get_image_path(image_name: str) -> str:
64
  return os.path.join(IMAGE_ROOT, image_name)
65
 
66
  def get_label_path(image_name: str) -> str:
67
- return os.path.join(LABEL_ROOT, os.path.splitext(image_name)[0] + ".txt")
68
 
69
  # === Core Functions ===
70
  def load_yolo_annotations(image_path: str, label_path: str, detect: bool = False):
@@ -252,19 +269,23 @@ async def list_all_images():
252
  for root, _, files in os.walk(IMAGE_ROOT):
253
  for file in sorted(files):
254
  if file.lower().endswith((".jpg", ".jpeg", ".png")):
255
- image_path = os.path.join(root, file)
256
- rel_path = os.path.relpath(image_path, IMAGE_ROOT)
257
- label_path = get_label_path(rel_path)
258
-
259
- img = Image.open(image_path)
260
- width, height = img.size
261
-
262
- image_info_list.append(ImageInfo(
263
- name=rel_path.replace("\\", "/"),
264
- width=width,
265
- height=height,
266
- has_annotations=os.path.exists(label_path)
267
- ))
 
 
 
 
268
  return image_info_list
269
 
270
  @app.get("/api/annotate/image/{image_name:path}")
@@ -354,4 +375,176 @@ async def upload_image(file: UploadFile = File(...)):
354
  with open(file_path, "wb") as f:
355
  f.write(await file.read())
356
  shutil.copy(file_path, f'{Config.IMAGE_SOURCE_PATH}/{file.filename}')
357
- return {"message": f"Uploaded {file.filename} to train set"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, UploadFile, File, WebSocket, WebSocketDisconnect
2
  from fastapi.responses import FileResponse
3
+ from .ws_manager import manager
4
  from pydantic import BaseModel, field_validator
5
  from typing import List
6
+ from PIL import Image, UnidentifiedImageError
7
  import os
8
  import base64
9
  from io import BytesIO
 
13
  from . import utils
14
  import copy
15
  import traceback
16
+ import asyncio
17
+ import sys, signal
18
+ import psutil
19
+ import subprocess
20
+ from . import common
21
+ import fcntl
22
 
23
  app = APIRouter()
24
 
25
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Register a WebSocket client and keep it open until it goes away.

    Incoming text frames are read and discarded; the socket exists so the
    server can push messages through ``manager.broadcast``.

    Args:
        websocket: The client connection handed over by the router.
    """
    await manager.connect(websocket)
    try:
        while True:
            # Nothing is done with client messages yet; reading them is what
            # lets a disconnect surface as WebSocketDisconnect.
            await websocket.receive_text()
    except WebSocketDisconnect:
        print("Client disconnected:", websocket.client)
    finally:
        # Unregister on every exit path, not only a clean disconnect, so a
        # failed socket is not left in the broadcast list.  The membership
        # check matters because broadcast() may already have pruned this
        # socket, and list.remove() would then raise ValueError.
        if websocket in manager.active_connections:
            manager.disconnect(websocket)
35
+
36
  # === Configuration ===
37
+ IMAGE_ROOT = os.path.join(Config.current_path, "images")
 
38
  IMAGE_LABEL_ROOT = os.path.join(Config.current_path, "image_labels")
39
 
40
  CLASS_ID = 0
 
81
  return os.path.join(IMAGE_ROOT, image_name)
82
 
83
  def get_label_path(image_name: str) -> str:
84
+ return os.path.join(IMAGE_LABEL_ROOT, os.path.splitext(image_name)[0] + ".txt")
85
 
86
  # === Core Functions ===
87
  def load_yolo_annotations(image_path: str, label_path: str, detect: bool = False):
 
269
  for root, _, files in os.walk(IMAGE_ROOT):
270
  for file in sorted(files):
271
  if file.lower().endswith((".jpg", ".jpeg", ".png")):
272
+ try:
273
+ image_path = os.path.join(root, file)
274
+ rel_path = os.path.relpath(image_path, IMAGE_ROOT)
275
+ label_path = get_label_path(rel_path)
276
+
277
+ img = Image.open(image_path)
278
+ width, height = img.size
279
+
280
+ image_info_list.append(ImageInfo(
281
+ name=rel_path.replace("\\", "/"),
282
+ width=width,
283
+ height=height,
284
+ has_annotations=os.path.exists(label_path)
285
+ ))
286
+ except UnidentifiedImageError:
287
+ print(f"Cannot identify image file: {image_path}")
288
+
289
  return image_info_list
290
 
291
  @app.get("/api/annotate/image/{image_name:path}")
 
375
  with open(file_path, "wb") as f:
376
  f.write(await file.read())
377
  shutil.copy(file_path, f'{Config.IMAGE_SOURCE_PATH}/{file.filename}')
378
+ return {"message": f"Uploaded {file.filename} to train set"}
379
+
380
+ ####################### ----train---- #############################
381
+
382
+
383
+ current_process = {}
384
+
385
+ def reset_current_process():
386
+ global current_process
387
+ current_process = {
388
+ "process": None
389
+ }
390
+
391
+ reset_current_process()
392
+
393
+ # Define a function to handle cleanup
394
+ def handle_exit(signal_received, frame):
395
+ if current_process["process"]:
396
+ os.killpg(os.getpgid(current_process['process'].pid), signal.SIGKILL)
397
+ sys.exit(0)
398
+
399
+ # Register the signal handler for SIGINT
400
+ signal.signal(signal.SIGINT, handle_exit)
401
+
402
# Strong references to in-flight streaming tasks.  The event loop only keeps
# weak references to tasks, so an unreferenced task may be garbage-collected
# before it finishes.
_train_stream_tasks = set()


@app.get("/api/annotate/train")
async def upload_image(recreate_dataset: bool = False):
    """Start the training command in the background and stream its output.

    The command runs in its own session (process group) so it can be killed as
    a unit.  Every output line is broadcast to WebSocket clients as a
    ``command_output`` message, followed by ``command_finished`` (with the
    return code) or ``command_error``.

    NOTE(review): this handler shares the name ``upload_image`` with the
    upload endpoint defined earlier in the module and therefore rebinds that
    module-level name.  The name is kept unchanged here; consider renaming.

    Args:
        recreate_dataset: When true, run ``create_dataset`` before ``train``.

    Returns:
        ``{"status": "ignored", ...}`` if a training command is already
        running, otherwise ``{"status": "started", ...}``.
    """
    os.environ['PYTHONUNBUFFERED'] = "1"

    # Skip if the training process is already running.  The tracked handle is
    # checked as well as the process table: during the optional
    # create_dataset phase no "comic_panel_extractor.train" process exists
    # yet, and resetting the handle then would orphan the running command.
    tracked = current_process.get("process")
    if tracked is not None and tracked.poll() is None:
        return {"status": "ignored", "message": "Training already in progress."}
    if is_process_running("comic_panel_extractor.train"):
        return {"status": "ignored", "message": "Training already in progress."}

    reset_current_process()
    cmd_to_run = ""
    if recreate_dataset:
        cmd_to_run = "python -m comic_panel_extractor.create_dataset && "
    cmd_to_run += "python -m comic_panel_extractor.train"

    async def run_and_stream_output():
        """Run the command and forward its output until it exits."""
        process = None
        try:
            process = subprocess.Popen(
                cmd_to_run,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=1,
                universal_newlines=True,
                # Equivalent to preexec_fn=os.setsid without running Python
                # code between fork and exec.
                start_new_session=True,
                env={**os.environ, 'PYTHONUNBUFFERED': '1', 'CUDA_LAUNCH_BLOCKING': '1', 'USE_CPU_IF_POSSIBLE': str(common.get_device() == "cpu")}
            )

            # Set non-blocking I/O so readline() returns immediately when the
            # child has produced nothing, instead of stalling the event loop.
            fd = process.stdout.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

            current_process['process'] = process

            # Stream the output and send it via WebSocket in real-time
            while True:
                try:
                    # Sample the exit status BEFORE reading: if the process
                    # has exited, the drain below then sees everything it
                    # wrote, so trailing lines are not lost.
                    exited = process.poll() is not None

                    # Forward every line that is currently available.
                    while True:
                        output = process.stdout.readline()
                        if not output:
                            break
                        print(output.strip())
                        print("Active connections:", len(manager.active_connections))
                        # Awaited (not fire-and-forget) so messages reach
                        # clients in the order they were produced.
                        await manager.broadcast({
                            'type': 'command_output',
                            'data': output.strip()
                        })
                        sys.stdout.flush()
                        # Let other coroutines run during long bursts.
                        await asyncio.sleep(0)

                    if exited:
                        break

                    # Small delay to prevent CPU spinning
                    await asyncio.sleep(0.01)

                except Exception as e:
                    print(f"Error reading process output: {e}")
                    break

            # Process finished
            return_code = process.returncode if process else -1
            await manager.broadcast({
                'type': 'command_finished',
                'return_code': return_code
            })

        except Exception as e:
            print(f"Error in run_and_stream_output: {e}")
            await manager.broadcast({
                'type': 'command_error',
                'error': str(e)
            })
        finally:
            # Release the pipe once the child is gone.  It is left open while
            # the child is still running so the child does not start failing
            # on writes.
            if process is not None and process.poll() is not None and process.stdout:
                process.stdout.close()
            # Only clear the shared handle if it is still ours; a newer run
            # may have replaced it after a stop request.
            if current_process.get('process') is process:
                current_process['process'] = None

    # Start the command execution in a separate task
    task = asyncio.create_task(run_and_stream_output())
    _train_stream_tasks.add(task)
    task.add_done_callback(_train_stream_tasks.discard)
    return {"message": "Command started!", "status": "started"}
477
+
478
+
479
@app.get("/api/annotate/stopTrain")
async def stop_train():
    """Terminate the running training command and notify WebSocket clients.

    Sends SIGTERM to the command's process group, waits one second, then
    sends SIGKILL if it is still alive.  If the group signal fails with an
    OS error, falls back to terminating only the main process.

    Returns:
        A dict with ``message`` and ``status`` (``no_process``,
        ``already_finished`` or ``terminated``).

    Raises:
        HTTPException: 500 if stopping fails unexpectedly.
    """
    # Work on a local reference.  The streaming task clears
    # current_process['process'] when the command exits, which can happen
    # while this handler is suspended in asyncio.sleep(); re-reading the
    # shared dict after an await could then yield None.
    process = current_process['process']
    try:
        # Check if there's actually a process to stop
        if process is None:
            return {'message': 'No command is currently running.', 'status': 'no_process'}

        # Check if process has already terminated naturally
        if process.poll() is not None:
            # Process already finished, just clean up
            reset_current_process()
            return {'message': 'Command has already finished.', 'status': 'already_finished'}

        try:
            # Get the process group ID before attempting to kill
            pgid = os.getpgid(process.pid)

            # Kill the entire process group
            os.killpg(pgid, signal.SIGTERM)  # Try SIGTERM first

            # Wait a bit for graceful shutdown
            await asyncio.sleep(1)

            # If still running, force kill
            if process.poll() is None:
                os.killpg(pgid, signal.SIGKILL)

        except ProcessLookupError:
            # Process already dead
            print("Process already terminated")
        except OSError as e:
            # Handle permission errors or other OS-level issues
            print(f"Error terminating process: {e}")
            # Try to kill just the main process if group kill fails
            try:
                process.terminate()
                await asyncio.sleep(0.5)
                if process.poll() is None:
                    process.kill()
            except OSError as fallback_error:
                # Best effort: report the failure instead of hiding it.
                print(f"Fallback termination failed: {fallback_error}")

        # Always reset the process state
        reset_current_process()

        # Notify connected clients
        await manager.broadcast({
            'type': 'command_stopped',
            'message': 'Command terminated by user'
        })

        return {'message': 'Command terminated successfully.', 'status': 'terminated'}

    except Exception as e:
        print(f"Error in stop_train: {str(e)}")
        # Force reset even if there was an error
        reset_current_process()
        raise HTTPException(status_code=500, detail=f'Error stopping command: {str(e)}')
537
+
538
+
539
def is_process_running(name: str) -> bool:
    """Report whether any process has ``name`` somewhere in its command line.

    Args:
        name: Substring to look for in each process's space-joined arguments.

    Returns:
        True on the first match, False when no process matches.
    """
    for candidate in psutil.process_iter(['cmdline']):
        try:
            argv = candidate.info['cmdline']
            # A missing or empty argument list is treated as an empty string.
            joined = " ".join(argv) if argv else ""
            if name in joined:
                return True
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Skip processes that cannot be inspected.
            continue
    return False
comic_panel_extractor/config.py CHANGED
@@ -1,20 +1,37 @@
1
  from dataclasses import dataclass
2
  import os
 
3
 
4
  from dotenv import load_dotenv
5
  load_dotenv()
6
 
 
 
 
 
 
 
 
 
 
7
  @dataclass
8
  class Config:
9
  """Configuration settings for the comic-to-video pipeline."""
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  org_input_path: str = ""
11
  input_path: str = ""
12
- current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))
13
- EPOCH = int(os.getenv('EPOCH', '200'))
14
- YOLO_BASE_MODEL_NAME = os.getenv('YOLO_BASE_MODEL_NAME', 'yolo11s-seg')
15
- yolo_base_model_path: str = f'{current_path}/{YOLO_BASE_MODEL_NAME}.pt'
16
- YOLO_MODEL_NAME = f"{os.getenv('YOLO_MODEL_NAME', 'comic_panel')}_{YOLO_BASE_MODEL_NAME}"
17
- yolo_trained_model_path: str = f'{current_path}/{YOLO_MODEL_NAME}.pt'
18
  black_overlay_input_path: str = ""
19
  output_folder: str = "temp_dir"
20
  distance_threshold: int = 70
@@ -24,13 +41,14 @@ class Config:
24
  min_area_ratio: float = 0.05
25
  min_width_ratio: float = 0.15
26
  min_height_ratio: float = 0.15
27
-
28
  # Additional parameters for BorderPanelExtractor
29
  panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
30
 
31
- """Configuration class to manage environment variables and paths."""
32
- DEFAULT_IMAGE_SIZE = 640
33
- SUPPORTED_EXTENSIONS = ['jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG']
34
 
35
  def get_text_cood_file_path(config: Config):
36
- return f'{config.output_folder}/{config.text_cood_file_name}'
 
 
1
  from dataclasses import dataclass
2
  import os
3
+ import toml
4
 
5
  from dotenv import load_dotenv
6
  load_dotenv()
7
 
8
+ CURRENT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__)))
9
+ CONFIG_FILE = f"{CURRENT_PATH}/config.toml"
10
+
11
+ # Load TOML config
12
+ if os.path.exists(CONFIG_FILE):
13
+ config_data = toml.load(CONFIG_FILE)
14
+ else:
15
+ raise FileNotFoundError(f"Config file not found: {CONFIG_FILE}")
16
+
17
  @dataclass
18
  class Config:
19
  """Configuration settings for the comic-to-video pipeline."""
20
+ current_path: str = CURRENT_PATH
21
+
22
+ # Read from TOML config
23
+ EPOCH: int = int(config_data.get("EPOCH", 200))
24
+ YOLO_BASE_MODEL_NAME: str = config_data.get("YOLO_BASE_MODEL_NAME", "yolo11s-seg")
25
+ YOLO_MODEL_NAME: str = config_data.get("YOLO_MODEL_NAME", f"comic_panel_{YOLO_BASE_MODEL_NAME}")
26
+ IMAGE_SOURCE_PATH: str = config_data.get("IMAGE_SOURCE_PATH", "")
27
+
28
+ # Derived paths
29
+ yolo_base_model_path: str = f"{current_path}/{YOLO_BASE_MODEL_NAME}.pt"
30
+ yolo_trained_model_path: str = f"{current_path}/{YOLO_MODEL_NAME}.pt"
31
+
32
+ # Other parameters
33
  org_input_path: str = ""
34
  input_path: str = ""
 
 
 
 
 
 
35
  black_overlay_input_path: str = ""
36
  output_folder: str = "temp_dir"
37
  distance_threshold: int = 70
 
41
  min_area_ratio: float = 0.05
42
  min_width_ratio: float = 0.15
43
  min_height_ratio: float = 0.15
44
+
45
  # Additional parameters for BorderPanelExtractor
46
  panel_filename_pattern: str = r"panel_\d+_\((\d+), (\d+), (\d+), (\d+)\)\.jpg"
47
 
48
+ # Static constants
49
+ DEFAULT_IMAGE_SIZE: int = 640
50
+ SUPPORTED_EXTENSIONS: list = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')
51
 
52
  def get_text_cood_file_path(config: Config):
53
+ """Return full path to text coordinate file."""
54
+ return f"{config.output_folder}/{config.text_cood_file_name}"
comic_panel_extractor/config.toml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ EPOCH=200
2
+ YOLO_BASE_MODEL_NAME="yolo11s-seg"
3
+ YOLO_MODEL_NAME="comic_panel_yolo11s-seg"
4
+ IMAGE_SOURCE_PATH="/home/jebin/git/comic-panel-extractor/comic_panel_extractor/images"
comic_panel_extractor/create_dataset.py CHANGED
@@ -7,7 +7,7 @@ from tqdm import tqdm
7
  from .config import Config
8
 
9
  load_dotenv()
10
- SOURCE_PATHS = os.getenv('SOURCE_PATH')
11
 
12
  if not SOURCE_PATHS:
13
  raise ValueError("SOURCE_PATH not set")
 
7
  from .config import Config
8
 
9
  load_dotenv()
10
+ SOURCE_PATHS = Config.IMAGE_SOURCE_PATH
11
 
12
  if not SOURCE_PATHS:
13
  raise ValueError("SOURCE_PATH not set")
comic_panel_extractor/image_labels/image labels generated here.info ADDED
File without changes
comic_panel_extractor/images/Place the images here.info ADDED
File without changes
comic_panel_extractor/server.py CHANGED
@@ -1,8 +1,9 @@
1
  from fastapi import FastAPI
2
  from fastapi.staticfiles import StaticFiles
 
3
  from .extractor_server import app as extractor_app, delete_folder_if_old_or_empty, output_folder
4
  from .annorator_server import app as annotator_app
5
- import os
6
  from .config import Config
7
 
8
  from fastapi import Request
@@ -20,6 +21,16 @@ fast_api.mount("/static", StaticFiles(directory=static_folder), name="static")
20
  fast_api.include_router(extractor_app)
21
  fast_api.include_router(annotator_app)
22
 
 
 
 
 
 
 
 
 
 
 
23
  # Templates
24
  template_dirs = [static_folder]
25
  env = Environment(
 
1
  from fastapi import FastAPI
2
  from fastapi.staticfiles import StaticFiles
3
+ from fastapi.middleware.cors import CORSMiddleware
4
  from .extractor_server import app as extractor_app, delete_folder_if_old_or_empty, output_folder
5
  from .annorator_server import app as annotator_app
6
+ import os, json
7
  from .config import Config
8
 
9
  from fastapi import Request
 
21
  fast_api.include_router(extractor_app)
22
  fast_api.include_router(annotator_app)
23
 
24
+
25
+ # Add CORS middleware
26
+ fast_api.add_middleware(
27
+ CORSMiddleware,
28
+ allow_origins=["*"],
29
+ allow_credentials=True,
30
+ allow_methods=["*"],
31
+ allow_headers=["*"],
32
+ )
33
+
34
  # Templates
35
  template_dirs = [static_folder]
36
  env = Environment(
comic_panel_extractor/static/annotator.html CHANGED
@@ -5,6 +5,11 @@
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
  <title>📸 Comic Panel Annotator</title>
 
 
 
 
 
8
  <style>
9
  * {
10
  margin: 0;
@@ -241,6 +246,10 @@
241
  width: 100%;
242
  }
243
 
 
 
 
 
244
  /* Navigation Controls */
245
  .image-nav {
246
  display: flex;
@@ -512,6 +521,85 @@
512
  border-color: #ffc107 !important;
513
  background: #fff3cd !important;
514
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  </style>
516
  </head>
517
 
@@ -589,12 +677,15 @@
589
  </div>
590
 
591
 
592
- <div class="file-upload" style="display: none;">
593
  <input type="file" id="uploadFile" accept="image/*">
594
  <label for="uploadFile" class="file-upload-label">
595
  📤 Drop or click to upload
596
  </label>
597
  </div>
 
 
 
598
  </div>
599
 
600
  <!-- Progress -->
@@ -760,7 +851,44 @@
760
  <!-- Alerts Container -->
761
  <div class="alerts" id="alerts"></div>
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  <script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
  class ComicAnnotator {
765
  constructor() {
766
  this.canvas = document.getElementById('annotationCanvas');
@@ -891,6 +1019,54 @@
891
 
892
  // Make canvas focusable for keyboard events
893
  this.canvas.tabIndex = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
894
  }
895
 
896
  updateCanvasCursor() {
@@ -1533,7 +1709,7 @@
1533
  saved: false
1534
  };
1535
  this.drawCanvas();
1536
- this.showModeIndicator('Segmentation Mode', 'segmentation');
1537
  }
1538
 
1539
  startBboxDrawing(pos) {
@@ -2649,11 +2825,12 @@
2649
  await this.loadImages();
2650
 
2651
  // Auto-select the uploaded image
2652
- const index = this.images.findIndex(img => img.name === file.name);
 
2653
  if (index >= 0) {
2654
  this.currentImageIndex = index;
2655
- document.getElementById('imageSelect').value = file.name;
2656
- this.loadImage(file.name);
2657
  }
2658
  } else {
2659
  throw new Error('Upload failed');
@@ -2681,6 +2858,53 @@
2681
  }
2682
  }, 5000);
2683
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2684
  }
2685
 
2686
  // Initialize the application when the page loads
 
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
  <title>📸 Comic Panel Annotator</title>
8
+ <!-- Xterm.js Files -->
9
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/xterm@5.3.0/css/xterm.min.css" />
10
+ <script src="https://cdn.jsdelivr.net/npm/xterm@5.3.0/lib/xterm.min.js"></script>
11
+ <script src="https://cdn.jsdelivr.net/npm/xterm-addon-fit@0.8.0/lib/xterm-addon-fit.min.js"></script>
12
+ <script src="https://cdn.jsdelivr.net/npm/xterm-addon-web-links@0.9.0/lib/xterm-addon-web-links.min.js"></script>
13
  <style>
14
  * {
15
  margin: 0;
 
246
  width: 100%;
247
  }
248
 
249
+ .trainBtn {
250
+ width: 100%;
251
+ margin-top: 10px;
252
+ }
253
  /* Navigation Controls */
254
  .image-nav {
255
  display: flex;
 
521
  border-color: #ffc107 !important;
522
  background: #fff3cd !important;
523
  }
524
+
525
+ .modal {
526
+ display: none;
527
+ /* Hidden by default */
528
+ position: fixed;
529
+ /* Stay in place */
530
+ z-index: 1000;
531
+ /* Sit on top */
532
+ left: 0;
533
+ top: 0;
534
+ width: 100%;
535
+ /* Full width */
536
+ height: 100%;
537
+ /* Full height */
538
+ overflow: auto;
539
+ /* Enable scroll if needed */
540
+ /* Black w/ opacity */
541
+ background-color: rgba(0, 0, 0, 0.8);
542
+ }
543
+
544
+ .modal-content {
545
+ background-color: #fefefe;
546
+ /* White background */
547
+ margin: 2% 0 0 28%;
548
+ /* 2% from the top, offset 28% from the left */
549
+ padding: 20px;
550
+ border: 1px solid #888;
551
+ /* Border */
552
+ width: 80%;
553
+ /* Could be more or less, depending on screen size */
554
+ max-width: 600px;
555
+ /* Maximum width */
556
+ border-radius: 8px;
557
+ /* Rounded corners */
558
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
559
+ /* Shadow */
560
+ background: linear-gradient(135deg, #fff, #f0f0f0);
561
+ border: 4px solid #000000;
562
+ border-radius: 20px;
563
+ box-shadow: 0 0 30px rgba(0, 0, 0, 0.3);
564
+ padding: 30px;
565
+ }
566
+ #outputModal .modal-content {
567
+ height: 85vh;
568
+ background: black;
569
+ overflow: hidden; /* Xterm handles its own scrollbar */
570
+ padding: 15px;
571
+ box-sizing: border-box;
572
+ }
573
+
574
+ .all-button {
575
+ position: fixed;
576
+ top: 50px;
577
+ right: 10%;
578
+ display: flex;
579
+ flex-direction: column;
580
+ gap: 10px;
581
+ z-index: 1000;
582
+ }
583
+
584
+ .clear-button,
585
+ .stop-button {
586
+ float: right;
587
+ font-size: 0.8em;
588
+ width: auto;
589
+ padding: 8px 12px;
590
+ margin: 2px;
591
+ margin-bottom: 10px;
592
+ }
593
+
594
+ .close {
595
+ color: #FF6B6B;
596
+ font-size: 40px;
597
+ transition: all 0.3s ease;
598
+ cursor: pointer;
599
+ position: fixed;
600
+ top: 22px;
601
+ z-index: 10000;
602
+ }
603
  </style>
604
  </head>
605
 
 
677
  </div>
678
 
679
 
680
+ <div class="file-upload">
681
  <input type="file" id="uploadFile" accept="image/*">
682
  <label for="uploadFile" class="file-upload-label">
683
  📤 Drop or click to upload
684
  </label>
685
  </div>
686
+ <button class="btn btn-primary btn-sm trainBtn" id="trainBtn">
687
+ Train
688
+ </button>
689
  </div>
690
 
691
  <!-- Progress -->
 
851
  <!-- Alerts Container -->
852
  <div class="alerts" id="alerts"></div>
853
 
854
+
855
+ <div id="outputModal" class="modal">
856
+ <div class="modal-content" style="max-width: none; margin: auto;">
857
+ <span class="close" id="closeModal">×</span>
858
+ <div class="all-button">
859
+ <!-- REMOVED: Scroll control button -->
860
+ <button class="stop-button" id="stopTrain">Stop</button>
861
+ <button class="clear-button" id="clearOutput">Clear</button>
862
+ </div>
863
+ <!-- This div will host the xterm.js terminal -->
864
+ <div id="output"></div>
865
+ </div>
866
+ </div>
867
  <script>
868
+
869
+ // NEW: Xterm.js variables
870
+ let term;
871
+ let fitAddon;
872
+
873
+ function getWebSocketURL() {
874
+ const isLocal = window.location.hostname === 'localhost' || window.location.hostname === '127.0.0.1';
875
+
876
+ if (isLocal) {
877
+ return 'ws://localhost:' + window.location.port + '/ws';
878
+ } else {
879
+ // Use current domain for Spaces
880
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
881
+ return `${protocol}//${window.location.host}/ws`;
882
+ }
883
+ }
884
+ const socket = new WebSocket(getWebSocketURL());
885
+
886
// Render server push messages in the terminal. The server sends JSON objects
// whose `type` is one of: command_output {data}, command_finished
// {return_code}, command_error {error}, command_stopped {message}.
socket.onmessage = function (event) {
    if (!term) return;

    let msg;
    try {
        msg = JSON.parse(event.data);
    } catch (e) {
        // Ignore frames that are not valid JSON instead of throwing inside the handler.
        console.error('Ignoring malformed WebSocket message:', e);
        return;
    }

    switch (msg.type) {
        case 'command_output':
            term.write(msg.data + "\n");
            break;
        case 'command_finished':
            term.write(`\x1b[33m[Command finished with return code ${msg.return_code}]\x1b[0m\n`);
            break;
        case 'command_error':
            term.write(`\x1b[31m[Command error: ${msg.error}]\x1b[0m\n`);
            break;
        case 'command_stopped':
            term.write(`\x1b[33m[${msg.message}]\x1b[0m\n`);
            break;
        default:
            // Unknown type: print its payload only if it has one, so the
            // terminal never shows "undefined".
            if (typeof msg.data === 'string') {
                term.write(msg.data + "\n");
            }
    }
};
891
+
892
  class ComicAnnotator {
893
  constructor() {
894
  this.canvas = document.getElementById('annotationCanvas');
 
1019
 
1020
  // Make canvas focusable for keyboard events
1021
  this.canvas.tabIndex = 0;
1022
+
1023
+ document.getElementById('trainBtn').addEventListener('click', async (e) => {
1024
+ try {
1025
+ this.openXterm();
1026
+ const response = await fetch('/api/annotate/train?recreate_dataset=true');
1027
+
1028
+ if (!response.ok) {
1029
+ throw new Error(`Server error: ${response.status}`);
1030
+ }
1031
+
1032
+ const result = await response.json();
1033
+
1034
+ this.showAlert(result.message, 'success');
1035
+
1036
+ } catch (error) {
1037
+ if (term) {
1038
+ term.write(`\x1b[31m[Error starting command: ${error.message}]\x1b[0m\r\n`);
1039
+ } else {
1040
+ this.showAlert('Error starting command: ' + error.message, 'error');
1041
+ }
1042
+ }
1043
+
1044
+ // Reset file input
1045
+ document.getElementById('uploadFile').value = '';
1046
+ });
1047
+
1048
+ document.getElementById('stopTrain').addEventListener('click', async (e) => {
1049
+ this.stopTrain()
1050
+ });
1051
+
1052
+ document.getElementById('clearOutput').addEventListener('click', async (e) => {
1053
+ this.clearOutput()
1054
+ });
1055
+
1056
+ document.getElementById('closeModal').addEventListener('click', async (e) => {
1057
+ this.closeTrainModal()
1058
+ });
1059
+
1060
+ // NEW: Add resize listener to refit terminal on window resize
1061
+ window.addEventListener('resize', () => {
1062
+ if (document.getElementById('outputModal').style.display === 'block' && fitAddon) {
1063
+ try {
1064
+ fitAddon.fit();
1065
+ } catch (e) {
1066
+ console.error("Error fitting terminal on resize:", e);
1067
+ }
1068
+ }
1069
+ });
1070
  }
1071
 
1072
  updateCanvasCursor() {
 
1709
  saved: false
1710
  };
1711
  this.drawCanvas();
1712
+ this.showModeIndicator('Segmentation Mode, After pointing three/more points press enter to release', 'segmentation');
1713
  }
1714
 
1715
  startBboxDrawing(pos) {
 
2825
  await this.loadImages();
2826
 
2827
  // Auto-select the uploaded image
2828
+ var new_file_name = "train/" + file.name
2829
+ const index = this.images.findIndex(img => img.name === new_file_name);
2830
  if (index >= 0) {
2831
  this.currentImageIndex = index;
2832
+ document.getElementById('imageSelect').value = new_file_name;
2833
+ this.loadImage(new_file_name);
2834
  }
2835
  } else {
2836
  throw new Error('Upload failed');
 
2858
  }
2859
  }, 5000);
2860
  }
2861
+ ////////////////////////// ----train---- //////////////////////////
2862
+ openXterm() {
2863
+ const modal = document.getElementById('outputModal');
2864
+ modal.style.display = 'block';
2865
+ // Initialize terminal on first run
2866
+ if (!term) {
2867
+
2868
+ term = new Terminal({
2869
+ cursorBlink: true,
2870
+ convertEol: true,
2871
+ theme: {
2872
+ background: '#000000',
2873
+ foreground: '#00FF7F', // SpringGreen
2874
+ cursor: 'rgba(255, 255, 255, 0.5)'
2875
+ }
2876
+ });
2877
+ fitAddon = new FitAddon.FitAddon();
2878
+ const webLinksAddon = new WebLinksAddon.WebLinksAddon();
2879
+ term.loadAddon(fitAddon);
2880
+ term.loadAddon(webLinksAddon);
2881
+ term.open(document.getElementById('output'));
2882
+ }
2883
+
2884
+ // Use a short timeout to ensure the modal is visible before fitting
2885
+ setTimeout(() => fitAddon.fit(), 50);
2886
+
2887
+ term.clear();
2888
+ term.focus();
2889
+ term.write('\x1b[33mRunning command...\x1b[0m\r\n');
2890
+ }
2891
+
2892
+ clearOutput() {
2893
+ if (term) {
2894
+ term.clear();
2895
+ }
2896
+ }
2897
+
2898
+ stopTrain() {
2899
+ fetch('/api/annotate/stopTrain', {
2900
+ method: 'GET',
2901
+ headers: { 'Content-Type': 'application/json' }
2902
+ })
2903
+ }
2904
+ closeTrainModal() {
2905
+ const modal = document.getElementById('outputModal');
2906
+ modal.style.display = 'none';
2907
+ }
2908
  }
2909
 
2910
  // Initialize the application when the page loads
comic_panel_extractor/static/index.html CHANGED
@@ -385,6 +385,10 @@
385
  </div>
386
 
387
  <div class="footer-note">
 
 
 
 
388
  Currently using pretrained model from
389
  <a href="https://huggingface.co/mosesb/best-comic-panel-detection" target="_blank">mosesb/best-comic-panel-detection</a>
390
  until custom training is complete.
 
385
  </div>
386
 
387
  <div class="footer-note">
388
+ <div>
389
+ To train your own model, open the
390
+ <a href="/annotate" target="_blank">annotator</a>
391
+ </div>
392
  Currently using pretrained model from
393
  <a href="https://huggingface.co/mosesb/best-comic-panel-detection" target="_blank">mosesb/best-comic-panel-detection</a>
394
  until custom training is complete.
comic_panel_extractor/ws_manager.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import WebSocket
2
+ import json
3
+ from typing import List
4
+
5
+ # WebSocket connection manager
6
class ConnectionManager:
    """Track open WebSocket clients and fan JSON messages out to them."""

    def __init__(self):
        # Sockets that have been accepted and not yet removed.
        self.active_connections: List["WebSocket"] = []

    async def connect(self, websocket: "WebSocket"):
        """Accept the handshake and start tracking ``websocket``."""
        await websocket.accept()
        self.active_connections.append(websocket)

    def disconnect(self, websocket: "WebSocket"):
        """Stop tracking ``websocket``.

        A socket that is not tracked (for example one already pruned by
        ``broadcast``) is ignored rather than raising ``ValueError``.
        """
        if websocket in self.active_connections:
            self.active_connections.remove(websocket)

    async def send_personal_message(self, message: str, websocket: "WebSocket"):
        """Send ``message`` as a text frame to a single client."""
        await websocket.send_text(message)

    async def broadcast(self, message: dict):
        """Send ``message`` as JSON text to every tracked client.

        Clients whose send fails are dropped from the tracked list; the
        remaining clients still receive the message.

        Raises:
            TypeError: If ``message`` is not JSON-serializable.  This is a
                caller bug and must not be mistaken for a dead connection.
        """
        # Serialize once, outside the per-client error handling.
        payload = json.dumps(message, ensure_ascii=False)

        # Iterate over a snapshot: removing from the list being iterated
        # would skip the client that follows each removed one.
        for connection in list(self.active_connections):
            try:
                await connection.send_text(payload)
            except Exception as send_error:
                # Remove disconnected connections
                print(f"Dropping WebSocket connection after send failure: {send_error}")
                self.disconnect(connection)
28
+
29
+ manager = ConnectionManager()
requirements.txt CHANGED
@@ -4,6 +4,7 @@ opencv-python
4
  easyocr
5
  fastapi
6
  uvicorn
 
7
  python-multipart
8
  jinja2
9
  scikit-image
@@ -13,4 +14,5 @@ ultralytics
13
  Pillow
14
  opencv-contrib-python
15
  dotenv
16
- tqdm
 
 
4
  easyocr
5
  fastapi
6
  uvicorn
7
+ websockets
8
  python-multipart
9
  jinja2
10
  scikit-image
 
14
  Pillow
15
  opencv-contrib-python
16
  dotenv
17
+ tqdm
18
+ toml