jebin2 committed on
Commit
0b2f929
·
1 Parent(s): 3b74526

allowed cpu

Browse files
comic_panel_extractor/comic.yaml CHANGED
@@ -1,5 +1,5 @@
1
- path: /home/jebineinstein/git/comic-panel-extractor/comic_panel_extractor/dataset
2
- train: /home/jebineinstein/git/comic-panel-extractor/comic_panel_extractor/dataset/images/train
3
- val: /home/jebineinstein/git/comic-panel-extractor/comic_panel_extractor/dataset/images/val
4
  nc: 1
5
  names: ['panel']
 
1
+ path: /home/jebin/git/comic-panel-extractor/comic_panel_extractor/dataset
2
+ train: /home/jebin/git/comic-panel-extractor/comic_panel_extractor/dataset/images/train
3
+ val: /home/jebin/git/comic-panel-extractor/comic_panel_extractor/dataset/images/val
4
  nc: 1
5
  names: ['panel']
comic_panel_extractor/common.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import os
3
+ import shutil
4
+ import string
5
+ import secrets
6
+ import hashlib
7
+ import random
8
+ import time
9
+ import re
10
+
11
def get_files_count(directory_path):
    """Return the number of entries directly inside ``directory_path``.

    Every name reported by ``os.listdir`` is counted, so sub-directories are
    included alongside regular files. Nested contents are not visited.
    """
    entries = os.listdir(directory_path)
    return len(entries)
13
+
14
def generate_random_string(length=10):
    """Build an unpredictable string of ASCII letters.

    Args:
        length: number of characters to produce.

    Returns:
        A string of ``length`` upper/lower-case ASCII letters, each drawn
        with ``secrets.choice``.
    """
    alphabet = string.ascii_letters
    picked = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
18
+
19
def generate_random_string_from_input(input_string, length=16):
    """Derive a deterministic pseudo-random string from ``input_string``.

    The same input always yields the same output, which makes the result
    usable as a stable identifier. It is NOT suitable as a secret: the value
    is fully determined by the input.

    Args:
        input_string: text the result is derived from.
        length: number of characters to produce.

    Returns:
        A string of ``length`` ASCII letters and digits.
    """
    # Hash the input so every input maps to a fixed seed value.
    seed = hashlib.sha256(input_string.encode()).hexdigest()

    # Use a private generator instead of random.seed(): seeding the module
    # level RNG would silently make every later random.* call in the process
    # deterministic. random.Random(seed) produces the same sequence that
    # random.seed(seed) did, so the returned strings are unchanged.
    rng = random.Random(seed)

    characters = string.ascii_letters + string.digits
    return ''.join(rng.choice(characters) for _ in range(length))
32
+
33
def is_mostly_black(frame, black_threshold=20, percentage_threshold=0.9, sample_rate=10):
    """
    Fast black frame detection using pixel sampling.

    Args:
        frame: OpenCV frame (NumPy array). Multi-channel frames are converted
            with ``cv2.COLOR_BGR2GRAY``; frames that are already single
            channel (2-D, or 3-D with one channel) are used as-is.
        black_threshold: grayscale value below which a pixel is considered black
        percentage_threshold: fraction of black pixels to consider frame mostly black
        sample_rate: sample every N-th pixel in both dimensions (higher = faster)
    Returns:
        True if mostly black, False otherwise. A missing or empty frame is
        reported as black.
    """
    import numpy as np

    if frame is None or frame.size == 0:
        return True

    if frame.ndim == 2:
        # Already grayscale: cv2.cvtColor(BGR2GRAY) would reject this input.
        gray = frame
    elif frame.ndim == 3 and frame.shape[2] == 1:
        # Single-channel image stored with an explicit channel axis.
        gray = frame[:, :, 0]
    else:
        # Import lazily so grayscale/empty inputs do not require OpenCV.
        import cv2
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Only every sample_rate-th pixel on each axis is inspected.
    sampled = gray[::sample_rate, ::sample_rate]
    black_fraction = np.count_nonzero(sampled < black_threshold) / sampled.size

    # Convert numpy.bool_ to a plain bool so identity checks behave as expected.
    return bool(black_fraction >= percentage_threshold)
56
+
57
def only_alpha(text: str) -> str:
    """Strip everything except ASCII letters from ``text`` and lowercase it.

    Digits, whitespace, punctuation and non-ASCII characters are removed, so
    two strings that differ only in case or in such characters compare equal
    after this normalisation.
    """
    non_letters = re.compile(r'[^a-zA-Z]')
    letters_only = non_letters.sub('', text)
    return letters_only.lower()
60
+
61
def manage_gpu(size_gb: float = 0, gpu_index: int = 0, action: str = "check"):
    """
    Inspect one NVIDIA GPU through NVML and optionally free memory on it.

    Actions:
    - check → just prints memory + process table
    - clear_cache → also clears the PyTorch CUDA cache of this process
    - kill → also kills all other compute processes on the GPU, then clears
      the PyTorch cache

    Args:
        size_gb: amount of free GPU memory (GiB) the caller needs.
        gpu_index: NVML index of the GPU to inspect.
        action: one of "check", "clear_cache" or "kill"; any other value
            behaves like "check".

    Returns:
        True when more than ``size_gb`` GiB are free on the GPU (measured
        after the requested action ran), False otherwise. False is also
        returned when pynvml is not installed or any NVML call fails, so the
        function is safe to call on CPU-only machines.
    """
    try:
        import gc
        import signal
        import pynvml
    except ImportError:
        # No NVML bindings: treat the GPU as unavailable.
        return False

    def _process_name(pid):
        # Depending on the pynvml version the name is bytes or str, and the
        # lookup can fail for processes this user may not inspect.
        try:
            name = pynvml.nvmlSystemGetProcessName(pid)
        except pynvml.NVMLError:
            return "<unknown>"
        if isinstance(name, bytes):
            name = name.decode(errors="ignore")
        return name

    def _clear_torch_cache():
        try:
            import torch
        except ImportError:
            print("\n⚠️ PyTorch not installed, cannot clear cache.")
            return
        # Collect twice so objects freed by finalizers in the first pass are
        # released before the CUDA cache is emptied.
        gc.collect()
        gc.collect()
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()
        time.sleep(1)
        print("\n🧹 Cleared PyTorch CUDA cache")

    nvml_ready = False
    try:
        pynvml.nvmlInit()
        nvml_ready = True
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_index)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)

        free_gb = info.free / 1024**3
        total_gb = info.total / 1024**3

        print(f"\nGPU {gpu_index}: Free {free_gb:.2f} GB / Total {total_gb:.2f} GB")

        # Show processes
        processes = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
        print("\nActive GPU Processes:")
        print(f"{'PID':<8} {'Process Name':<40} {'Used (GB)':<10}")
        print("-" * 60)
        for p in processes:
            # usedGpuMemory can be None when the driver does not report it.
            used_gb = (p.usedGpuMemory or 0) / 1024**3
            print(f"{p.pid:<8} {_process_name(p.pid):<40} {used_gb:.2f}")

        if action == "clear_cache":
            _clear_torch_cache()

        elif action == "kill":
            own_pid = os.getpid()
            for p in processes:
                if p.pid == own_pid:
                    # Never SIGKILL ourselves; our own memory is released by
                    # the cache clear below instead.
                    continue
                proc_name = _process_name(p.pid)
                try:
                    os.kill(p.pid, signal.SIGKILL)
                    print(f"❌ Killed {p.pid} ({proc_name})")
                except OSError as e:
                    print(f"⚠️ Could not kill {p.pid}: {e}")
            # Re-run on the SAME GPU to print the refreshed table and clear
            # the PyTorch cache.
            manage_gpu(gpu_index=gpu_index, action="clear_cache")
            gc.collect()
            gc.collect()

        if action in ("clear_cache", "kill"):
            # The earlier reading predates the cleanup; measure again so the
            # result reflects what is free now.
            free_gb = pynvml.nvmlDeviceGetMemoryInfo(handle).free / 1024**3

        return free_gb > size_gb
    except Exception:
        # Best effort: any NVML/CUDA failure means "not enough GPU memory".
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return False
    finally:
        if nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                # Shutdown failure must not mask the result computed above.
                pass
115
+
116
def is_gpu_available(verbose=True):
    """Report whether a CUDA device can actually be used right now.

    ``torch.cuda.is_available()`` alone is not enough: a device can be
    detected yet refuse work, so a one-element tensor is allocated on it as
    a probe.

    Args:
        verbose: when true, print which outcome was detected.

    Returns:
        True if the probe allocation on ``"cuda"`` succeeds, False if CUDA is
        absent or the probe fails with a CUDA busy/unavailable error.

    Raises:
        RuntimeError: the probe failed for a reason that is not recognised
            as a CUDA availability problem.
    """
    import torch

    if not torch.cuda.is_available():
        if verbose:
            print("CUDA not available.")
        return False

    try:
        # Try a tiny allocation to check if GPU is free & usable
        torch.empty(1, device="cuda")
        if verbose:
            print(f"CUDA available. Using device: {torch.cuda.get_device_name(0)}")
        return True
    except RuntimeError as e:
        message = str(e)
        cuda_unusable = (
            "CUDA-capable device(s) is/are busy or unavailable" in message
            or "CUDA error" in message
        )
        if cuda_unusable:
            if verbose:
                print("CUDA detected but busy/unavailable. Falling back to CPU.")
            return False
        raise  # re-raise if it's some other unexpected error
136
+
137
def get_device(is_vision=False):
    """Choose the torch device string to run on.

    Non-vision work is pinned to ``"cpu"`` whenever the environment variable
    ``USE_CPU_IF_POSSIBLE`` is set to any non-empty value (note that values
    such as ``"0"`` count as set). In every other case the GPU is used when
    ``is_gpu_available()`` reports one, with ``"cpu"`` as the fallback.

    Args:
        is_vision: when true, the environment override is ignored.

    Returns:
        ``"cuda"`` or ``"cpu"``.
    """
    prefer_cpu = bool(os.getenv("USE_CPU_IF_POSSIBLE", None))
    if prefer_cpu and not is_vision:
        return "cpu"
    if is_gpu_available():
        return "cuda"
    return "cpu"
comic_panel_extractor/llm_panel_extractor.py CHANGED
@@ -8,6 +8,7 @@ import os
8
  import shutil
9
  import requests
10
  from pathlib import Path
 
11
 
12
  class LLMPanelExtractor:
13
  """Handles image preprocessing operations."""
@@ -85,7 +86,7 @@ class LLMPanelExtractor:
85
  image_width, image_height = input_image.size
86
 
87
  # Run YOLO detection
88
- detection_results = self.yolo_model.predict(source=input_image_path)
89
  first_detection_result = detection_results[0]
90
  newly_detected_boxes = None
91
  all_processed_boxes = []
 
8
  import shutil
9
  import requests
10
  from pathlib import Path
11
+ from . import common
12
 
13
  class LLMPanelExtractor:
14
  """Handles image preprocessing operations."""
 
86
  image_width, image_height = input_image.size
87
 
88
  # Run YOLO detection
89
+ detection_results = self.yolo_model.predict(source=input_image_path, device=common.get_device())
90
  first_detection_result = detection_results[0]
91
  newly_detected_boxes = None
92
  all_processed_boxes = []
comic_panel_extractor/train.py CHANGED
@@ -196,8 +196,8 @@ def main():
196
 
197
  if __name__ == "__main__":# Configuration
198
  # Configuration
199
- original_dataset_path = "/home/jebineinstein/git/comic-panel-extractor/comic_panel_extractor/dataset"
200
- output_filtered_dataset_path = "/home/jebineinstein/git/comic-panel-extractor/comic_panel_extractor/filtered_dataset"
201
 
202
  print("πŸ” Starting dataset filtering...")
203
  print(f"πŸ“‚ Source: {original_dataset_path}")
 
196
 
197
  if __name__ == "__main__":# Configuration
198
  # Configuration
199
+ original_dataset_path = "/home/jebin/git/comic-panel-extractor/comic_panel_extractor/dataset"
200
+ output_filtered_dataset_path = "/home/jebin/git/comic-panel-extractor/comic_panel_extractor/filtered_dataset"
201
 
202
  print("πŸ” Starting dataset filtering...")
203
  print(f"πŸ“‚ Source: {original_dataset_path}")