|
|
""" |
|
|
Model Manager - Handles loading and caching of YOLO and VLM models |
|
|
""" |
|
|
|
|
|
import torch |
|
|
from transformers import ( |
|
|
Qwen2_5_VLForConditionalGeneration, |
|
|
AutoProcessor, |
|
|
BitsAndBytesConfig |
|
|
) |
|
|
from ultralytics import YOLO |
|
|
import os |
|
|
from typing import Tuple |
|
|
|
|
|
from config import ( |
|
|
YOLO_MODEL_PATH, |
|
|
VLM_MODEL_ID, |
|
|
QUANTIZATION_CONFIG, |
|
|
YOLO_CONFIDENCE_THRESHOLD |
|
|
) |
|
|
|
|
|
|
|
|
class ModelManager:
    """Singleton that owns the YOLO detector and the Qwen2.5-VL model.

    Call ``load_models()`` once at startup; every subsequent
    ``ModelManager()`` returns the same instance with the models already
    resident in memory.
    """

    _instance = None      # the one shared instance
    _initialized = False  # guards __init__ against re-running on reuse

    def __new__(cls):
        # Classic singleton: create the instance exactly once.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Initialize attributes only on the first construction; later
        # ModelManager() calls must not wipe already-loaded models.
        if not ModelManager._initialized:
            self.yolo_model = None  # ultralytics YOLO detector
            self.vlm_model = None   # Qwen2.5-VL generation model
            self.processor = None   # matching Hugging Face processor
            ModelManager._initialized = True

    def load_models(self):
        """Load both YOLO and VLM models into memory, then warm them up."""
        print("🚀 Starting model loading...")
        self.yolo_model = self._load_yolo_model()
        self.vlm_model, self.processor = self._load_vlm_model()
        self._warmup_models()
        print("✅ All models loaded successfully!")

    def _load_yolo_model(self) -> "YOLO":
        """Load the trained YOLO model for signature and stamp detection.

        Raises:
            FileNotFoundError: if the weights file is not at YOLO_MODEL_PATH.
        """
        if not os.path.exists(YOLO_MODEL_PATH):
            raise FileNotFoundError(
                f"YOLO model not found at {YOLO_MODEL_PATH}. "
                "Please ensure best.pt is in utils/models/"
            )

        yolo_model = YOLO(str(YOLO_MODEL_PATH))
        print(f"✅ YOLO model loaded from {YOLO_MODEL_PATH}")
        return yolo_model

    def _load_vlm_model(self) -> Tuple:
        """Load Qwen2.5-VL with 4-bit quantization.

        Downloads from Hugging Face on first run (~4GB).

        Returns:
            Tuple: (model, processor)
        """
        print(f"📥 Loading VLM model: {VLM_MODEL_ID}")
        print(" (This will download ~4GB on first run)")

        bnb_config = BitsAndBytesConfig(
            load_in_4bit=QUANTIZATION_CONFIG["load_in_4bit"],
            bnb_4bit_quant_type=QUANTIZATION_CONFIG["bnb_4bit_quant_type"],
            # Config stores the dtype as a string (e.g. "bfloat16");
            # resolve it to the actual torch dtype object.
            bnb_4bit_compute_dtype=getattr(torch, QUANTIZATION_CONFIG["bnb_4bit_compute_dtype"]),
            bnb_4bit_use_double_quant=QUANTIZATION_CONFIG["bnb_4bit_use_double_quant"],
        )

        processor = AutoProcessor.from_pretrained(
            VLM_MODEL_ID,
            trust_remote_code=True,
        )

        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            VLM_MODEL_ID,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
        )

        # Inference only — disable dropout etc.
        model.eval()
        print("✅ Qwen2.5-VL model loaded successfully")

        return model, processor

    def _warmup_models(self):
        """Run one tiny dummy inference to initialize the CUDA context.

        Warmup failures are non-critical: they are logged and swallowed so
        startup can continue.
        """
        print("🔥 Warming up models (initializing CUDA context)...")
        import time
        from PIL import Image
        import numpy as np

        warmup_start = time.time()

        # A plain white 100x100 image keeps the warmup generate() cheap.
        dummy_image = Image.fromarray(np.ones((100, 100, 3), dtype=np.uint8) * 255)

        try:
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": dummy_image},
                        {"type": "text", "text": "warm up"},
                    ],
                }
            ]

            from qwen_vl_utils import process_vision_info
            text = self.processor.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            image_inputs, video_inputs = process_vision_info(messages)
            inputs = self.processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            )
            # Only move inputs to GPU when one exists; with device_map="auto"
            # the model may legitimately be on CPU, and an unconditional
            # .to("cuda") would abort the warmup on CPU-only hosts.
            if torch.cuda.is_available():
                inputs = inputs.to("cuda")

            with torch.no_grad():
                _ = self.vlm_model.generate(**inputs, max_new_tokens=5)

            # Free the warmup tensors before real traffic arrives.
            del inputs
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            warmup_time = time.time() - warmup_start
            print(f"✅ Models warmed up in {warmup_time:.2f}s (CUDA context initialized)")

        except Exception as e:
            # Best effort: a failed warmup must not prevent startup.
            print(f"⚠️ Warmup failed (non-critical): {e}")

    def detect_sign_stamp(self, image_path: str) -> Tuple:
        """Detect signature and stamp in the image using YOLO.

        For each class (0 = signature, 1 = stamp) only the highest-confidence
        detection above YOLO_CONFIDENCE_THRESHOLD is kept.

        Args:
            image_path: path to the image to run detection on.

        Returns:
            tuple: (signature_info, stamp_info, signature_conf, stamp_conf)
            where each *_info is {"present": bool, "bbox": [x1, y1, x2, y2] | None}.

        Raises:
            RuntimeError: if load_models() has not been called yet.
        """
        if self.yolo_model is None:
            raise RuntimeError("YOLO model not loaded. Call load_models() first.")

        results = self.yolo_model(image_path, verbose=False)[0]

        signature_info = {"present": False, "bbox": None}
        stamp_info = {"present": False, "bbox": None}
        signature_conf = 0.0
        stamp_conf = 0.0

        if results.boxes is not None:
            for box in results.boxes:
                cls_id = int(box.cls[0])
                conf = float(box.conf[0])

                if conf > YOLO_CONFIDENCE_THRESHOLD:
                    bbox = box.xyxy[0].cpu().numpy().tolist()
                    bbox = [int(coord) for coord in bbox]

                    # Keep the best box per class.
                    if cls_id == 0 and conf > signature_conf:
                        signature_info = {"present": True, "bbox": bbox}
                        signature_conf = conf
                    elif cls_id == 1 and conf > stamp_conf:
                        stamp_info = {"present": True, "bbox": bbox}
                        stamp_conf = conf

        return signature_info, stamp_info, signature_conf, stamp_conf

    def is_loaded(self) -> bool:
        """Return True once both models and the processor are in memory."""
        return (self.yolo_model is not None and
                self.vlm_model is not None and
                self.processor is not None)
|
|
|
|
|
|
|
|
|
|
|
# Module-level shared instance; importers should use this rather than
# constructing ModelManager themselves (the class is a singleton either way).
model_manager = ModelManager()
|
|
|