Update app.py
app.py CHANGED
@@ -15,7 +15,8 @@ from torchvision import transforms
 # 2. Load model weights
 # --------------------------
 # Load the checkpoint directly as it was saved (a plain ResNet50 with custom fc head)
-state_dict = torch.load("best_stanford_cars_transfer_model.pth", map_location="cpu")
+# state_dict = torch.load("best_stanford_cars_transfer_model.pth", map_location="cpu")
+state_dict = torch.load("test_with_YOLO.pth", map_location="cpu")
 
 # Create a ResNet50 and modify its fc to match the checkpoint
 from torchvision.models import resnet50
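This hunk only swaps which checkpoint file is loaded; the code that rebuilds the network sits just below it and is unchanged. For orientation, a minimal sketch of how such a checkpoint is typically restored, assuming the custom fc head is a single Linear layer over the 196 Stanford Cars classes (the actual head in app.py may differ):

import torch
import torch.nn as nn
from torchvision.models import resnet50

model = resnet50(weights=None)  # architecture only; all weights come from the checkpoint
model.fc = nn.Linear(model.fc.in_features, 196)  # assumed head: 196 = len(labels)
state_dict = torch.load("test_with_YOLO.pth", map_location="cpu")
model.load_state_dict(state_dict)  # raises if the head shape does not match the checkpoint
model.eval()  # inference mode for the app

load_state_dict raises a descriptive RuntimeError on any shape mismatch, which is the quickest way to confirm the head definition matches what was saved.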
@@ -241,10 +242,61 @@ labels = [
     "smart fortwo Convertible 2012"
 ]
 
+
+from ultralytics import YOLO
+import numpy as np
+
+# --------------------------
+# Load YOLO model for cropping
+# --------------------------
+device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
+yolo_model = YOLO('yolov8n.pt')  # Using the small 'nano' model
+print("YOLOv8 model loaded.")
+
+# --------------------------
+# Define YOLO cropping function
+# --------------------------
+def detect_and_crop_pil(pil_image, model=yolo_model, device=device_str, conf_thresh=0.25, pad_ratio=0.05):
+    """
+    Run YOLO on a PIL image and return a cropped PIL image around the best car detection.
+    If no car is found, it returns the original image.
+    """
+    results = model(pil_image, imgsz=640, conf=conf_thresh, device=device, verbose=False)
+    if len(results) == 0 or results[0].boxes is None or len(results[0].boxes) == 0:
+        return pil_image
+
+    r = results[0]
+    boxes = r.boxes.xyxy.cpu().numpy()
+    try:
+        classes = r.boxes.cls.cpu().numpy().astype(int)
+    except Exception:
+        classes = np.zeros(len(boxes), dtype=int)
+
+    # Prefer COCO car class (index 2)
+    car_indices = np.where(classes == 2)[0]
+    if len(car_indices) == 0:
+        return pil_image  # Return original if no car detected
+
+    # Choose the car detection with the largest box area
+    areas = (boxes[car_indices, 2] - boxes[car_indices, 0]) * (boxes[car_indices, 3] - boxes[car_indices, 1])
+    best_idx = car_indices[np.argmax(areas)]
+    x1, y1, x2, y2 = boxes[best_idx].astype(int)
+
+    # Add padding
+    w, h = x2 - x1, y2 - y1
+    pad = int(max(w, h) * pad_ratio)
+    x1, y1 = max(0, x1 - pad), max(0, y1 - pad)
+    x2, y2 = min(pil_image.width, x2 + pad), min(pil_image.height, y2 + pad)
+
+    return pil_image.crop((x1, y1, x2, y2))
+
+
 # --------------------------
 # 4. Preprocessing function
 # --------------------------
 def preprocess_image(img: Image.Image):
+    cropped_img = detect_and_crop_pil(img)
+
     transform = transforms.Compose([
         transforms.Resize((224, 224)),  # match your training input size
         transforms.ToTensor(),
@@ -253,7 +305,7 @@ def preprocess_image(img: Image.Image):
             std=[0.229, 0.224, 0.225]
         )
     ])
-    x = transform(img).unsqueeze(0)  # add batch dimension
+    x = transform(cropped_img).unsqueeze(0)  # add batch dimension
     return x
 
 # --------------------------
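In COCO class ordering, index 2 is "car" (bus is 5, truck is 7), so detect_and_crop_pil deliberately ignores other vehicle types and falls back to the uncropped frame. A quick way to sanity-check the cropper on its own, using a hypothetical local test image:

from PIL import Image

img = Image.open("example_car.jpg").convert("RGB")  # hypothetical test image
cropped = detect_and_crop_pil(img)
print(img.size, "->", cropped.size)  # the crop is never larger than the original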
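End to end, the new preprocessing path is crop, then resize and normalize, then classify. A sketch of how the pieces compose at inference time, assuming the classifier is bound to a variable named model (a hypothetical name; only its construction is described in app.py) and labels is the class list defined above:

import torch
from PIL import Image

img = Image.open("example_car.jpg").convert("RGB")  # hypothetical input image
x = preprocess_image(img)  # YOLO crop + resize + normalize, shape (1, 3, 224, 224)
with torch.no_grad():
    probs = torch.softmax(model(x), dim=1)
conf, idx = probs.max(dim=1)
print(labels[idx.item()], f"{conf.item():.3f}")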