Oamitai commited on
Commit
02667ce
·
verified ·
1 Parent(s): 401eed3

Upload 10 files

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ best.pt filter=lfs diff=lfs merge=lfs -text
.gitattributes.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *.pt filter=lfs diff=lfs merge=lfs -text
2
+ *.pth filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.onnx filter=lfs diff=lfs merge=lfs -text
README.md.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9 Card Detector
2
+
3
+ This model is a fine-tuned version of YOLOv9c trained to detect playing cards in images. It has been trained on the Set Cards dataset from Roboflow.
4
+
5
+ ## Model Details
6
+
7
+ - **Base Model**: YOLOv9c
8
+ - **Task**: Object Detection
9
+ - **Target Class**: Cards
10
+ - **Training Dataset**: [Set Cards Dataset](https://universe.roboflow.com/tel-aviv/set_cards/dataset/1)
11
+ - **Image Size**: 512x512
12
+ - **Accuracy Metrics**: Evaluated at a confidence threshold of 0.5
13
+
14
+ ## Usage
15
+
16
+ ```python
17
+ from transformers import AutoImageProcessor, AutoModelForObjectDetection
18
+ import torch
19
+ from PIL import Image
20
+ import requests
21
+
22
+ # Load model and processor
23
+ processor = AutoImageProcessor.from_pretrained("YOUR_USERNAME/yolov9-card-detector")
24
+ model = AutoModelForObjectDetection.from_pretrained("YOUR_USERNAME/yolov9-card-detector")
25
+
26
+ # Load image
27
+ image_url = "https://example.com/path/to/card_image.jpg"
28
+ image = Image.open(requests.get(image_url, stream=True).raw)
29
+
30
+ # Prepare image for the model
31
+ inputs = processor(images=image, return_tensors="pt")
32
+
33
+ # Make prediction
34
+ with torch.no_grad():
35
+ outputs = model(**inputs)
36
+
37
+ # Process results
38
+ results = processor.post_process_object_detection(
39
+ outputs,
40
+ threshold=0.5,
41
+ target_sizes=[(image.height, image.width)]
42
+ )[0]
43
+
44
+ # Display results
45
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
46
+ box = [round(i, 2) for i in box.tolist()]
47
+ print(
48
+ f"Detected {model.config.id2label[label.item()]} with confidence "
49
+ f"{round(score.item(), 3)} at location {box}"
50
+ )
51
+ ```
52
+
53
+ ## Training
54
+
55
+ This model was fine-tuned from YOLOv9c using the Ultralytics framework. It was trained for 30 epochs with an image size of 512x512.
56
+
57
+ ## License
58
+
59
+ This model is licensed under CC BY 4.0, following the dataset's licensing terms.
60
+
61
+ ## Limitations
62
+
63
+ - The model is specifically trained to detect playing cards and may not perform well on other objects
64
+ - Performance may vary based on lighting conditions, card orientation, and image quality
65
+ - Best results are achieved with images similar to those in the training dataset
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import torch
3
+ import cv2
4
+ import base64
5
+ import numpy as np
6
+ from PIL import Image
7
+ from ultralytics import YOLO
8
+ from transformers import AutoImageProcessor
9
+ from datasets import Image as HFImage
10
+
11
def overlay_boxes(image, boxes, scores, labels, class_names, conf_threshold=0.5):
    """Draw bounding boxes with labels and scores onto an image.

    Args:
        image: PIL image (RGB); the input is copied, not modified.
        boxes: iterable of (x1, y1, x2, y2) pixel coordinates.
        scores: per-box confidence scores.
        labels: per-box class indices into *class_names*.
        class_names: list mapping class index -> display name.
        conf_threshold: boxes scoring below this are skipped.

    Returns:
        PIL.Image: a new annotated image.
    """
    img = np.array(image.copy())
    colors = {0: (0, 255, 0)}  # Green for card class

    # PERF FIX: the original converted the whole image RGB->BGR and back
    # once per box; one round-trip around the loop is pixel-identical.
    cv2_image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    for box, score, label in zip(boxes, scores, labels):
        if score >= conf_threshold:
            x1, y1, x2, y2 = map(int, box)
            label_text = f"{class_names[int(label)]}: {score:.2f}"
            # Unknown classes fall back to (255, 0, 0); NOTE(review): cv2
            # draws in BGR, so this renders blue, not red — confirm intent.
            color = colors.get(int(label), (255, 0, 0))
            cv2.rectangle(cv2_image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(cv2_image, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
    img = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)

    return Image.fromarray(img)
28
+
29
class YOLOv9CardDetector:
    """YOLOv9 card detector wrapper exposing a Hugging Face-style callable.

    Loads the fine-tuned ultralytics weights from ``best.pt`` and formats
    predictions (boxes/scores/labels) as plain Python lists, optionally
    attaching a JPEG-annotated copy of the input image.
    """

    def __init__(self):
        # Fine-tuned weights shipped alongside this file.
        self.model = YOLO("best.pt")
        self.config = {
            "class_names": ["card"],
            "conf_threshold": 0.5
        }
        # BUG FIX (robustness): the processor config in this repo is misnamed
        # ("preprocessor_config.json.json"), so from_pretrained(".") can fail;
        # the processor is never used during inference below, so a load
        # failure must not break the detector.
        try:
            self.image_processor = AutoImageProcessor.from_pretrained(".")
        except Exception:
            self.image_processor = None

    def __call__(self, inputs):
        """Run detection for the Hugging Face inference API.

        Args:
            inputs: a PIL image, or a dict with an "image" key holding a
                PIL image or a base64-encoded image string.

        Returns:
            dict with "boxes" (xyxy pixel lists), "scores", "labels",
            "class_names", and — when annotation succeeds — a JPEG
            "annotated_image" bytes payload.
        """
        # BUG FIX: the original tested isinstance(inputs, datasets.Image),
        # but datasets.Image is a feature *descriptor*, not an image class,
        # so PIL inputs never took the convert("RGB") path (and an actual
        # datasets.Image instance has no .convert). Test for PIL instead.
        if isinstance(inputs, Image.Image):
            image = inputs.convert("RGB")
        elif isinstance(inputs, dict) and "image" in inputs:
            # Handle API input format
            image = inputs["image"]
            if isinstance(image, str):
                # API payloads carry the image as a base64 string.
                image = Image.open(io.BytesIO(base64.b64decode(image)))
        else:
            image = inputs

        # Get predictions from YOLOv9 model
        with torch.no_grad():
            results = self.model(image)

        # ultralytics returns one Results object per input image.
        result = results[0]
        boxes = result.boxes.xyxy.cpu().numpy()
        scores = result.boxes.conf.cpu().numpy()
        labels = result.boxes.cls.cpu().numpy()

        # Format the output for Hugging Face inference API
        output = {
            "boxes": boxes.tolist(),
            "scores": scores.tolist(),
            "labels": labels.tolist(),
            "class_names": self.config["class_names"]
        }

        # Annotation is best-effort: a drawing/encoding failure must not
        # break the detection response. (The original guarded an inner
        # `import cv2` with except ImportError, but cv2 is imported
        # unconditionally at module top, so that guard was dead code.)
        try:
            annotated_image = overlay_boxes(
                image,
                boxes,
                scores,
                labels,
                self.config["class_names"],
                self.config["conf_threshold"]
            )
            buffered = io.BytesIO()
            annotated_image.save(buffered, format="JPEG")
            output["annotated_image"] = buffered.getvalue()
        except Exception:
            # Skip annotation if drawing or JPEG encoding fails.
            pass

        return output
88
+
89
# Initialize the model at module level for faster inference
# (the weight load in YOLOv9CardDetector.__init__ then happens once at
# import time rather than on every request).
detector = YOLOv9CardDetector()

def run_inference(inputs):
    """Entry point for the model.

    Thin wrapper forwarding *inputs* (PIL image or API dict payload, as
    accepted by YOLOv9CardDetector.__call__) to the module-level detector.
    """
    return detector(inputs)
best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a20fbe3f58b9c85b32de062a5f26137f37c1a6fcda7be0f1dcac31dca645918
3
+ size 6251353
config.json.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": ["YOLOv9"],
3
+ "model_type": "yolov9",
4
+ "backbone": "yolov9c",
5
+ "num_classes": 1,
6
+ "class_names": ["card"],
7
+ "image_size": [512, 512],
8
+ "threshold": 0.5,
9
+ "version": "YOLOv9c",
10
+ "training": {
11
+ "epochs": 30,
12
+ "batch_size": 16,
13
+ "optimizer": "SGD"
14
+ },
15
+ "source": "fine-tuned from ultralytics/yolov9c",
16
+ "license": "CC BY 4.0",
17
+ "dataset": "https://universe.roboflow.com/tel-aviv/set_cards/dataset/1"
18
+ }
data.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ names:
2
+ - 'card'
3
+ nc: 1
4
+ roboflow:
5
+ license: CC BY 4.0
6
+ project: set_cards
7
+ url: https://universe.roboflow.com/tel-aviv/set_cards/dataset/1
8
+ version: 1
9
+ workspace: tel-aviv
10
+ test: ../test/images
11
+ train: /content/SETCARDSBATCH-3/train/images
12
+ val: /content/SETCARDSBATCH-3/valid/images
inference.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ultralytics import YOLO
2
+ import numpy as np
3
+ from PIL import Image
4
+ import io
5
+ import base64
6
+ import torch
7
+
8
def hf_predict(image_bytes):
    """
    Inference function for Hugging Face Inference API.

    Args:
        image_bytes: Image file bytes from a POST request

    Returns:
        dict: {"detections": [...]}, each entry holding an integer pixel
        "box" (xmin/ymin/xmax/ymax), a float "score", and a string "label".
    """
    # Load image
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

    # PERF FIX: the original re-read best.pt from disk on every request;
    # cache the loaded model on the function itself and reuse it.
    model = getattr(hf_predict, "_model", None)
    if model is None:
        model = YOLO('best.pt')
        hf_predict._model = model

    # Run inference
    results = model(image)

    # ultralytics returns one Results object per input image.
    result = results[0]
    boxes = result.boxes.xyxy.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()

    # Map class IDs to names (single-class model: "card").
    class_names = ["card"]
    labels = [class_names[int(i)] for i in class_ids]

    # Format results as plain JSON-serializable values.
    detections = []
    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box
        detections.append({
            "box": {
                "xmin": int(x1),
                "ymin": int(y1),
                "xmax": int(x2),
                "ymax": int(y2)
            },
            "score": float(score),
            "label": label
        })

    return {"detections": detections}
53
+
54
def visualize(image_bytes, detections):
    """
    Visualize the detections on the image.

    Args:
        image_bytes: Image file bytes
        detections: Detection results as returned by hf_predict
            ({"detections": [{"box": ..., "score": ..., "label": ...}]})

    Returns:
        str | None: Base64 encoded JPEG with boxes drawn, or None when
        OpenCV is unavailable or the image cannot be decoded/encoded.
    """
    try:
        import cv2

        # Decode the raw bytes into a BGR image.
        nparr = np.frombuffer(image_bytes, np.uint8)
        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

        # Draw each detection box with its label and score.
        for det in detections["detections"]:
            box = det["box"]
            score = det["score"]
            label = det["label"]

            x1, y1 = box["xmin"], box["ymin"]
            x2, y2 = box["xmax"], box["ymax"]

            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, f"{label}: {score:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Convert to base64
        _, buffer = cv2.imencode('.jpg', image)
        return base64.b64encode(buffer).decode('utf-8')
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Visualization stays best-effort: any failure
        # (missing cv2, undecodable image) yields None instead of raising.
        return None
model-card.yaml.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-4.0
3
+ base_model: ultralytics/yolov9c
4
+ tags:
5
+ - object-detection
6
+ - yolo
7
+ - yolov9
8
+ - computer-vision
9
+ - cards
10
+ - image-classification
11
+ - pytorch
12
+ - ultralytics
13
+ datasets:
14
+ - tel-aviv/set_cards
15
+ widget:
16
+ - task: object-detection
17
+ inputs:
18
+ - type: image
19
+ name: image
20
+ placeholder: Upload an image of playing cards
21
+ required: true
22
+ ---
preprocessor_config.json.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor_type": "YoloImageProcessor",
3
+ "size": {
4
+ "height": 512,
5
+ "width": 512
6
+ },
7
+ "do_normalize": true,
8
+ "do_resize": true,
9
+ "do_rescale": true,
10
+ "rescale_factor": 0.00392156862745098,
11
+ "do_pad": true,
12
+ "padding_value": 114,
13
+ "format": "pytorch",
14
+ "do_convert_rgb": true
15
+ }
requirements.txt.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ torchvision>=0.15.0
3
+ numpy>=1.24.0
4
+ opencv-python>=4.7.0
5
+ ultralytics>=8.1.0
6
+ transformers>=4.35.0
7
+ pillow>=9.5.0