Upload 10 files
Browse files- .gitattributes +1 -0
- .gitattributes.txt +4 -0
- README.md.md +65 -0
- app.py +94 -0
- best.pt +3 -0
- config.json.json +18 -0
- data.yaml +12 -0
- inference.py +89 -0
- model-card.yaml.txt +22 -0
- preprocessor_config.json.json +15 -0
- requirements.txt.txt +7 -0
.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
best.pt filter=lfs diff=lfs merge=lfs -text
|
.gitattributes.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
README.md.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# YOLOv9 Card Detector
|
| 2 |
+
|
| 3 |
+
This model is a fine-tuned version of YOLOv9c trained to detect playing cards in images. It has been trained on the Set Cards dataset from Roboflow.
|
| 4 |
+
|
| 5 |
+
## Model Details
|
| 6 |
+
|
| 7 |
+
- **Base Model**: YOLOv9c
|
| 8 |
+
- **Task**: Object Detection
|
| 9 |
+
- **Target Class**: Cards
|
| 10 |
+
- **Training Dataset**: [Set Cards Dataset](https://universe.roboflow.com/tel-aviv/set_cards/dataset/1)
|
| 11 |
+
- **Image Size**: 512x512
|
| 12 |
+
- **Accuracy Metrics**: Evaluated at a confidence threshold of 0.5
|
| 13 |
+
|
| 14 |
+
## Usage
|
| 15 |
+
|
| 16 |
+
```python
|
| 17 |
+
from transformers import AutoImageProcessor, AutoModelForObjectDetection
|
| 18 |
+
import torch
|
| 19 |
+
from PIL import Image
|
| 20 |
+
import requests
|
| 21 |
+
|
| 22 |
+
# Load model and processor
|
| 23 |
+
processor = AutoImageProcessor.from_pretrained("YOUR_USERNAME/yolov9-card-detector")
|
| 24 |
+
model = AutoModelForObjectDetection.from_pretrained("YOUR_USERNAME/yolov9-card-detector")
|
| 25 |
+
|
| 26 |
+
# Load image
|
| 27 |
+
image_url = "https://example.com/path/to/card_image.jpg"
|
| 28 |
+
image = Image.open(requests.get(image_url, stream=True).raw)
|
| 29 |
+
|
| 30 |
+
# Prepare image for the model
|
| 31 |
+
inputs = processor(images=image, return_tensors="pt")
|
| 32 |
+
|
| 33 |
+
# Make prediction
|
| 34 |
+
with torch.no_grad():
|
| 35 |
+
outputs = model(**inputs)
|
| 36 |
+
|
| 37 |
+
# Process results
|
| 38 |
+
results = processor.post_process_object_detection(
|
| 39 |
+
outputs,
|
| 40 |
+
threshold=0.5,
|
| 41 |
+
target_sizes=[(image.height, image.width)]
|
| 42 |
+
)[0]
|
| 43 |
+
|
| 44 |
+
# Display results
|
| 45 |
+
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
|
| 46 |
+
box = [round(i, 2) for i in box.tolist()]
|
| 47 |
+
print(
|
| 48 |
+
f"Detected {model.config.id2label[label.item()]} with confidence "
|
| 49 |
+
f"{round(score.item(), 3)} at location {box}"
|
| 50 |
+
)
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
## Training
|
| 54 |
+
|
| 55 |
+
This model was fine-tuned from YOLOv9c using the Ultralytics framework. It was trained for 30 epochs with an image size of 512x512.
|
| 56 |
+
|
| 57 |
+
## License
|
| 58 |
+
|
| 59 |
+
This model is licensed under CC BY 4.0, following the dataset's licensing terms.
|
| 60 |
+
|
| 61 |
+
## Limitations
|
| 62 |
+
|
| 63 |
+
- The model is specifically trained to detect playing cards and may not perform well on other objects
|
| 64 |
+
- Performance may vary based on lighting conditions, card orientation, and image quality
|
| 65 |
+
- Best results are achieved with images similar to those in the training dataset
|
app.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import torch
|
| 3 |
+
import cv2
|
| 4 |
+
import base64
|
| 5 |
+
import numpy as np
|
| 6 |
+
from PIL import Image
|
| 7 |
+
from ultralytics import YOLO
|
| 8 |
+
from transformers import AutoImageProcessor
|
| 9 |
+
from datasets import Image as HFImage
|
| 10 |
+
|
| 11 |
+
def overlay_boxes(image, boxes, scores, labels, class_names, conf_threshold=0.5):
    """Draw bounding boxes with class labels and confidence scores on an image.

    Args:
        image: PIL image (RGB) to annotate; the input object is not modified.
        boxes: iterable of (x1, y1, x2, y2) pixel coordinates.
        scores: iterable of confidence scores aligned with ``boxes``.
        labels: iterable of numeric class ids aligned with ``boxes``.
        class_names: sequence mapping class id -> display name.
        conf_threshold: detections scoring below this value are skipped.

    Returns:
        A new ``PIL.Image`` with the annotations drawn.
    """
    # Fix: convert RGB -> BGR once up front.  The original converted the
    # whole image back and forth inside the loop for every single box,
    # which was pure wasted work; the drawn result is identical.
    cv2_image = cv2.cvtColor(np.array(image.copy()), cv2.COLOR_RGB2BGR)
    colors = {0: (0, 255, 0)}  # Green for card class

    for box, score, label in zip(boxes, scores, labels):
        if score >= conf_threshold:
            x1, y1, x2, y2 = map(int, box)
            label_text = f"{class_names[int(label)]}: {score:.2f}"
            # Unknown class ids fall back to red.
            color = colors.get(int(label), (255, 0, 0))
            cv2.rectangle(cv2_image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(cv2_image, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Convert back to RGB exactly once for the PIL return value.
    return Image.fromarray(cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB))
|
| 28 |
+
|
| 29 |
+
class YOLOv9CardDetector:
    """Playing-card detector backed by a fine-tuned YOLOv9 checkpoint.

    Wraps an Ultralytics ``YOLO`` model so it can serve Hugging Face
    inference-API style requests: accepts a PIL image, a dict with a
    base64-encoded ``"image"`` field, or anything Ultralytics can consume
    directly, and returns plain-Python detection results.
    """

    def __init__(self):
        # best.pt: YOLOv9c weights fine-tuned on the Set Cards dataset.
        self.model = YOLO("best.pt")
        self.config = {
            "class_names": ["card"],
            "conf_threshold": 0.5
        }
        # NOTE(review): loaded from the repo's preprocessor_config but never
        # used below — YOLO does its own preprocessing.  Kept so external
        # code reading ``detector.image_processor`` still works.
        self.image_processor = AutoImageProcessor.from_pretrained(".")

    def _decode_input(self, inputs):
        """Normalize the accepted input formats to something YOLO accepts."""
        # Fix: the original tested ``isinstance(inputs, HFImage)``, but
        # ``datasets.Image`` is a *feature descriptor* class, not an image
        # type, so that branch could never match a real image.  Test for a
        # PIL image instead and normalize it to RGB as intended.
        if isinstance(inputs, Image.Image):
            return inputs.convert("RGB")
        if isinstance(inputs, dict) and "image" in inputs:
            # API-style payload; a str value is base64-encoded image bytes.
            image = inputs["image"]
            if isinstance(image, str):
                image = Image.open(io.BytesIO(base64.b64decode(image)))
            return image
        # Fall through: let Ultralytics interpret the raw input as-is.
        return inputs

    def __call__(self, inputs):
        """Run detection and return boxes/scores/labels (+ annotated JPEG).

        Returns:
            dict with ``boxes``, ``scores``, ``labels``, ``class_names`` and,
            when annotation succeeds, ``annotated_image`` (JPEG bytes).
        """
        image = self._decode_input(inputs)

        # no_grad avoids building an autograd graph during inference.
        with torch.no_grad():
            results = self.model(image)

        # Ultralytics returns one Results object per input image.
        result = results[0]
        boxes = result.boxes.xyxy.cpu().numpy()
        scores = result.boxes.conf.cpu().numpy()
        labels = result.boxes.cls.cpu().numpy()

        output = {
            "boxes": boxes.tolist(),
            "scores": scores.tolist(),
            "labels": labels.tolist(),
            "class_names": self.config["class_names"]
        }

        # Best-effort annotated image.  Fix: the original re-imported cv2
        # here and caught only ImportError — dead code, since cv2 is
        # imported unconditionally at module top.  Annotation failures of
        # any kind should not break the JSON response, so guard broadly.
        try:
            annotated_image = overlay_boxes(
                image,
                boxes,
                scores,
                labels,
                self.config["class_names"],
                self.config["conf_threshold"]
            )
            buffered = io.BytesIO()
            annotated_image.save(buffered, format="JPEG")
            output["annotated_image"] = buffered.getvalue()
        except Exception:
            # Annotation is optional; skip it on any failure.
            pass

        return output
|
| 88 |
+
|
| 89 |
+
# Instantiate the detector once at import time so repeated requests reuse
# the already-loaded weights instead of paying the load cost per call.
detector = YOLOv9CardDetector()


def run_inference(inputs):
    """Serving entry point: delegate the request to the shared detector."""
    return detector(inputs)
|
best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a20fbe3f58b9c85b32de062a5f26137f37c1a6fcda7be0f1dcac31dca645918
|
| 3 |
+
size 6251353
|
config.json.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": ["YOLOv9"],
|
| 3 |
+
"model_type": "yolov9",
|
| 4 |
+
"backbone": "yolov9c",
|
| 5 |
+
"num_classes": 1,
|
| 6 |
+
"class_names": ["card"],
|
| 7 |
+
"image_size": [512, 512],
|
| 8 |
+
"threshold": 0.5,
|
| 9 |
+
"version": "YOLOv9c",
|
| 10 |
+
"training": {
|
| 11 |
+
"epochs": 30,
|
| 12 |
+
"batch_size": 16,
|
| 13 |
+
"optimizer": "SGD"
|
| 14 |
+
},
|
| 15 |
+
"source": "fine-tuned from ultralytics/yolov9c",
|
| 16 |
+
"license": "CC BY 4.0",
|
| 17 |
+
"dataset": "https://universe.roboflow.com/tel-aviv/set_cards/dataset/1"
|
| 18 |
+
}
|
data.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
names:
|
| 2 |
+
- 'card'
|
| 3 |
+
nc: 1
|
| 4 |
+
roboflow:
|
| 5 |
+
license: CC BY 4.0
|
| 6 |
+
project: set_cards
|
| 7 |
+
url: https://universe.roboflow.com/tel-aviv/set_cards/dataset/1
|
| 8 |
+
version: 1
|
| 9 |
+
workspace: tel-aviv
|
| 10 |
+
test: ../test/images
|
| 11 |
+
train: /content/SETCARDSBATCH-3/train/images
|
| 12 |
+
val: /content/SETCARDSBATCH-3/valid/images
|
inference.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from ultralytics import YOLO
|
| 2 |
+
import numpy as np
|
| 3 |
+
from PIL import Image
|
| 4 |
+
import io
|
| 5 |
+
import base64
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
def _load_card_model(weights="best.pt"):
    """Load the YOLO card model once and cache it on the function object."""
    model = getattr(_load_card_model, "_cached", None)
    if model is None:
        # Checkpoint loading is expensive; cache so it happens only once
        # per process instead of once per request.
        model = YOLO(weights)
        _load_card_model._cached = model
    return model


def hf_predict(image_bytes):
    """
    Inference function for Hugging Face Inference API.

    Args:
        image_bytes: Image file bytes from a POST request

    Returns:
        dict: Detection results in a format compatible with the Inference API
    """
    # Decode the uploaded bytes into an RGB PIL image.
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

    # Fix: the original called ``YOLO('best.pt')`` on every request,
    # reloading the checkpoint from disk each time.
    model = _load_card_model()

    # Run inference
    results = model(image)

    # Ultralytics returns one Results object per input image.
    result = results[0]
    boxes = result.boxes.xyxy.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()

    # Single-class model: every class id maps to "card".
    class_names = ["card"]
    labels = [class_names[int(i)] for i in class_ids]

    # Convert to the Inference API's detection schema.
    detections = []
    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box
        detections.append({
            "box": {
                "xmin": int(x1),
                "ymin": int(y1),
                "xmax": int(x2),
                "ymax": int(y2)
            },
            "score": float(score),
            "label": label
        })

    return {"detections": detections}
|
| 53 |
+
|
| 54 |
+
def visualize(image_bytes, detections):
    """
    Visualize the detections on the image.

    Args:
        image_bytes: Image file bytes
        detections: Detection results as returned by ``hf_predict``

    Returns:
        str | None: Base64-encoded JPEG with boxes drawn, or ``None`` when
        OpenCV is unavailable or rendering fails (visualization is
        best-effort and must not break the caller).
    """
    try:
        # OpenCV is treated as an optional dependency for this helper.
        import cv2

        # Decode the raw bytes into a BGR image.
        nparr = np.frombuffer(image_bytes, np.uint8)
        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

        # Draw each detection box with its label and score.
        for det in detections["detections"]:
            box = det["box"]
            score = det["score"]
            label = det["label"]

            x1, y1 = box["xmin"], box["ymin"]
            x2, y2 = box["xmax"], box["ymax"]

            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(image, f"{label}: {score:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Convert to base64 for transport in a JSON response.
        _, buffer = cv2.imencode('.jpg', image)
        return base64.b64encode(buffer).decode('utf-8')
    except Exception:
        # Fix: the original used a bare ``except:``, which also swallows
        # SystemExit and KeyboardInterrupt.  Catch only ordinary errors
        # and degrade to None, preserving the best-effort contract.
        return None
|
model-card.yaml.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc-by-4.0
|
| 3 |
+
base_model: ultralytics/yolov9c
|
| 4 |
+
tags:
|
| 5 |
+
- object-detection
|
| 6 |
+
- yolo
|
| 7 |
+
- yolov9
|
| 8 |
+
- computer-vision
|
| 9 |
+
- cards
|
| 10 |
+
- image-classification
|
| 11 |
+
- pytorch
|
| 12 |
+
- ultralytics
|
| 13 |
+
datasets:
|
| 14 |
+
- tel-aviv/set_cards
|
| 15 |
+
widgets:
|
| 16 |
+
- task: object-detection
|
| 17 |
+
inputs:
|
| 18 |
+
- type: image
|
| 19 |
+
name: image
|
| 20 |
+
placeholder: Upload an image of playing cards
|
| 21 |
+
required: true
|
| 22 |
+
---
|
preprocessor_config.json.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_processor_type": "YoloImageProcessor",
|
| 3 |
+
"size": {
|
| 4 |
+
"height": 512,
|
| 5 |
+
"width": 512
|
| 6 |
+
},
|
| 7 |
+
"do_normalize": true,
|
| 8 |
+
"do_resize": true,
|
| 9 |
+
"do_rescale": true,
|
| 10 |
+
"rescale_factor": 0.00392156862745098,
|
| 11 |
+
"do_pad": true,
|
| 12 |
+
"padding_value": 114,
|
| 13 |
+
"format": "pytorch",
|
| 14 |
+
"do_convert_rgb": true
|
| 15 |
+
}
|
requirements.txt.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.0.0
|
| 2 |
+
torchvision>=0.15.0
|
| 3 |
+
numpy>=1.24.0
|
| 4 |
+
opencv-python>=4.7.0
|
| 5 |
+
ultralytics>=8.1.0
|
| 6 |
+
transformers>=4.35.0
|
| 7 |
+
pillow>=9.5.0
|