first commit

Browse files

Files changed (7) hide show

.gitattributes +3 -0
AX650/fall_ax650_npu3.axmodel +3 -0
README.md +91 -0
axmodel_infer_fall.py +378 -0
axmodel_res.jpg +3 -0
config.json +0 -0
fall4.png +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.axmodel filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

AX650/fall_ax650_npu3.axmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ee38051240bd19e4a9b77030e983378ced8ac581f9f7c824349527340613eb8
+size 1129098

README.md ADDED Viewed

	@@ -0,0 +1,91 @@

+---
+license: agpl-3.0
+language:
+- en
+pipeline_tag: object-detection
+tags:
+- Axera
+- YOLOv7-pose
+- NPU
+- Object Detection
+- Keypoint Detection
+---
+# fall-axera
+This version of **fall-axera** has been converted to run on the Axera NPU using **w8a16** quantization. It is trained with a modified YOLOv7-pose model to detect human bounding boxes and 14 body keypoints, and to determine whether a fall is likely occurring.
+## Supported Classes
+This model is trained to detect the following classes:
+1. **normal**
+2. **fall**
+## Supported keypoints
+This model is trained to detect the following 14 keypoints:
+```
+"keypoints": { 0: "right shoulder", 1: "right elbow", 2: "right wrist", 3: "left shoulder", 4: "left elbow", 5: "left wrist", 6: "right hip", 7: "right knee", 8: "right ankle", 9: "left hip", 10: "left knee", 11: "left ankle", 12: "head tops", 13: "upper neck" }
+```
+Compatible with Pulsar2 version: 5.2.
+## Conversion tool links
+If you are interested in model conversion, you can export an axmodel yourself using:
+- [The repo of AXera Platform](https://github.com/AXERA-TECH/ax-samples), where you can get the detailed guide.
+- [Pulsar2 Link, How to Convert ONNX to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/pulsar2/introduction.html)
+## Supported Platforms
+https://docs.m5stack.com/zh_CN/ai_hardware/AI_Pyramid-Pro
+- **AX650N/AX8850**
+  - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
+  - [AI Pyramid](https://docs.m5stack.com/zh_CN/ai_hardware/AI_Pyramid-Pro)
+  - [M.2 Accelerator card](https://docs.m5stack.com/en/ai_hardware/LLM-8850_Card)
+## How to use
+Download all files from this repository to the device.
+### Python environment requirements
+#### pyaxengine
+https://github.com/AXERA-TECH/pyaxengine
+```bash
+wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc2/axengine-0.1.3-py3-none-any.whl
+pip install axengine-0.1.3-py3-none-any.whl
+```
+### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro)
+Input image:
+![](fall4.png)
+Run:
+```
+python3 axmodel_infer_fall.py
+```
+```
+root@ax650:~/fall-axera# python3 axmodel_infer_fall.py
+[INFO] Available providers:  ['AxEngineExecutionProvider', 'AXCLRTExecutionProvider']
+[INFO] Using provider: AxEngineExecutionProvider
+[INFO] Chip type: ChipType.MC50
+[INFO] VNPU type: VNPUType.DISABLED
+[INFO] Engine version: 2.12.0s
+[INFO] Model type: 2 (triple core)
+[INFO] Compiler version: 5.2 eccb31f5
+class: fall left:281 top:396 right:734 bottom:629 conf: 74%
+Result saved to axmodel_res.jpg
+```
+Output image:
+![](axmodel_res.jpg)
+### Extra
+This example only visualizes the model's predicted bounding boxes and keypoints. Fall detection can be made more reliable by adding rules based on the geometry of the boxes and keypoints, or by adding tracking and action-recognition models such as ST-GCN. In my experiments, factors such as occlusion, the direction of the fall, the camera angle, and even the scene (for example, on a bed versus on the floor) all affect fall-detection results.

axmodel_infer_fall.py ADDED Viewed

	@@ -0,0 +1,378 @@

+import cv2
+import numpy as np
+import axengine as axe
+import matplotlib
+import argparse
class Colors:
    """Cyclic colour palette built from matplotlib's Tableau colours.

    Calling an instance with an index returns the palette entry at
    ``int(i) % n`` as an ``(r, g, b)`` tuple, or the same entry with the
    first and last channels swapped (``(b, g, r)``) when ``bgr=True``.
    """

    def __init__(self):
        # Import the submodule explicitly instead of relying on a bare
        # ``import matplotlib`` happening to load ``matplotlib.colors``.
        import matplotlib.colors as mcolors

        self.palette = [self.hex2rgb(c) for c in mcolors.TABLEAU_COLORS.values()]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return the colour for index ``i`` (wraps around the palette)."""
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):
        """Convert a ``'#rrggbb'`` string into an ``(r, g, b)`` tuple of ints."""
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
+colors = Colors()
def plot_one_box(x, im, color=None, label=None, line_thickness=3, kpt_label=False, kpts=None, steps=2, orig_shape=None):
    """Draw one detection (box, optional label banner, optional skeleton) on ``im``.

    The rectangle spans ``(x[0], x[1])`` to ``(x[2], x[3])``. The label banner
    is only drawn when ``label`` contains at least one space. When
    ``kpt_label`` is true the keypoints are handed to ``plot_skeleton_kpts``.
    The image is modified in place; nothing is returned.
    """
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    thickness = line_thickness if line_thickness else round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=thickness * 1 // 3, lineType=cv2.LINE_AA)

    draw_banner = bool(label) and len(label.split(' ')) > 1
    if draw_banner:
        font_thickness = max(thickness - 1, 1)
        font_scale = thickness / 6
        text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
        # Filled banner sits directly above the top-left corner of the box.
        banner_corner = (top_left[0] + text_w, top_left[1] - text_h - 3)
        cv2.rectangle(im, top_left, banner_corner, color, -1, cv2.LINE_AA)
        text_origin = (top_left[0], top_left[1] - 2)
        cv2.putText(im, label, text_origin, 0, font_scale, [225, 255, 255],
                    thickness=font_thickness // 2, lineType=cv2.LINE_AA)

    if kpt_label:
        plot_skeleton_kpts(im, kpts, steps, orig_shape=orig_shape)
def plot_skeleton_kpts(im, kpts, steps, orig_shape=None):
    """Draw keypoint dots, their indices and the limb lines onto ``im`` in place.

    Args:
        im: image handed to the OpenCV drawing calls.
        kpts: flat sequence holding ``steps`` values per keypoint; the first
            two values of each group are used as x and y.
        steps: number of values stored per keypoint in ``kpts``.
        orig_shape: accepted for call compatibility; not used.

    A keypoint is skipped when its x or y is an exact multiple of 640; a limb
    is skipped when either endpoint has such a coordinate, a negative
    coordinate, or refers to a keypoint that ``kpts`` does not contain.
    """
    palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
                        [230, 230, 0], [255, 153, 255], [153, 204, 255],
                        [255, 102, 255], [255, 51, 255], [102, 178, 255],
                        [51, 153, 255], [255, 153, 153], [255, 102, 102],
                        [255, 51, 51], [153, 255, 153], [102, 255, 102],
                        [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
                        [255, 255, 255]])
    num_kpts = len(kpts) // steps
    # 1-based keypoint index pairs joined by a line.
    skeleton = [[1, 2], [2, 3], [14, 1], [14, 4], [4, 5], [5, 6], [13, 14], [7, 14], [10, 14], [7, 8], [8, 9],[10,11],[11, 12]]
    # One colour per skeleton entry; kept unsliced so a short ``kpts`` cannot
    # make the colour lookup fail for a limb that is otherwise drawable.
    pose_limb_color = palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
    pose_kpt_color = palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9][:num_kpts]]
    radius = 5

    for kid in range(num_kpts):
        x_coord, y_coord = kpts[steps * kid], kpts[steps * kid + 1]
        if x_coord % 640 == 0 or y_coord % 640 == 0:
            continue
        r, g, b = pose_kpt_color[kid]
        cv2.circle(im, (int(x_coord), int(y_coord)), radius, (int(r), int(g), int(b)), -1)
        cv2.putText(im,str(kid),(int(x_coord-2), int(y_coord-2)),cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,(0,0,255),1)

    def _unusable(pos):
        # Same rejection rule the limb loop always applied to an endpoint.
        return pos[0] % 640 == 0 or pos[1] % 640 == 0 or pos[0] < 0 or pos[1] < 0

    for sk_id, (first, second) in enumerate(skeleton):
        if first > num_kpts or second > num_kpts:
            # Endpoint not present in ``kpts``: skip instead of raising IndexError.
            continue
        base1, base2 = (first - 1) * steps, (second - 1) * steps
        pos1 = (int(kpts[base1]), int(kpts[base1 + 1]))
        pos2 = (int(kpts[base2]), int(kpts[base2 + 1]))
        if _unusable(pos1) or _unusable(pos2):
            continue
        r, g, b = pose_limb_color[sk_id]
        cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2)
def box_iou(box1, box2, eps=1e-7):
    """Return the pairwise IoU matrix between two sets of corner-format boxes.

    Args:
        box1: array of shape (N, 4) holding ``x1, y1, x2, y2`` per row.
        box2: array of shape (M, 4) in the same format.
        eps: small constant added to the union to avoid division by zero.

    Returns:
        Array of shape (N, M) where entry ``[i, j]`` is the IoU of
        ``box1[i]`` and ``box2[j]``.
    """
    box1 = np.asarray(box1)
    box2 = np.asarray(box2)
    # Insert broadcast axes so every box1 row meets every box2 row.
    a1, a2 = box1[:, None, :2], box1[:, None, 2:4]
    b1, b2 = box2[None, :, :2], box2[None, :, 2:4]
    overlap = np.clip(np.minimum(a2, b2) - np.maximum(a1, b1), 0, None)
    inter = overlap.prod(2)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` with a uniform scale and pad it with ``color`` borders.

    ``new_shape`` is ``(height, width)`` or a single int for a square target.
    With ``auto`` the padding is reduced modulo ``stride``; otherwise, with
    ``scaleFill``, the image is stretched to the target and no padding is
    added. Returns ``(image, ratio, (dw, dh))`` where ``dw``/``dh`` are the
    halved horizontal/vertical padding amounts.
    """
    shape = im.shape[:2]
    src_h, src_w = shape[0], shape[1]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        # Only ever shrink the image.
        scale = min(scale, 1.0)
    ratio = (scale, scale)

    resized_wh = (int(round(src_w * scale)), int(round(src_h * scale)))
    pad_w = dst_w - resized_wh[0]
    pad_h = dst_h - resized_wh[1]
    if auto:
        pad_w = np.mod(pad_w, stride)
        pad_h = np.mod(pad_h, stride)
    elif scaleFill:
        pad_w, pad_h = 0.0, 0.0
        resized_wh = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)

    # Split the padding between the two opposite sides.
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    if (src_w, src_h) != resized_wh:
        im = cv2.resize(im, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges send an odd leftover pixel to the bottom/right side.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (pad_w, pad_h)
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, kpt_label=False, step=2):
    """Map coordinates from the letterboxed image back to the original image.

    ``coords`` is modified in place and also returned. Without ``ratio_pad``
    the gain and padding are derived from the two ``(height, width)`` shapes;
    otherwise ``ratio_pad`` supplies ``(gain, (pad_x, pad_y))``. For boxes
    (``kpt_label=False``) columns 0/2 are x and 1/3 are y; for keypoints every
    ``step``-th column starting at 0 is x and starting at 1 is y.
    """
    if ratio_pad is not None:
        gain, pad = ratio_pad[0], ratio_pad[1]
    else:
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2
    if isinstance(gain, (list, tuple)):
        gain = gain[0]

    if kpt_label:
        x_cols, y_cols = slice(0, None, step), slice(1, None, step)
    else:
        x_cols, y_cols = [0, 2], [1, 3]

    # Remove the padding first, then undo the resize.
    coords[:, x_cols] -= pad[0]
    coords[:, y_cols] -= pad[1]
    coords[:, x_cols] /= gain
    coords[:, y_cols] /= gain
    return coords
def clip_coords(boxes, img_shape, step=2):
    """Clip coordinates in place so they lie inside an image.

    Args:
        boxes: 2-D NumPy array; every ``step``-th column starting at 0 is
            treated as x and starting at 1 as y.
        img_shape: ``(height, width)`` of the image.
        step: column stride between consecutive x (or y) values.

    Returns:
        None. ``boxes`` is modified in place.
    """
    x_view = boxes[:, 0::step]
    y_view = boxes[:, 1::step]
    # ``out=`` writes through the strided views, so ``boxes`` itself is updated.
    np.clip(x_view, 0, img_shape[1], out=x_view)
    np.clip(y_view, 0, img_shape[0], out=y_view)
def model_inference(model_path=None, input=None):
    """Run ``input`` through the axmodel at ``model_path`` and return the outputs.

    A new ``axe.InferenceSession`` is created on every call; ``input`` is fed
    to the model's first declared input and the result of ``session.run`` is
    returned unchanged.
    """
    sess = axe.InferenceSession(model_path, None)
    first_input = sess.get_inputs()[0]
    feeds = {first_input.name: input}
    return sess.run(None, feeds)
def xywh2xyxy(x):
    """Convert boxes from ``(cx, cy, w, h)`` to ``(x1, y1, x2, y2)``.

    The box is read from the last axis of ``x``; a converted copy is returned
    and ``x`` itself is left untouched.
    """
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


def nms_boxes(boxes, scores, iou_thres=0.45):
    """Greedy non-maximum suppression over corner-format boxes.

    Args:
        boxes: array of shape (N, 4) holding ``x1, y1, x2, y2`` per row.
        scores: array of shape (N,) with one score per box.
        iou_thres: a box is dropped when its overlap with an already kept,
            higher-scoring box exceeds this value.

    Returns:
        Integer array with the indices of the kept boxes, best score first
        (empty for empty input).
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    areas = w * h
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(x[i], x[rest])
        yy1 = np.maximum(y[i], y[rest])
        xx2 = np.minimum(x[i] + w[i], x[rest] + w[rest])
        yy2 = np.minimum(y[i] + h[i], y[rest] + h[rest])
        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1
        ovr = inter / (areas[i] + areas[rest] - inter)
        order = rest[ovr <= iou_thres]
    # Explicit integer dtype so an empty result is still usable as an index.
    return np.array(keep, dtype=np.int64)


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), kpt_label=False, nc=None, nkpt=14):
    """Filter raw detections by confidence and run per-class NMS (NumPy only).

    Args:
        prediction: array of shape (batch, candidates, columns). Columns are
            ``cx, cy, w, h, objectness``, then the class scores, then (when
            ``kpt_label`` is true) the keypoint values.
        conf_thres: minimum objectness and minimum final confidence.
        iou_thres: overlap threshold handed to ``nms_boxes``.
        classes: optional class id(s); detections of other classes are dropped.
        agnostic: when true, boxes of different classes suppress each other.
        multi_label: emit one detection per (box, class) pair above
            ``conf_thres`` instead of only the best class per box.
        labels: optional per-image arrays whose rows are
            ``class, cx, cy, w, h``; they are appended as extra candidates
            with objectness 1.
        kpt_label: whether keypoint columns follow the class scores.
        nc: number of classes; derived from the column count when omitted.
        nkpt: number of keypoints (three columns each), used to derive ``nc``.

    Returns:
        List with one array per image. Rows are
        ``x1, y1, x2, y2, confidence, class`` followed by the keypoint columns
        when ``kpt_label`` is true. Images without detections keep an empty
        ``(0, 6)`` array.
    """
    if nc is None:
        nc = prediction.shape[2] - 5 if not kpt_label else prediction.shape[2] - (5 + 3 * nkpt)
    xc = prediction[..., 4] > conf_thres  # objectness pre-filter

    max_wh = 4096    # per-class offset that keeps classes apart during NMS
    max_det = 300    # maximum detections kept per image
    max_nms = 30000  # maximum candidates passed to NMS
    multi_label &= nc > 1

    output = [np.zeros((0, 6))] * prediction.shape[0]
    for xi, x in enumerate(prediction):
        x = x[xc[xi]]  # boolean indexing copies, so ``prediction`` is not modified

        if labels and len(labels[xi]):
            l = np.asarray(labels[xi])
            v = np.zeros((len(l), x.shape[1]), dtype=x.dtype)
            v[:, :4] = l[:, 1:5]
            v[:, 4] = 1.0
            v[np.arange(len(l)), l[:, 0].astype(np.int64) + 5] = 1.0
            x = np.concatenate((x, v), 0)

        if not x.shape[0]:
            continue

        # Final confidence = objectness * class score.
        x[:, 5:5 + nc] *= x[:, 4:5]
        box = xywh2xyxy(x[:, :4])
        cls_scores = x[:, 5:5 + nc] if kpt_label else x[:, 5:]

        if multi_label:
            i, j = np.nonzero(cls_scores > conf_thres)
            parts = [box[i], x[i, j + 5, None], j[:, None]]
            if kpt_label:
                parts.append(x[i, 5 + nc:])
            x = np.concatenate(parts, 1)
        else:
            conf = cls_scores.max(1, keepdims=True)
            j = cls_scores.argmax(1)[:, None]
            parts = [box, conf, j]
            if kpt_label:
                parts.append(x[:, 5 + nc:])
            x = np.concatenate(parts, 1)[conf[:, 0] > conf_thres]

        if classes is not None:
            x = x[(x[:, 5:6] == np.atleast_1d(classes)).any(1)]

        n = x.shape[0]
        if not n:
            continue
        if n > max_nms:
            x = x[x[:, 4].argsort()[::-1][:max_nms]]

        # Shift boxes by a class-dependent offset so NMS only compares boxes
        # of the same class (unless ``agnostic`` disables the offset).
        c = x[:, 5:6] * (0 if agnostic else max_wh)
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = nms_boxes(boxes, scores, iou_thres)
        output[xi] = x[i[:max_det]]
    return output
+def _make_grid(nx=20, ny=20):
+    y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
+    yv, xv = np.meshgrid(y, x, indexing='ij')
+    return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2))
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
def preprocess(img_path, imgsz):
    """Load an image and build the uint8 batch tensor that is fed to the model.

    The image is letterboxed to ``imgsz`` (no stride-based padding), its
    channel axis is reversed, and it is rearranged from height-width-channel
    to channel-height-width with a leading batch axis of 1. No value scaling
    or normalisation is applied here.

    Args:
        img_path: path of the image file to read.
        imgsz: target ``(height, width)`` passed to ``letterbox``.

    Returns:
        ``(img, im0)``: the uint8 tensor of shape (1, C, H, W) and the image
        exactly as returned by ``cv2.imread``.

    Raises:
        OSError: if ``cv2.imread`` could not load ``img_path``.
    """
    im0 = cv2.imread(img_path)
    if im0 is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Failed to read image: {img_path!r} (missing file or unsupported format)")
    img = letterbox(im0, imgsz, auto=False, stride=32)[0]
    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
    img = np.asarray(img, dtype=np.uint8)
    img = np.expand_dims(img, 0)
    return img, im0
def model_postprocess(preds, anchors, stride, names, nkpt, conf_thres, iou_thres):
    """Decode the raw head outputs and run NMS.

    Args:
        preds: one array per detection head, each shaped
            ``(batch, na * no, ny, nx)``.
        anchors: per-head flat list of anchor ``w, h`` pairs.
        stride: per-head stride of the feature map relative to the input.
        names: class names; its length is the number of classes.
        nkpt: number of keypoints (three channels each).
        conf_thres: confidence threshold passed to ``non_max_suppression``.
        iou_thres: IoU threshold passed to ``non_max_suppression``.

    Returns:
        The list returned by ``non_max_suppression`` (one array per image).
    """
    na = len(anchors[0]) // 2          # anchors per head
    nc = len(names)
    no = nc + 5 + nkpt * 3             # channels per anchor
    z = []
    for i, pred in enumerate(preds):
        bs, _, ny, nx = pred.shape
        pred = pred.reshape(bs, na, no, ny, nx).transpose(0, 1, 3, 4, 2)
        pred_det = pred[..., :5 + nc]
        raw_kpt = pred[..., 5 + nc:]
        grid = _make_grid(nx, ny)
        y = sigmoid(pred_det)
        xy = (y[..., 0:2] * 2. - 0.5 + grid) * stride[i]
        # Use ``na`` rather than a literal 3 so other anchor counts decode too.
        wh = (y[..., 2:4] * 2) ** 2 * np.array(anchors[i]).reshape(1, na, 1, 1, 2)
        # Decode keypoints into a fresh array: ``raw_kpt`` may be a view of the
        # caller's output buffer, which must not be overwritten.
        kpt = np.empty_like(raw_kpt)
        kpt[..., 0::3] = (raw_kpt[..., 0::3] * 2. - 0.5 + grid[..., 0:1]) * stride[i]
        kpt[..., 1::3] = (raw_kpt[..., 1::3] * 2. - 0.5 + grid[..., 1:2]) * stride[i]
        kpt[..., 2::3] = sigmoid(raw_kpt[..., 2::3])
        y = np.concatenate((xy, wh, y[..., 4:], kpt), axis=-1)
        z.append(y.reshape(bs, na * nx * ny, no))
    preds = np.concatenate(z, 1)
    preds = non_max_suppression(preds, conf_thres, iou_thres, nc=nc, nkpt=nkpt, kpt_label=True)
    return preds
def draw_predictions(preds, img, im0, names, imgsz):
    """Rescale detections to the original image, print them and draw them.

    Args:
        preds: list with one detection array per image; rows are
            ``x1, y1, x2, y2, conf, cls`` followed by the keypoint columns.
            The arrays are rescaled in place.
        img: preprocessed network input; accepted for call compatibility and
            not used.
        im0: original image; drawn on in place.
        names: class names indexed by class id.
        imgsz: ``(height, width)`` of the network input.

    Returns:
        ``im0`` with boxes, labels and skeletons drawn.
    """
    for det in preds:
        if not len(det):
            continue
        scale_coords(imgsz, det[:, :4], im0.shape, kpt_label=False)
        scale_coords(imgsz, det[:, 6:], im0.shape, kpt_label=True, step=3)
        # Iterate whole rows so each box is drawn with its own keypoints; the
        # box columns were previously reversed while keypoints were looked up
        # with the forward index, mixing up detections.
        for row in reversed(det):
            xyxy, conf, cls = row[:4], row[4], row[5]
            print("class:",names[int(cls)], "left:%.0f" % xyxy[0],"top:%.0f" % xyxy[1],"right:%.0f" % xyxy[2],"bottom:%.0f" % xyxy[3], "conf:",'{:.0f}%'.format(float(conf)*100))
            c = int(cls)
            label = f'{names[c]} {conf:.2f}'
            plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=2,
                         kpt_label=True, kpts=row[6:], steps=3, orig_shape=im0.shape[:2])
    return im0
if __name__ == "__main__":
    # Command-line entry point: preprocess -> NPU inference -> decode/NMS -> draw -> save.
    parser = argparse.ArgumentParser(description='跌倒检测模型推理脚本')
    # NOTE(review): the accompanying file list places the model under AX650/;
    # confirm whether this default path should point there.
    parser.add_argument('--model', type=str, default='./fall_ax650_npu3.axmodel',
                        help='axmodel 模型路径')
    parser.add_argument('--img', type=str, default='./fall4.png',
                        help='输入图像路径')
    parser.add_argument('--output', type=str, default='axmodel_res.jpg',
                        help='输出结果图像路径')
    parser.add_argument('--imgsz', type=int, nargs=2, default=[320, 480],
                        help='输入图像尺寸 (height width)')
    parser.add_argument('--conf-thres', type=float, default=0.3,
                        help='置信度阈值')
    parser.add_argument('--iou-thres', type=float, default=0.45,
                        help='IOU阈值')
    args = parser.parse_args()
    # model params
    names = ['normal', 'fall']
    anchors = [[30, 61, 55, 124, 90, 207], [149, 232, 128, 357, 221, 308]]
    stride = [16, 32]
    nkpt = 14
    imgsz = tuple(args.imgsz)
    img, im0 = preprocess(args.img, imgsz)
    preds = model_inference(args.model, img)
    preds = model_postprocess(preds, anchors, stride, names, nkpt, args.conf_thres, args.iou_thres)
    im0 = draw_predictions(preds, img, im0, names, imgsz)
    # cv2.imwrite reports failure through its return value instead of raising,
    # so check it before claiming the result was saved.
    if not cv2.imwrite(args.output, im0):
        raise SystemExit(f"Failed to write result image to {args.output}")
    print(f"Result saved to {args.output}")

axmodel_res.jpg ADDED Viewed

Git LFS Details

SHA256: ed3d9c2c79700e825fd049129b9228e9ef6f94dfefaac4a8b86f9cdb047a6f1e
Pointer size: 131 Bytes
Size of remote file: 172 kB

config.json ADDED Viewed

File without changes

fall4.png ADDED Viewed

Git LFS Details

SHA256: 5919910f56b047a3b697b95eb415e227de75738be7db92bab1d350a4d8de695b
Pointer size: 131 Bytes
Size of remote file: 668 kB