#!/usr/bin/env python3
"""
Compute baselines for action prediction and recognition tasks:
1. Majority class baseline
2. Transition matrix baseline (for prediction: P(next|prev),
   for recognition: P(current|prev))
3. Class frequency baseline (weighted random)
4. Transition matrix + sampling baseline (sample from P(next|prev))
"""
import os
import sys
import json
import re

import numpy as np
from collections import Counter
from sklearn.metrics import accuracy_score, f1_score, classification_report

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS

# Resolve the ${PULSE_ROOT} placeholder from the environment instead of
# treating it as a literal path.
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")

# Verb taxonomy copied from train_pred_cls.py: (regex pattern, canonical verb).
# Rules are tried in order and the first match wins. The Chinese labels are
# kept verbatim because they must match the annotation text; English glosses
# are given per group.
VERB_MAP_RULES = [
    # 抓取 (grasp / pick up)
    ('抓取', '抓取'), ('拿起', '抓取'), ('拿出', '抓取'),
    ('从.*取出', '抓取'), ('从.*抓取', '抓取'), ('从.*提取', '抓取'),
    ('从.*取下', '抓取'), ('从.*抽出', '抓取'), ('从.*拔出', '抓取'),
    ('双手抓', '抓取'), ('双手协.*抓', '抓取'), ('分别抓', '抓取'), ('伸手', '抓取'),
    # 放置 (place / put down)
    ('放置', '放置'), ('放回', '放置'), ('放入', '放置'), ('丢弃', '放置'), ('归还', '放置'),
    # 移动 (move / carry)
    ('移动', '移动'), ('搬运', '移动'), ('移开', '移动'), ('递给', '移动'),
    ('拉', '移动'), ('推', '移动'), ('端', '移动'), ('挪', '移动'), ('传', '移动'),
    # 调整 (adjust)
    ('调整', '调整'), ('调节', '调整'), ('对齐', '调整'), ('理顺', '调整'), ('整平', '调整'),
    # 擦拭 (wipe / clean)
    ('擦拭', '擦拭'), ('清洁', '擦拭'), ('清除', '擦拭'), ('清理', '擦拭'), ('擦干', '擦拭'),
    # 折叠 (fold)
    ('折叠', '折叠'), ('对折', '折叠'), ('弯折', '折叠'), ('卷', '折叠'), ('卷起', '折叠'),
    # 旋转 (rotate / turn)
    ('旋转', '旋转'), ('拧', '旋转'), ('转动', '旋转'), ('扭', '旋转'),
    ('翻转', '旋转'), ('翻开', '旋转'), ('掀', '旋转'),
    # 操作 (operate / use)
    ('操作', '操作'), ('使用', '操作'), ('打开', '操作'), ('关闭', '操作'),
    ('开启', '操作'), ('启动', '操作'), ('切割', '操作'), ('切', '操作'),
    ('剪', '操作'), ('按', '操作'), ('点', '操作'), ('敲', '操作'),
    # 盖合 (close / cover)
    ('盖', '盖合'), ('盖上', '盖合'), ('合上', '盖合'), ('扣上', '盖合'), ('密封', '盖合'),
    # 整理 (tidy / organize)
    ('整理', '整理'), ('收纳', '整理'), ('归类', '整理'), ('排列', '整理'),
    ('堆叠', '整理'), ('叠放', '整理'),
    # 展开 (unfold / spread)
    ('展开', '展开'), ('铺', '展开'), ('摊', '展开'), ('撑开', '展开'), ('打开.*展', '展开'),
    # 倾倒 (pour)
    ('倾倒', '倾倒'), ('倒', '倾倒'), ('注入', '倾倒'), ('浇', '倾倒'), ('淋', '倾倒'),
    # 检查 (inspect / check)
    ('检查', '检查'), ('查看', '检查'), ('观察', '检查'), ('确认', '检查'), ('审视', '检查'),
    # 提起 (lift)
    ('提起', '提起'), ('举起', '提起'), ('抬起', '提起'), ('提', '提起'), ('举', '提起'),
    # 释放 (release / let go)
    ('释放', '释放'), ('松开', '释放'), ('松手', '释放'), ('放开', '释放'), ('脱手', '释放'),
    # 粘贴 (attach / fasten)
    ('粘贴', '粘贴'), ('贴', '粘贴'), ('粘', '粘贴'), ('缠', '粘贴'),
    ('绑', '粘贴'), ('系', '粘贴'), ('连接', '粘贴'), ('固定', '粘贴'),
    # 分离 (detach / separate)
    ('分离', '分离'), ('拆', '分离'), ('撕', '分离'), ('剥', '分离'), ('解开', '分离'), ('拔', '分离'),
    # 按压 (press / squeeze)
    ('按压', '按压'), ('压', '按压'), ('挤', '按压'), ('捏', '按压'),
]

# 20 fine-grained classes: 19 verbs plus the catch-all 其他 ("other").
# Note: '翻转' (flip) is listed here, but VERB_MAP_RULES maps it to '旋转',
# so classify_verb never actually emits it.
ACTION_CLASSES = [
    '抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转', '操作', '盖合', '整理',
    '展开', '倾倒', '检查', '提起', '释放', '粘贴', '分离', '按压', '翻转', '其他',
]

# Collapse the fine classes into 8 coarse ones: seven retained verbs plus 其他.
COARSE_MAP = {
    '抓取': '抓取', '放置': '放置', '移动': '移动', '调整': '调整',
    '擦拭': '擦拭', '折叠': '折叠', '旋转': '旋转',
    '操作': '其他', '盖合': '其他', '整理': '其他', '展开': '其他',
    '倾倒': '其他', '检查': '其他', '提起': '其他', '释放': '其他',
    '粘贴': '其他', '分离': '其他', '按压': '其他', '翻转': '其他', '其他': '其他',
}
COARSE_CLASSES = ['抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转', '其他']


def classify_verb(text):
    """Map a free-text Chinese task description to a canonical verb."""
    for pattern, verb in VERB_MAP_RULES:
        if re.search(pattern, text):
            return verb
    return '其他'
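
# A quick illustration of classify_verb on hypothetical inputs (not taken
# from the dataset). Because rules are tried in order, the first match
# decides the verb:
#   classify_verb('从抽屉中取出剪刀')  -> '抓取'  (matches '从.*取出')
#   classify_verb('翻转砧板')          -> '旋转'  ('翻转' is a 旋转 rule)
#   classify_verb('哼歌')              -> '其他'  (no rule matches)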

def load_annotations(vols, coarse=True):
    """Load annotation segments as (prev_label_idx, current_label_idx) pairs."""
    segments = []  # list of (prev_label_idx, current_label_idx)
    classes = COARSE_CLASSES if coarse else ACTION_CLASSES
    class2idx = {c: i for i, c in enumerate(classes)}
    for vol in vols:
        ann_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_dir):
            continue
        for fn in sorted(os.listdir(ann_dir)):
            if not fn.endswith('.json'):
                continue
            with open(os.path.join(ann_dir, fn)) as f:
                data = json.load(f)
            anns = data.get('segments', data.get('annotations', []))
            scene_segs = []
            for ann in anns:
                text = ann.get('task', ann.get('description', ''))
                verb = classify_verb(text)
                if coarse:
                    verb = COARSE_MAP.get(verb, '其他')
                if verb in class2idx:
                    scene_segs.append(class2idx[verb])
            # For prediction these are (prev, next) pairs; for recognition,
            # (prev, current). Structurally they are the same thing. The first
            # segment of a scene uses itself as its own predecessor.
            for i in range(len(scene_segs)):
                prev = scene_segs[i - 1] if i > 0 else scene_segs[i]
                current = scene_segs[i]
                segments.append((prev, current))
    return segments, classes


def compute_transition_matrix(segments, num_classes):
    """Estimate P(next | prev) from training segments."""
    counts = np.zeros((num_classes, num_classes))
    for prev, current in segments:
        counts[prev, current] += 1
    # Normalize rows; rows for classes never seen as `prev` stay all-zero
    # (their divisor is clamped to 1 to avoid division by zero).
    row_sums = counts.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1
    trans_matrix = counts / row_sums
    return trans_matrix
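
# Worked example of compute_transition_matrix on toy data (illustrative only,
# not part of the pipeline). With 3 classes and (prev, current) pairs
# [(0, 1), (0, 1), (0, 2), (1, 0)], the count matrix is
#   [[0, 2, 1],
#    [1, 0, 0],
#    [0, 0, 0]]
# and row-normalizing gives
#   [[0.00, 0.67, 0.33],
#    [1.00, 0.00, 0.00],
#    [0.00, 0.00, 0.00]]
# Class 2 never occurs as `prev`, so its row stays all-zero; baseline 4 below
# falls back to the majority class in that case.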

def main():
    for coarse in [True, False]:
        tag = "8 coarse" if coarse else "20 fine"
        print(f"\n{'=' * 60}")
        print(f"Baselines — {tag} classes")
        print(f"{'=' * 60}")
        train_segs, classes = load_annotations(TRAIN_VOLS, coarse=coarse)
        test_segs, _ = load_annotations(TEST_VOLS, coarse=coarse)
        num_classes = len(classes)

        # Extract test labels.
        test_prev = [s[0] for s in test_segs]
        test_true = [s[1] for s in test_segs]
        train_labels = [s[1] for s in train_segs]
        print(f"Train segments: {len(train_segs)}")
        print(f"Test segments: {len(test_segs)}")

        # 1. Majority class baseline.
        label_counts = Counter(train_labels)
        majority_class = label_counts.most_common(1)[0][0]
        majority_preds = [majority_class] * len(test_true)
        maj_acc = accuracy_score(test_true, majority_preds)
        maj_f1w = f1_score(test_true, majority_preds, average='weighted', zero_division=0)
        maj_f1m = f1_score(test_true, majority_preds, average='macro', zero_division=0)
        print(f"\n1. Majority class baseline (always predict '{classes[majority_class]}'):")
        print(f"   acc={maj_acc:.3f} f1w={maj_f1w:.3f} f1m={maj_f1m:.3f}")

        # 2. Class frequency baseline: sample labels from the train distribution.
        freq = np.bincount(train_labels, minlength=num_classes).astype(float)
        freq = freq / freq.sum()
        np.random.seed(42)
        freq_preds = np.random.choice(num_classes, size=len(test_true), p=freq)
        freq_acc = accuracy_score(test_true, freq_preds)
        freq_f1w = f1_score(test_true, freq_preds, average='weighted', zero_division=0)
        freq_f1m = f1_score(test_true, freq_preds, average='macro', zero_division=0)
        print("\n2. Random (train distribution) baseline:")
        print(f"   acc={freq_acc:.3f} f1w={freq_f1w:.3f} f1m={freq_f1m:.3f}")

        # 3. Transition matrix baseline: most likely next class given prev.
        trans_matrix = compute_transition_matrix(train_segs, num_classes)
        trans_preds = [int(np.argmax(trans_matrix[prev])) for prev in test_prev]
        trans_acc = accuracy_score(test_true, trans_preds)
        trans_f1w = f1_score(test_true, trans_preds, average='weighted', zero_division=0)
        trans_f1m = f1_score(test_true, trans_preds, average='macro', zero_division=0)
        print("\n3. Transition matrix baseline (argmax P(next|prev)):")
        print(f"   acc={trans_acc:.3f} f1w={trans_f1w:.3f} f1m={trans_f1m:.3f}")

        # Print the transition matrix (class labels truncated to 2 characters).
        print("\n   Transition matrix (rows=prev, cols=next):")
        header = "     " + "".join(f"{c[:2]:>6}" for c in classes)
        print(header)
        for i, row in enumerate(trans_matrix):
            vals = "".join(f"{v:6.2f}" for v in row)
            print(f"   {classes[i][:2]}{vals}")

        # 4. Transition + sampling: sample from P(next|prev) instead of argmax.
        np.random.seed(42)
        trans_sample_preds = []
        for prev in test_prev:
            p = trans_matrix[prev]
            if p.sum() == 0:
                # `prev` never occurred in training; fall back to the majority class.
                trans_sample_preds.append(majority_class)
            else:
                trans_sample_preds.append(np.random.choice(num_classes, p=p))
        ts_acc = accuracy_score(test_true, trans_sample_preds)
        ts_f1w = f1_score(test_true, trans_sample_preds, average='weighted', zero_division=0)
        ts_f1m = f1_score(test_true, trans_sample_preds, average='macro', zero_division=0)
        print("\n4. Transition matrix + sampling baseline:")
        print(f"   acc={ts_acc:.3f} f1w={ts_f1w:.3f} f1m={ts_f1m:.3f}")

        # Per-class report for the transition-argmax baseline. Passing explicit
        # `labels` keeps classification_report consistent with target_names even
        # when some classes are absent from the test set.
        print("\n   Per-class report (transition argmax):")
        report = classification_report(
            test_true, trans_preds,
            labels=list(range(num_classes)), target_names=classes,
            zero_division=0,
        )
        print(report)


if __name__ == '__main__':
    main()
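
# Usage sketch (the script name and environment-variable layout are
# assumptions inferred from this file, not documented elsewhere):
#   PULSE_ROOT=/path/to/annotations python compute_baselines.py
# Expects <PULSE_ROOT>/<vol>/*.json with a top-level "segments" (or
# "annotations") list whose items carry a "task" (or "description") field.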