File size: 10,106 Bytes

b4b2877

#!/usr/bin/env python3
"""
Compute baselines for action prediction and recognition tasks:
1. Majority class baseline
2. Transition matrix baseline (for prediction: P(next|prev), for recognition: P(current|prev))
3. Class frequency baseline (weighted random)
"""

import os
import sys
import json
import pickle
import re
import numpy as np
from collections import Counter, defaultdict
from sklearn.metrics import accuracy_score, f1_score, classification_report

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.dataset import DATASET_DIR, TRAIN_VOLS, VAL_VOLS, TEST_VOLS

# Root directory that holds one sub-directory of annotation JSON files per
# volume (joined with the volume name in load_annotations).
# The value is written as a shell-style placeholder, which os.path.join would
# otherwise treat as a literal directory name. Expanding it here lets the
# PULSE_ROOT environment variable select the location; os.path.expandvars
# leaves the text unchanged when PULSE_ROOT is unset, so the previous value is
# preserved in that case.
ANNOTATION_DIR = os.path.expandvars("${PULSE_ROOT}")

# Verb taxonomy copied from train_pred_cls.py.
#
# Ordered list of (regex pattern, canonical verb) pairs. classify_verb() walks
# the list from top to bottom and returns the verb of the FIRST pattern that
# re.search() finds anywhere in the text, so the order of entries is part of
# the behaviour: an earlier, shorter pattern shadows any later pattern that
# contains it.
#
# NOTE(review): because of first-match-wins, some entries cannot take effect:
#   - ('按', '操作') precedes ('按压', '按压'), so text containing "按压" is
#     labelled '操作'; the '按压' class is only reachable via '压'/'挤'/'捏'.
#   - ('打开', '操作') precedes ('打开.*展', '展开'), so the latter never fires.
#   - '卷起' and '盖上' are shadowed by '卷' and '盖' (same target, harmless).
# NOTE(review): no rule produces '翻转' (it is mapped to '旋转' below), so the
# '翻转' entry of ACTION_CLASSES never receives a label from this table.
# Presumably these must stay in sync with train_pred_cls.py — confirm before
# reordering anything here.
VERB_MAP_RULES = [
    # Grasp / pick up / take out of something
    ('抓取', '抓取'), ('拿起', '抓取'), ('拿出', '抓取'),
    ('从.*取出', '抓取'), ('从.*抓取', '抓取'), ('从.*提取', '抓取'),
    ('从.*取下', '抓取'), ('从.*抽出', '抓取'), ('从.*拔出', '抓取'),
    ('双手抓', '抓取'), ('双手协.*抓', '抓取'), ('分别抓', '抓取'),
    ('伸手', '抓取'),
    # Place / put back / discard
    ('放置', '放置'), ('放回', '放置'), ('放入', '放置'),
    ('丢弃', '放置'), ('归还', '放置'),
    # Move / carry / push / pull / hand over
    ('移动', '移动'), ('搬运', '移动'), ('移开', '移动'),
    ('递给', '移动'), ('拉', '移动'), ('推', '移动'),
    ('端', '移动'), ('挪', '移动'), ('传', '移动'),
    # Adjust / align
    ('调整', '调整'), ('调节', '调整'), ('对齐', '调整'),
    ('理顺', '调整'), ('整平', '调整'),
    # Wipe / clean
    ('擦拭', '擦拭'), ('清洁', '擦拭'), ('清除', '擦拭'),
    ('清理', '擦拭'), ('擦干', '擦拭'),
    # Fold / roll
    ('折叠', '折叠'), ('对折', '折叠'), ('弯折', '折叠'),
    ('卷', '折叠'), ('卷起', '折叠'),
    # Rotate / twist / flip
    ('旋转', '旋转'), ('拧', '旋转'), ('转动', '旋转'),
    ('扭', '旋转'), ('翻转', '旋转'), ('翻开', '旋转'),
    ('掀', '旋转'),
    # Operate / use / open / close / cut / press a button
    ('操作', '操作'), ('使用', '操作'), ('打开', '操作'),
    ('关闭', '操作'), ('开启', '操作'), ('启动', '操作'),
    ('切割', '操作'), ('切', '操作'), ('剪', '操作'),
    ('按', '操作'), ('点', '操作'), ('敲', '操作'),
    # Cover / close a lid / seal
    ('盖', '盖合'), ('盖上', '盖合'), ('合上', '盖合'),
    ('扣上', '盖合'), ('密封', '盖合'),
    # Tidy / organise / stack
    ('整理', '整理'), ('收纳', '整理'), ('归类', '整理'),
    ('排列', '整理'), ('堆叠', '整理'), ('叠放', '整理'),
    # Unfold / spread out
    ('展开', '展开'), ('铺', '展开'), ('摊', '展开'),
    ('撑开', '展开'), ('打开.*展', '展开'),
    # Pour
    ('倾倒', '倾倒'), ('倒', '倾倒'), ('注入', '倾倒'),
    ('浇', '倾倒'), ('淋', '倾倒'),
    # Inspect / look at
    ('检查', '检查'), ('查看', '检查'), ('观察', '检查'),
    ('确认', '检查'), ('审视', '检查'),
    # Lift
    ('提起', '提起'), ('举起', '提起'), ('抬起', '提起'),
    ('提', '提起'), ('举', '提起'),
    # Release / let go
    ('释放', '释放'), ('松开', '释放'), ('松手', '释放'),
    ('放开', '释放'), ('脱手', '释放'),
    # Stick / tie / attach / fix in place
    ('粘贴', '粘贴'), ('贴', '粘贴'), ('粘', '粘贴'),
    ('缠', '粘贴'), ('绑', '粘贴'), ('系', '粘贴'),
    ('连接', '粘贴'), ('固定', '粘贴'),
    # Separate / tear / peel / unplug
    ('分离', '分离'), ('拆', '分离'), ('撕', '分离'),
    ('剥', '分离'), ('解开', '分离'), ('拔', '分离'),
    # Press / squeeze / pinch
    ('按压', '按压'), ('压', '按压'), ('挤', '按压'),
    ('捏', '按压'),
]

# Fine-grained label set (20 entries); a fine class index is a position in
# this list.
ACTION_CLASSES = [
    '抓取', '放置', '移动', '调整', '擦拭',
    '折叠', '旋转', '操作', '盖合', '整理',
    '展开', '倾倒', '检查', '提起', '释放',
    '粘贴', '分离', '按压', '翻转', '其他',
]

# Coarse label set (8 entries): seven verbs that keep their own class plus the
# catch-all '其他'. A coarse class index is a position in this list.
COARSE_CLASSES = ['抓取', '放置', '移动', '调整', '擦拭', '折叠', '旋转', '其他']

# Fine -> coarse lookup, keyed in ACTION_CLASSES order: a fine class that is
# also a coarse class maps to itself, every other fine class collapses into
# '其他'.
COARSE_MAP = {
    fine: (fine if fine in COARSE_CLASSES else '其他')
    for fine in ACTION_CLASSES
}


def classify_verb(text):
    """Map a free-text action description to a canonical verb class.

    The rules in ``VERB_MAP_RULES`` are tried in list order with
    ``re.search``; the verb of the first pattern found anywhere in ``text`` is
    returned. When no pattern matches, the catch-all class '其他' is returned.
    """
    # Lazy generator: evaluation stops at the first rule that matches.
    hits = (verb for pattern, verb in VERB_MAP_RULES if re.search(pattern, text))
    return next(hits, '其他')


def load_annotations(vols, coarse=True):
    """Load (prev, current) verb-class index pairs from annotation JSON files.

    For every volume name in ``vols`` the directory ``ANNOTATION_DIR/<vol>`` is
    scanned in sorted filename order; volumes whose directory does not exist
    are skipped. Each ``*.json`` file is treated as one scene. Its entries are
    read from the ``'segments'`` key (falling back to ``'annotations'``), and
    each entry's ``'task'`` text (falling back to ``'description'``) is mapped
    to a verb class with ``classify_verb``.

    Args:
        vols: Iterable of volume directory names under ``ANNOTATION_DIR``.
        coarse: When true, verbs are collapsed through ``COARSE_MAP`` and the
            indices refer to ``COARSE_CLASSES``; otherwise they refer to
            ``ACTION_CLASSES``.

    Returns:
        A tuple ``(segments, classes)``. ``segments`` is a list of
        ``(prev_idx, current_idx)`` pairs, one per labelled segment, in file
        and annotation order. ``classes`` is the class-name list that the
        indices point into.
    """
    classes = COARSE_CLASSES if coarse else ACTION_CLASSES
    class2idx = {name: idx for idx, name in enumerate(classes)}
    segments = []  # list of (prev_label_idx, current_label_idx)

    for vol in vols:
        ann_dir = os.path.join(ANNOTATION_DIR, vol)
        if not os.path.isdir(ann_dir):
            continue
        for fn in sorted(os.listdir(ann_dir)):
            if not fn.endswith('.json'):
                continue
            # The annotation text is Chinese; decode explicitly as UTF-8
            # instead of relying on the platform's locale default encoding.
            with open(os.path.join(ann_dir, fn), encoding='utf-8') as f:
                data = json.load(f)

            anns = data.get('segments', data.get('annotations', []))
            scene_labels = []
            for ann in anns:
                text = ann.get('task', ann.get('description', ''))
                verb = classify_verb(text)
                if coarse:
                    verb = COARSE_MAP.get(verb, '其他')
                if verb in class2idx:
                    scene_labels.append(class2idx[verb])

            # The same (prev, current) pairs serve both tasks: prediction reads
            # them as (prev, next), recognition as (prev, current).
            # The first segment of a scene has no predecessor and is paired
            # with itself.
            # NOTE(review): for that first segment `prev` equals the true
            # label, so a consumer that predicts from `prev` sees the answer
            # for one segment per scene — confirm this is intended.
            prev_labels = scene_labels[:1] + scene_labels[:-1]
            segments.extend(zip(prev_labels, scene_labels))

    return segments, classes


def compute_transition_matrix(segments, num_classes):
    """Estimate the row-stochastic table P(next | prev) from label pairs.

    Args:
        segments: Iterable of ``(prev_idx, current_idx)`` integer pairs.
        num_classes: Size of the label set; the result is
            ``num_classes x num_classes``.

    Returns:
        A float array whose row ``i`` holds the relative frequency of each
        class following class ``i``. Rows for classes never seen as ``prev``
        are all zeros.
    """
    tally = np.zeros((num_classes, num_classes))
    pairs = list(segments)
    if pairs:
        prev_idx, next_idx = (list(column) for column in zip(*pairs))
        # Unbuffered add so repeated (prev, next) pairs each contribute.
        np.add.at(tally, (prev_idx, next_idx), 1)

    totals = tally.sum(axis=1, keepdims=True)
    # Adding the boolean mask turns a zero total into 1, so empty rows divide
    # cleanly and stay all-zero.
    return tally / (totals + (totals == 0))


def _print_scores(y_true, y_pred):
    """Print accuracy, weighted F1 and macro F1 for one set of predictions."""
    acc = accuracy_score(y_true, y_pred)
    f1w = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    f1m = f1_score(y_true, y_pred, average='macro', zero_division=0)
    print(f"   acc={acc:.3f} f1w={f1w:.3f} f1m={f1m:.3f}")


def main():
    """Evaluate the label-only baselines on the test volumes and print them.

    Runs once with the coarse label set and once with the fine label set.
    Each run fits on the TRAIN_VOLS pairs and scores on the TEST_VOLS pairs:

    1. majority class,
    2. random draws from the training class distribution,
    3. argmax of the transition row P(next | prev),
    4. a sample from the transition row P(next | prev).

    The transition table and a per-class report for baseline 3 are printed as
    well. A run with no training or test segments is reported and skipped.
    """
    for coarse in [True, False]:
        tag = "8 coarse" if coarse else "20 fine"
        print(f"\n{'='*60}")
        print(f"Baselines — {tag} classes")
        print(f"{'='*60}")

        train_segs, classes = load_annotations(TRAIN_VOLS, coarse=coarse)
        test_segs, _ = load_annotations(TEST_VOLS, coarse=coarse)
        num_classes = len(classes)

        print(f"Train segments: {len(train_segs)}")
        print(f"Test segments: {len(test_segs)}")

        # Without data every baseline below would fail (empty Counter,
        # division by zero); say so instead of raising an IndexError.
        if not train_segs or not test_segs:
            print("No segments loaded; check ANNOTATION_DIR and the volume lists. Skipping.")
            continue

        test_prev = [prev for prev, _ in test_segs]
        test_true = [current for _, current in test_segs]
        train_labels = [current for _, current in train_segs]

        # 1. Majority class baseline
        label_counts = Counter(train_labels)
        majority_class = label_counts.most_common(1)[0][0]
        majority_preds = [majority_class] * len(test_true)
        print(f"\n1. Majority class baseline (always predict '{classes[majority_class]}'):")
        _print_scores(test_true, majority_preds)

        # 2. Class frequency baseline (predict based on train distribution)
        freq = np.array([label_counts[idx] for idx in range(num_classes)], dtype=float)
        freq = freq / freq.sum()
        np.random.seed(42)  # fixed seed so the random baseline is reproducible
        freq_preds = np.random.choice(num_classes, size=len(test_true), p=freq)
        print("\n2. Random (train distribution) baseline:")
        _print_scores(test_true, freq_preds)

        # 3. Transition matrix baseline
        trans_matrix = compute_transition_matrix(train_segs, num_classes)
        trans_preds = []
        for prev in test_prev:
            row = trans_matrix[prev]
            if row.sum() == 0:
                # `prev` was never seen in training: argmax over an all-zero
                # row would silently pick class 0, so fall back to the
                # majority class, as the sampling baseline below does.
                trans_preds.append(majority_class)
            else:
                trans_preds.append(int(np.argmax(row)))
        print("\n3. Transition matrix baseline (argmax P(next|prev)):")
        _print_scores(test_true, trans_preds)

        # Print transition matrix
        print("\n   Transition matrix (rows=prev, cols=next):")
        header = "   " + "".join(f"{c[:2]:>6}" for c in classes)
        print(header)
        for i, row in enumerate(trans_matrix):
            vals = "".join(f"{v:6.2f}" for v in row)
            print(f"   {classes[i][:2]}{vals}")

        # 4. Transition + sampling (sample from P(next|prev) instead of argmax)
        np.random.seed(42)
        trans_sample_preds = []
        for prev in test_prev:
            p = trans_matrix[prev]
            if p.sum() == 0:
                trans_sample_preds.append(majority_class)
            else:
                trans_sample_preds.append(np.random.choice(num_classes, p=p))
        print("\n4. Transition matrix + sampling baseline:")
        _print_scores(test_true, trans_sample_preds)

        # Per-class report for transition argmax.
        # `labels` must be passed explicitly: with only `target_names`,
        # classification_report raises ValueError as soon as one class is
        # absent from both the true labels and the predictions.
        print("\n   Per-class report (transition argmax):")
        report = classification_report(test_true, trans_preds,
                                       labels=list(range(num_classes)),
                                       target_names=classes, zero_division=0)
        print(report)


# Script entry point: run the baselines only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()