Spatial-BEATs / scripts /map_real_manifest.py
dieKarotte's picture
Add files using upload-large-folder tool
29615e9 verified
Raw
History Blame Contribute Delete
6 kB
#!/usr/bin/env python3
"""
map_real_manifest.py
====================
把 STARSS22/23 real-FOA manifest(ov1/ov2/ov3_real_static_foa.jsonl)里的
class_name/class_id(DCASE 29-class)映射到 FSD50K 63-class 名称,
生成 *_mapped.jsonl,让真实数据可以直接接入仿真训练流水线。
映射规则(STARSS class → FSD50K label):
近似匹配原则:选择语义最接近、且在仿真数据中出现频率合理的类。
用法:
python scripts/map_real_manifest.py
# 输出到 /data/metadata/ov1_real_static_foa_mapped.jsonl 等
"""
import json
import copy
import argparse
from pathlib import Path
# ---------------------------------------------------------------------------
# STARSS (real) class_name → FSD50K (sim) mono_target_label
# class_id 0..28 of DCASE 2024 SELD
# ---------------------------------------------------------------------------
# Lookup table: STARSS class name (key) -> FSD50K label (value).
# Trailing comments give the relation between the two names:
#   "exact match" - the same name on both sides
#   "a ⊂ b"       - the STARSS class is a sub-category of the FSD50K label
#   "a ≈ b"       - closest available approximation
STARSS_TO_FSD50K: dict[str, str] = {
    "female_speech":        "female_speech",        # exact match
    "male_speech":          "male_speech",          # exact match
    "speech":               "speech",               # exact match
    "laughter":             "laughter",             # exact match
    "clapping":             "body_sound",           # clapping ⊂ body_sound
    "telephone":            "telephone_alarm",      # telephone ≈ telephone_alarm
    "knock":                "knock",                # exact match
    "footsteps":            "footsteps",            # exact match
    "door":                 "door",                 # exact match
    "drawer":               "drawer_cabinet",       # drawer ≈ drawer_cabinet
    "music":                "musical_instrument",   # music ≈ musical_instrument (broad)
    "piano":                "keyboard_instrument",  # piano ⊂ keyboard_instrument
    "bell":                 "bell",                 # exact match
    "alarm":                "alarm",                # exact match
    "car_horn":             "car",                  # car_horn ⊂ car
    "domestic_sounds":      "home_sound",           # domestic_sounds ≈ home_sound
    "water_tap_and_faucet": "water",                # water_tap ⊂ water
    "dog_bark":             "dog",                  # dog_bark ⊂ dog
    "crying_baby":          "human_vocalization",   # crying_baby ⊂ human_vocalization
    "crash":                "crushing",             # crash ≈ crushing (impact sound)
    "cough":                "body_sound",           # cough ⊂ body_sound
    "clearthroat":          "body_sound",           # clearthroat ⊂ body_sound
    "keyboard":             "typing",               # keyboard ≈ typing
    "pageturn":             "paper",                # pageturn ⊂ paper
    "keysdrop":             "metal_clink",          # keysdrop ≈ metal_clink
    "gun_shot":             "war_sound",            # gun_shot ⊂ war_sound
    "drilling":             "tool",                 # drilling ⊂ tool
    "engine_idling":        "machine",              # engine_idling ⊂ machine
    "jackhammer":           "tool",                 # jackhammer ⊂ tool
}
def map_manifest(input_path: Path, output_path: Path) -> None:
    """Rewrite one JSONL manifest with STARSS class names mapped to FSD50K labels.

    Every non-blank line of ``input_path`` is parsed as a JSON object. For each
    item of its ``"sources"`` list the class name (``class_name``, falling back
    to ``mono_target_label``, then to ``""``) is looked up in
    ``STARSS_TO_FSD50K``. The result is stored in both ``class_name`` and
    ``mono_target_label``, ``class_id`` is removed, and ``distance_valid`` is
    set to whether ``distance_cm`` is present and not null. Names that are not
    in the table are kept unchanged and reported in the printed summary.

    Args:
        input_path: Manifest to read (UTF-8, one JSON object per line).
        output_path: Destination file; created or overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If either file cannot be opened.
    """
    unmapped: set[str] = set()
    n_ok = 0
    n_dist_null = 0
    n_dist_ok = 0

    # Explicit UTF-8 on both sides: the output is written with
    # ensure_ascii=False, so relying on the locale encoding could fail or
    # produce files that cannot be read back on another machine.
    with open(input_path, encoding="utf-8") as fin, \
            open(output_path, "w", encoding="utf-8") as fout:
        for line in fin:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue

            # json.loads returns a fresh object, so it is safe to edit in place.
            entry = json.loads(line)

            # "sources" may be missing or null; treat both as "no sources".
            for source in entry.get("sources") or []:
                original_name = source.get(
                    "class_name", source.get("mono_target_label", "")
                )
                mapped_name = STARSS_TO_FSD50K.get(original_name)
                if mapped_name is None:
                    unmapped.add(original_name)
                    # Keep original; will likely raise KeyError during training
                    mapped_name = original_name

                # Overwrite class fields so _resolve_class_index picks up the right label
                source["mono_target_label"] = mapped_name
                source["class_name"] = mapped_name
                # Remove class_id to avoid stale numeric id confusing label_id_to_index
                source.pop("class_id", None)

                # Mark distance validity: a missing key and an explicit null
                # both count as "no distance".
                has_distance = source.get("distance_cm") is not None
                source["distance_valid"] = has_distance
                if has_distance:
                    n_dist_ok += 1
                else:
                    n_dist_null += 1

            fout.write(json.dumps(entry, ensure_ascii=False) + "\n")
            n_ok += 1

    # Summary; the arrow keeps the two file names from running together.
    print(f" {input_path.name} → {output_path.name}")
    print(f" entries: {n_ok}")
    print(f" dist_ok={n_dist_ok} dist_null={n_dist_null}")
    if unmapped:
        print(f" ⚠️ UNMAPPED class names: {unmapped}")
    else:
        print(" all class names mapped ✓")
def main(argv: "list[str] | None" = None) -> None:
    """Command-line entry point: map each requested manifest, then print the table.

    For every name in ``--datasets`` the file ``<metadata-dir>/<name>.jsonl`` is
    passed to ``map_manifest`` with ``<metadata-dir>/<name>_mapped.jsonl`` as the
    output. Inputs that do not exist are reported and skipped. After all
    datasets are handled the full STARSS -> FSD50K table is printed.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, so calling
            ``main()`` behaves as before.
    """
    parser = argparse.ArgumentParser(description="Map real FOA manifest to FSD50K vocab")
    parser.add_argument(
        "--metadata-dir",
        default="/apdcephfs_cq10/share_1603164/user/schmittzhu/data/metadata",
        help="directory containing the input manifests; outputs are written next to them",
    )
    parser.add_argument(
        "--datasets",
        nargs="+",
        default=["ov1_real_static_foa", "ov2_real_static_foa", "ov3_real_static_foa"],
        help="manifest base names (without the .jsonl suffix) to map",
    )
    args = parser.parse_args(argv)

    meta_dir = Path(args.metadata_dir)
    for dataset in args.datasets:
        in_path = meta_dir / f"{dataset}.jsonl"
        out_path = meta_dir / f"{dataset}_mapped.jsonl"
        # Best effort: a missing manifest is reported, not treated as fatal.
        if not in_path.exists():
            print(f" SKIP (not found): {in_path}")
            continue
        print(f"\nProcessing {dataset} ...")
        map_manifest(in_path, out_path)

    print("\nDone.")
    print("\nClass mapping used:")
    for starss, fsd in sorted(STARSS_TO_FSD50K.items()):
        # "=" marks an identical name on both sides, "≈" anything else.
        arrow = "≈" if starss != fsd else "="
        print(f" {starss:30s} {arrow} {fsd}")


if __name__ == "__main__":
    main()