JunhanCai's picture
Initial commit with GEMS model and Dockerfile
6918c6b
Raw
History Blame Contribute Delete
2.28 kB
import json
import os
from typing import Iterable, Sequence
def load_label_mapping(file_path: str) -> list[str]:
    """Load an ordered list of labels from a ``.txt`` or ``.json`` file.

    The format is selected by the (case-insensitive) file extension:

    * ``.txt`` -- one label per line; blank lines and lines starting with
      ``#`` are skipped. Undecodable bytes are ignored.
    * ``.json`` list -- every item is converted with ``str`` and stripped.
    * ``.json`` object -- keys are non-negative integer indices and values
      are labels. The result has ``max(index) + 1`` entries; positions that
      are not listed are filled with ``""``.

    Files are decoded as ``utf-8-sig`` so that a leading UTF-8 byte-order
    mark is dropped instead of ending up in the first label (or making the
    JSON parser fail). Files without a BOM decode exactly as plain UTF-8.

    Args:
        file_path: Path to the label mapping file.

    Returns:
        The labels, positioned by index.

    Raises:
        ValueError: If the extension is not ``.txt``/``.json``, the file
            holds no labels, the JSON is malformed or is neither a list nor
            an object, or an object key is not a non-negative integer.
        OSError: If the file cannot be opened.
    """
    extension = os.path.splitext(file_path)[1].lower()

    if extension == ".txt":
        with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as f:
            stripped_lines = (raw_line.strip() for raw_line in f)
            labels = [
                line
                for line in stripped_lines
                if line and not line.startswith("#")
            ]
        if not labels:
            raise ValueError("Label mapping file does not contain any labels")
        return labels

    if extension != ".json":
        raise ValueError("Label mapping file must be .json or .txt")

    with open(file_path, "r", encoding="utf-8-sig") as f:
        data = json.load(f)

    if isinstance(data, list):
        labels = [str(item).strip() for item in data]
        # A list made up solely of blank entries is treated like an empty one.
        if not any(labels):
            raise ValueError("Label mapping JSON list is empty")
        return labels

    if isinstance(data, dict):
        indexed_labels: list[tuple[int, str]] = []
        for key, value in data.items():
            try:
                index = int(key)
            except (TypeError, ValueError) as exc:
                raise ValueError("Label mapping JSON object keys must be numeric indices") from exc
            # Negative indices would wrap around through Python's negative
            # list indexing and silently overwrite another slot (or raise
            # IndexError on an empty list), so reject them explicitly.
            if index < 0:
                raise ValueError("Label mapping JSON object keys must be non-negative indices")
            indexed_labels.append((index, str(value).strip()))
        if not indexed_labels:
            raise ValueError("Label mapping JSON object is empty")
        max_index = max(index for index, _ in indexed_labels)
        labels = [""] * (max_index + 1)
        for index, label in indexed_labels:
            labels[index] = label
        return labels

    raise ValueError("Label mapping JSON must be a list or an object")
def apply_label_mapping(base_labels: Sequence[str], mapping: Sequence[str] | None) -> list[str]:
labels = [str(label) for label in base_labels]
if not mapping:
return labels
mapped = list(labels)
for index, mapped_label in enumerate(mapping):
if index >= len(mapped):
break
value = str(mapped_label).strip()
if value:
mapped[index] = value
return mapped