Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from typing import Iterable, Any | |
def pick_text_column(columns: list[str]) -> str:
    """
    Try to robustly choose the text field from various corpus schemas.

    Well-known text column names are tried in priority order and the first
    one present in ``columns`` wins. If none is present, the first column
    is returned as a best-effort fallback.

    Args:
        columns: Column names of the corpus, in schema order.

    Returns:
        The name of the column chosen as the text field.

    Raises:
        IndexError: If ``columns`` is empty, so there is no column to
            fall back to.
    """
    # Ordered by preference: earlier names win when several are present.
    candidates = (
        "text",
        "content",
        "document",
        "passage",
        "passages",
        "contents",
        "wiki_text",
    )
    for name in candidates:
        if name in columns:
            return name
    if not columns:
        # Same exception type a bare ``columns[0]`` would raise, but with a
        # message that names the actual problem.
        raise IndexError("cannot pick a text column: columns is empty")
    return columns[0]
def pick_id_column(columns: list[str]) -> str | None:
    """
    Choose the identifier column from a corpus schema, if one is recognizable.

    Known id column names are checked in priority order; the first one found
    in ``columns`` is returned.

    Args:
        columns: Column names of the corpus.

    Returns:
        The matching id column name, or ``None`` when no known name is present.
    """
    known_id_names = ("id", "doc_id", "document_id", "passage_id", "pid")
    matches = (name for name in known_id_names if name in columns)
    return next(matches, None)
def iter_corpus_rows(ds) -> Iterable[dict[str, Any]]:
    """
    Lazily yield every row of ``ds`` in iteration order.

    Args:
        ds: Any iterable of rows. Per the return annotation, rows are
            expected to be dicts; this function does not check that.

    Yields:
        Each item produced by iterating ``ds``, unchanged.
    """
    yield from ds