Spaces:
Running
Running
| """Data models for dataset entries and field mappings. | |
| These are plain dataclasses so they can live inside a gr.State without any | |
| serialization step - Gradio keeps server-side state as real Python objects | |
| per session, so we just mutate them in place. | |
| """ | |
| from __future__ import annotations | |
| import uuid | |
| from dataclasses import dataclass, field | |
| from typing import Literal, Optional | |
# Allowed values for FieldMapping.kind.
MappingKind = Literal[
    "conversation_list",
    "flat_pair",
    "unmapped",
]

# Allowed values for DatasetEntry.status.
EntryStatus = Literal[
    "empty",
    "detecting",
    "needs_mapping",
    "ready",
    "error",
]
@dataclass
class FieldMapping:
    """How to pull a (system, user, assistant) triplet out of one raw row.

    `config` holds whatever the given `kind` needs:

    - conversation_list: list_field, role_key, content_key, human_tag, gpt_tag
    - flat_pair: user_field, assistant_field
    """

    # One of the MappingKind literals; required, has no default.
    kind: MappingKind
    # default_factory gives every instance its own dict, so mutating one
    # mapping's config in place never leaks into another.
    config: dict = field(default_factory=dict)
@dataclass
class DatasetEntry:
    """One row in the dataset-builder list.

    Every field has a default, so `DatasetEntry()` yields a blank entry in
    the "empty" status that can then be filled in (mutated) in place.
    """

    # Short identifier: the first 8 hex characters of a fresh uuid4.
    uid: str = field(default_factory=lambda: uuid.uuid4().hex[:8])
    repo_id: str = ""
    subset: str = ""
    split: str = "train"
    limit: int = 1000
    system_prompt: str = ""
    # None until a FieldMapping has been assigned to this entry.
    mapping: Optional[FieldMapping] = None
    # Mutable containers use default_factory so entries never share state.
    detected_columns: list = field(default_factory=list)
    detected_list_info: Optional[dict] = None
    sample_rows: list = field(default_factory=list)
    status: EntryStatus = "empty"
    error_message: str = ""