"""Data models for dataset entries and field mappings. These are plain dataclasses so they can live inside a gr.State without any serialization step - Gradio keeps server-side state as real Python objects per session, so we just mutate them in place. """ from __future__ import annotations import uuid from dataclasses import dataclass, field from typing import Literal, Optional MappingKind = Literal["conversation_list", "flat_pair", "unmapped"] EntryStatus = Literal["empty", "detecting", "needs_mapping", "ready", "error"] @dataclass class FieldMapping: """How to pull a (system, user, assistant) triplet out of one raw row. `config` holds whatever the given `kind` needs: - conversation_list: list_field, role_key, content_key, human_tag, gpt_tag - flat_pair: user_field, assistant_field """ kind: MappingKind config: dict = field(default_factory=dict) @dataclass class DatasetEntry: """One row in the dataset-builder list.""" uid: str = field(default_factory=lambda: uuid.uuid4().hex[:8]) repo_id: str = "" subset: str = "" split: str = "train" limit: int = 1000 system_prompt: str = "" mapping: Optional[FieldMapping] = None detected_columns: list = field(default_factory=list) detected_list_info: Optional[dict] = None sample_rows: list = field(default_factory=list) status: EntryStatus = "empty" error_message: str = ""