Spaces:
Running
Running
File size: 1,425 Bytes
"""Data models for dataset entries and field mappings.

These are plain dataclasses so they can live inside a gr.State without any
serialization step - Gradio keeps server-side state as real Python objects
per session, so we just mutate them in place.
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass, field
from typing import Literal, Optional
# Closed set of values accepted by ``FieldMapping.kind``.
MappingKind = Literal[
    "conversation_list",
    "flat_pair",
    "unmapped",
]

# Closed set of values accepted by ``DatasetEntry.status``.
EntryStatus = Literal[
    "empty",
    "detecting",
    "needs_mapping",
    "ready",
    "error",
]
@dataclass
class FieldMapping:
    """Describe how to pull a (system, user, assistant) triplet out of one raw row.

    Attributes:
        kind: The mapping strategy; one of the ``MappingKind`` literals.
        config: Whatever the given ``kind`` needs. Expected keys:

            - ``conversation_list``: ``list_field``, ``role_key``,
              ``content_key``, ``human_tag``, ``gpt_tag``
            - ``flat_pair``: ``user_field``, ``assistant_field``

            Defaults to a fresh empty dict for every instance.
    """

    kind: MappingKind
    # default_factory gives each instance its own dict, so mutating one
    # mapping's config in place never leaks into another.
    config: dict = field(default_factory=dict)
@dataclass
class DatasetEntry:
    """One row in the dataset-builder list.

    Every field has a default, so ``DatasetEntry()`` produces a blank entry
    with a freshly generated ``uid`` and ``status == "empty"``.
    """

    # Short per-entry identifier: the first 8 hex characters of a random UUID4.
    uid: str = field(default_factory=lambda: uuid.uuid4().hex[:8])

    # Dataset coordinates. NOTE(review): presumably a Hugging Face Hub repo id,
    # config/subset name and split name - confirm against the loader.
    repo_id: str = ""
    subset: str = ""
    split: str = "train"
    # NOTE(review): presumably the maximum number of rows to take - confirm.
    limit: int = 1000
    system_prompt: str = ""

    # No field mapping is attached by default.
    mapping: Optional[FieldMapping] = None

    # Detection results; each list default is a fresh list per instance.
    detected_columns: list = field(default_factory=list)
    detected_list_info: Optional[dict] = None
    sample_rows: list = field(default_factory=list)

    # One of the ``EntryStatus`` literals, plus an accompanying message that
    # is empty by default.
    status: EntryStatus = "empty"
    error_message: str = ""
|