Dataset-Creator / models.py
TitleOS's picture
Upload 9 files
390cebe verified
Raw
History Blame Contribute Delete
1.43 kB
"""Data models for dataset entries and field mappings.
These are plain dataclasses so they can live inside a gr.State without any
serialization step - Gradio keeps server-side state as real Python objects
per session, so we just mutate them in place.
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass, field
from typing import Literal, Optional
# Permitted values for `FieldMapping.kind`.
MappingKind = Literal[
    "conversation_list",
    "flat_pair",
    "unmapped",
]

# Permitted values for `DatasetEntry.status`.
EntryStatus = Literal[
    "empty",
    "detecting",
    "needs_mapping",
    "ready",
    "error",
]
@dataclass
class FieldMapping:
    """How to pull a (system, user, assistant) triplet out of one raw row.

    Attributes:
        kind: Which mapping strategy this object describes (a `MappingKind`
            value).
        config: Whatever the given `kind` needs:
            - conversation_list: list_field, role_key, content_key,
              human_tag, gpt_tag
            - flat_pair: user_field, assistant_field
    """

    kind: MappingKind
    # default_factory builds a fresh dict per instance, so mutating one
    # mapping's config in place never leaks into another mapping.
    config: dict = field(default_factory=dict)
@dataclass
class DatasetEntry:
    """One row in the dataset-builder list.

    Every field has a default, so `DatasetEntry()` produces a blank entry
    with a freshly generated `uid` and status "empty".
    """

    # First 8 hex characters of a random UUID4, generated per instance.
    uid: str = field(default_factory=lambda: uuid.uuid4().hex[:8])

    # Source selection and prompt settings.
    # NOTE(review): presumably a dataset repo id, its subset/split names and
    # a cap on rows to read - confirm against the code that loads the data.
    repo_id: str = ""
    subset: str = ""
    split: str = "train"
    limit: int = 1000
    system_prompt: str = ""

    # Stays None until a FieldMapping is assigned to this entry.
    mapping: Optional[FieldMapping] = None

    # NOTE(review): these look like results of a column-detection step;
    # element and key types are not visible in this module - confirm.
    # default_factory gives each entry its own list rather than a shared one.
    detected_columns: list = field(default_factory=list)
    detected_list_info: Optional[dict] = None
    sample_rows: list = field(default_factory=list)

    # Current state of the entry (an `EntryStatus` value) and the text that
    # accompanies it; both start out blank/"empty".
    status: EntryStatus = "empty"
    error_message: str = ""