Spaces:

lamossta
/

sv-task

Sleeping

App Files Files Community

lamossta committed on Apr 20

Commit

50aa44f

1 Parent(s): 399f588

schemas

Browse files

Files changed (4) hide show

src/schemas/data.py +35 -0
src/schemas/labels.py +122 -0
src/schemas/requests.py +22 -0
src/schemas/responses.py +14 -0

src/schemas/data.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from dataclasses import dataclass, field, fields
+@dataclass(frozen=True)
+class Position:
+    """A single character span within a sample's text.
+    Frozen record: attributes cannot be rebound after construction. Every
+    field has a default, so ``Position()`` yields an empty span.
+    """
+    # Surface text of the span (presumably the substring of the sample text
+    # covered by this span -- TODO confirm against the data loader).
+    position_text: str = ""
+    # Size of the span (presumably counted in characters -- TODO confirm).
+    length: int = 0
+    # Start of the span inside the sample text (presumably a 0-based
+    # character offset -- TODO confirm).
+    offset: int = 0
+@dataclass(frozen=True)
+class Entity:
+    """A mention group for one entity within a sample.
+    The dataclass is frozen, so attributes cannot be rebound, but the
+    ``positions`` list itself remains mutable. Because a list takes part in
+    the generated ``__hash__``, ``hash(entity)`` raises ``TypeError``.
+    """
+    # NOTE(review): declared as str here, while EntityInput in
+    # src/schemas/requests.py declares entity_id as int -- confirm which type
+    # is canonical.
+    entity_id: str = ""
+    # Entity name as written in the text.
+    entity_text: str = ""
+    # Presumably one of EntityTypes.types from src/schemas/labels.py
+    # ("company" / "location") -- not validated here.
+    entity_type: str = ""
+    # Mentions of this entity (presumably Position instances -- the element
+    # type is not declared). A fresh list is created per instance.
+    positions: list = field(default_factory=list)
+    # Annotated label (presumably a sentiment class name -- TODO confirm).
+    label: str = ""
+@dataclass(frozen=True)
+class Sample:
+    """A single news article with its annotated entities.
+    Frozen, but the ``entities`` list itself stays mutable and makes
+    ``hash(sample)`` raise ``TypeError`` (lists are unhashable).
+    """
+    # NOTE(review): declared as str here, while SampleInput in
+    # src/schemas/requests.py declares id as int -- confirm which type is
+    # canonical.
+    id: str = ""
+    # Full article text.
+    text: str = ""
+    # Annotated entities (presumably Entity instances -- the element type is
+    # not declared). A fresh list is created per instance.
+    entities: list = field(default_factory=list)
+def required_keys(cls) -> set[str]:
+    """Collect the field names declared on a data dataclass.
+    Every dataclass field is included, regardless of whether it has a
+    default. ``dataclasses.fields`` raises ``TypeError`` when ``cls`` is not
+    a dataclass (class or instance).
+    """
+    names: set[str] = set()
+    for spec in fields(cls):
+        names.add(spec.name)
+    return names

src/schemas/labels.py ADDED Viewed

	@@ -0,0 +1,122 @@

+from dataclasses import dataclass, field
+@dataclass(frozen=True)
+class LabelRemap:
+    """Maps non-standard label strings to canonical equivalents.
+    Frozen only prevents rebinding ``mapping``; the dict itself can still be
+    mutated, and because a dict takes part in the generated ``__hash__``,
+    ``hash(instance)`` raises ``TypeError``.
+    """
+    # Keys are the raw labels, values the canonical labels. The default
+    # collapses "very positive" into "positive"; a new dict is built per
+    # instance by the factory.
+    mapping: dict[str, str] = field(
+        default_factory=lambda: {"very positive": "positive"}
+    )
+@dataclass(frozen=True)
+class EntityTypes:
+    """Accepted entity type values."""
+    # NOTE(review): EntityInput in src/schemas/requests.py hard-codes the same
+    # two values in a Literal; the two lists must be kept in sync by hand.
+    types: tuple[str, ...] = ("company", "location")
+@dataclass(frozen=True)
+class SentimentLabels:
+    """3-class sentiment label schema.
+    ``label2id`` and ``id2label`` are always rebuilt from ``classes`` in
+    ``__post_init__``; a value passed for either of them to the constructor
+    is accepted for signature compatibility but overwritten.
+    """
+    # Class names; a label's index in this tuple is its integer id.
+    classes: tuple[str, ...] = ("negative", "neutral", "positive")
+    # Derived lookups. compare=False keeps them out of __eq__/__hash__: they
+    # are fully determined by `classes`, and as dicts they would otherwise
+    # make hash(instance) raise TypeError.
+    label2id: dict[str, int] = field(default_factory=dict, compare=False)
+    id2label: dict[int, str] = field(default_factory=dict, compare=False)
+    def __post_init__(self):
+        """Derive both lookup tables from ``classes``."""
+        # The dataclass is frozen, so plain attribute assignment would raise
+        # FrozenInstanceError; object.__setattr__ bypasses that guard.
+        object.__setattr__(
+            self, "label2id", {s: i for i, s in enumerate(self.classes)}
+        )
+        object.__setattr__(
+            self, "id2label", {i: s for i, s in enumerate(self.classes)}
+        )
+    @property
+    def num_labels(self) -> int:
+        """Number of classes in the schema."""
+        return len(self.classes)
+@dataclass(frozen=True)
+class BinaryLabels:
+    """Binary (yes / no) label schema used by qa_b mode.
+    ``label2id`` and ``id2label`` are always rebuilt from ``classes`` in
+    ``__post_init__``; a value passed for either of them to the constructor
+    is accepted for signature compatibility but overwritten.
+    """
+    # Class names; a label's index in this tuple is its integer id.
+    classes: tuple[str, ...] = ("no", "yes")
+    # Derived lookups. compare=False keeps them out of __eq__/__hash__: they
+    # are fully determined by `classes`, and as dicts they would otherwise
+    # make hash(instance) raise TypeError.
+    label2id: dict[str, int] = field(default_factory=dict, compare=False)
+    id2label: dict[int, str] = field(default_factory=dict, compare=False)
+    def __post_init__(self):
+        """Derive both lookup tables from ``classes``."""
+        # The dataclass is frozen, so plain attribute assignment would raise
+        # FrozenInstanceError; object.__setattr__ bypasses that guard.
+        object.__setattr__(
+            self, "label2id", {s: i for i, s in enumerate(self.classes)}
+        )
+        object.__setattr__(
+            self, "id2label", {i: s for i, s in enumerate(self.classes)}
+        )
+    @property
+    def num_labels(self) -> int:
+        """Number of classes in the schema."""
+        return len(self.classes)
+# ── Training modes ────────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class MarkerMode:
+    """Single-sequence mode: wrap entity with [E]...[/E] special tokens.
+    Seg A:  "[E] Google [/E] had strong earnings but Microsoft missed."
+    Label:  3-way (negative=0, neutral=1, positive=2)
+    This class only holds the configuration; the wrapping itself is done
+    elsewhere.
+    """
+    # Mode identifier; matches the "marker" key of the MODES registry.
+    name: str = "marker"
+    # Marker strings placed before and after the entity mention.
+    entity_start: str = "[E]"
+    entity_end: str = "[/E]"
+    # Label schema for this mode; a fresh SentimentLabels per instance.
+    labels: SentimentLabels = field(default_factory=SentimentLabels)
+@dataclass(frozen=True)
+class QaMMode:
+    """Sentence-pair QA-M mode (Sun et al. 2019).
+    Seg A:  "Google had strong earnings but Microsoft missed."
+    Seg B:  "What do you think of the sentiment of the company Google ?"
+    Label:  3-way (negative=0, neutral=1, positive=2)
+    This class only holds the configuration; building the sentence pair is
+    done elsewhere.
+    """
+    # Mode identifier; matches the "qa_m" key of the MODES registry.
+    name: str = "qa_m"
+    # Template for segment B with the placeholders {entity_type} and {entity}
+    # (presumably filled with str.format -- confirm at the call site).
+    question_template: str = "What do you think of the sentiment of the {entity_type} {entity} ?"
+    # Label schema for this mode; a fresh SentimentLabels per instance.
+    labels: SentimentLabels = field(default_factory=SentimentLabels)
+@dataclass(frozen=True)
+class QaBMode:
+    """Sentence-pair QA-B mode (Sun et al. 2019), binary.
+    Seg A:  "Google had strong earnings but Microsoft missed."
+    Seg B:  "The polarity of the company Google is positive ."
+    Label:  binary (no=0, yes=1)
+    Three forward passes per entity at inference; highest P(yes) wins.
+    This class only holds the configuration; hypothesis construction and
+    inference are done elsewhere.
+    """
+    # Mode identifier; matches the "qa_b" key of the MODES registry.
+    name: str = "qa_b"
+    # Template for segment B with the placeholders {entity_type}, {entity}
+    # and {sentiment} (presumably filled with str.format -- confirm at the
+    # call site).
+    hypothesis_template: str = "The polarity of the {entity_type} {entity} is {sentiment} ."
+    # Yes/no schema for the classifier output of this mode.
+    labels: BinaryLabels = field(default_factory=BinaryLabels)
+    # 3-class schema (presumably the candidate values substituted for
+    # {sentiment}, one hypothesis per class -- TODO confirm).
+    sentiment_labels: SentimentLabels = field(default_factory=SentimentLabels)
+# Shared default instances of the label and entity-type schemas.
+SENTIMENT_LABELS = SentimentLabels()
+BINARY_LABELS = BinaryLabels()
+ENTITY_TYPES = EntityTypes()
+LABEL_REMAP = LabelRemap()
+# Shared default instances of the three training modes.
+MARKER_MODE = MarkerMode()
+QA_M_MODE = QaMMode()
+QA_B_MODE = QaBMode()
+# Registry of modes keyed by each mode's own ``name`` ("marker", "qa_m",
+# "qa_b"), in that insertion order.
+MODES: dict[str, MarkerMode | QaMMode | QaBMode] = {
+    mode.name: mode for mode in (MARKER_MODE, QA_M_MODE, QA_B_MODE)
+}

src/schemas/requests.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from typing import Literal
+from pydantic import BaseModel
+# Pydantic model for one entity mention span inside a request payload.
+# All three fields are required (no defaults are declared). The field names
+# mirror the Position dataclass in src/schemas/data.py.
+class PositionInput(BaseModel):
+    # Surface text of the span.
+    position_text: str
+    # Span size (presumably in characters -- TODO confirm).
+    length: int
+    # Span start within the sample text (presumably a 0-based character
+    # offset -- TODO confirm).
+    offset: int
+# Pydantic model for one entity inside a request payload; every field is
+# required (no defaults are declared).
+class EntityInput(BaseModel):
+    # NOTE(review): int here, while the Entity dataclass in
+    # src/schemas/data.py declares entity_id as str -- confirm which type is
+    # canonical.
+    entity_id: int
+    # Entity name as written in the text.
+    entity_text: str
+    # Restricted to the same two values as EntityTypes.types in
+    # src/schemas/labels.py; the lists are duplicated, not shared.
+    entity_type: Literal["company", "location"]
+    # Mentions of this entity in the sample text.
+    positions: list[PositionInput]
+# Pydantic model for one sample (text plus its entities) in a request
+# payload; every field is required (no defaults are declared).
+class SampleInput(BaseModel):
+    # NOTE(review): int here, while the Sample dataclass in
+    # src/schemas/data.py declares id as str -- confirm which type is
+    # canonical.
+    id: int
+    # Full text in which the entities occur.
+    text: str
+    # Entities to classify within the text.
+    entities: list[EntityInput]

src/schemas/responses.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from typing import Literal
+from pydantic import BaseModel
+# Pydantic model for the classification result of one entity; every field is
+# required (no defaults are declared).
+class EntityOutput(BaseModel):
+    # Same field name and type as EntityInput.entity_id (presumably echoed
+    # from the request -- confirm in the handler).
+    entity_id: int
+    # Same field name and type as EntityInput.entity_text.
+    entity_text: str
+    # One of the three class names also listed in SentimentLabels.classes
+    # (src/schemas/labels.py); the values are duplicated here, not shared.
+    classification: Literal["positive", "negative", "neutral"]
+# Pydantic model for the result of one sample; every field is required (no
+# defaults are declared).
+class SampleOutput(BaseModel):
+    # Same field name and type as SampleInput.id (presumably echoed from the
+    # request -- confirm in the handler).
+    id: int
+    # One classification result per entity.
+    entities: list[EntityOutput]