Spaces:
Runtime error
Runtime error
File size: 3,102 Bytes
"""Custom exception types for DIME conversion errors with detailed context."""
from dataclasses import dataclass, fields
from pathlib import Path
@dataclass
class DIMEConversionError(Exception):
    """Base exception for DIME conversion errors.

    Attributes:
        source_path: Path of the file the error relates to. Only its final
            component (``source_path.name``) is shown in the rendered text.
        message: Human-readable description of the failure.
    """

    source_path: Path
    message: str

    def __post_init__(self) -> None:
        """Mirror the init field values into ``Exception.args``.

        The dataclass-generated ``__init__`` never calls
        ``Exception.__init__``, so when the exception is built with keyword
        arguments ``args`` stays empty. ``copy`` and ``pickle`` rebuild an
        exception as ``cls(*args)``, which then fails because the required
        fields are missing. Storing the field values, in declaration order,
        keeps the exception reconstructible. Subclasses inherit this hook, so
        their extra fields are included as well.
        """
        super().__init__(
            *(getattr(self, spec.name) for spec in fields(self) if spec.init)
        )

    def __str__(self) -> str:
        """Return the message prefixed with the bracketed source file name."""
        return f"[{self.source_path.name}] {self.message}"
@dataclass
class CSVParseError(DIMEConversionError):
    """Error parsing CSV file.

    Attributes:
        line_number: Line the error refers to, or ``None`` when unknown.
        column_name: Column the error refers to, or ``None`` when unknown.
        problematic_value: Offending raw value, or ``None`` when unknown.
            Only the first 100 characters are shown in the rendered text.
    """

    line_number: int | None = None
    column_name: str | None = None
    problematic_value: str | None = None

    def __str__(self) -> str:
        """Return the file name, any known location details, then the message.

        Optional details are included whenever they are not ``None``. A plain
        truthiness check would silently drop legitimate falsy values such as
        line ``0`` or an empty-string cell, which is often exactly the value
        that failed to parse.
        """
        parts = [f"[{self.source_path.name}]"]
        if self.line_number is not None:
            parts.append(f"line {self.line_number}")
        if self.column_name is not None:
            parts.append(f"column '{self.column_name}'")
        if self.problematic_value is not None:
            # Truncate long values so one oversized cell cannot flood the log.
            val = self.problematic_value[:100]
            if len(self.problematic_value) > 100:
                val += "..."
            parts.append(f"value: {val!r}")
        parts.append(self.message)
        return " ".join(parts)
@dataclass
class RowCountMismatchError(DIMEConversionError):
    """Row count validation failed.

    Attributes:
        expected_rows: Number of rows that was expected.
        actual_rows: Number of rows that was actually found.
    """

    expected_rows: int = 0
    actual_rows: int = 0

    def __str__(self) -> str:
        """Return both counts and their difference with thousands separators.

        The difference is ``expected_rows - actual_rows``, so a positive value
        means fewer rows were found than expected.
        """
        file_name = self.source_path.name
        wanted, found = self.expected_rows, self.actual_rows
        shortfall = wanted - found
        header = f"[{file_name}] Row count mismatch: "
        counts = f"expected {wanted:,}, got {found:,} "
        delta = f"(difference: {shortfall:,})"
        return header + counts + delta
@dataclass
class ChecksumMismatchError(DIMEConversionError):
    """Column checksum validation failed.

    Attributes:
        column_name: Name of the column whose checksum did not match.
        expected_value: Checksum value that was expected.
        actual_value: Checksum value that was actually obtained.
    """

    column_name: str = ""
    expected_value: float | int = 0
    actual_value: float | int = 0

    def __str__(self) -> str:
        """Return the file name, the column, and the expected/actual values."""
        file_name = self.source_path.name
        heading = f"[{file_name}] Checksum mismatch for '{self.column_name}': "
        comparison = f"expected {self.expected_value}, got {self.actual_value}"
        return heading + comparison
@dataclass
class SampleMismatchError(DIMEConversionError):
    """Sample row comparison failed.

    Attributes:
        row_index: Index of the sampled row that differed.
        column_name: Name of the column that differed.
        expected_value: Value that was expected in that cell.
        actual_value: Value that was actually found in that cell.
    """

    row_index: int = 0
    column_name: str = ""
    expected_value: str = ""
    actual_value: str = ""

    def __str__(self) -> str:
        """Return the mismatch location followed by the ``repr`` of both values."""
        file_name = self.source_path.name
        location = f"[{file_name}] Sample mismatch at row {self.row_index}, "
        wanted = f"column '{self.column_name}': expected {self.expected_value!r}, "
        found = f"got {self.actual_value!r}"
        return "".join((location, wanted, found))
@dataclass
class SchemaValidationError(DIMEConversionError):
    """Schema validation failed.

    Attributes:
        expected_columns: Column names that were expected, or ``None``.
        actual_columns: Column names that were actually present, or ``None``.
    """

    expected_columns: list[str] | None = None
    actual_columns: list[str] | None = None

    def __str__(self) -> str:
        """Return the file name followed by the missing and extra columns.

        Columns are compared as sets, so ordering and duplicates are ignored.
        When that comparison finds no difference (for example both lists are
        ``None``, or they differ only in order), the text falls back to
        ``message`` so it never ends in a bare ``Schema mismatch:``.
        """
        expected = set(self.expected_columns or [])
        actual = set(self.actual_columns or [])
        missing = expected - actual
        extra = actual - expected
        parts = [f"[{self.source_path.name}] Schema mismatch:"]
        if missing:
            parts.append(f"missing columns: {sorted(missing)}")
        if extra:
            parts.append(f"extra columns: {sorted(extra)}")
        if not (missing or extra) and self.message:
            # No set-level difference to show; keep the caller's explanation
            # so the error still says something useful.
            parts.append(self.message)
        return " ".join(parts)
|