Spaces:
Sleeping
Sleeping
"""
Deal Validator - Pydantic models for validation
"""
from datetime import datetime
from enum import Enum
from typing import Optional, List, Dict, Any

from pydantic import BaseModel, Field, validator
class Modality(str, Enum):
    """Closed set of modality labels a deal may carry.

    Members subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``Modality.TEXT == "Text"``).
    """

    TEXT = "Text"
    IMAGE = "Image"
    AUDIO = "Audio"
    VIDEO = "Video"
    SATELLITE = "Satellite"
    HEALTH_BIOTECH = "Health / Biotech"
    CORPORATE_INFRA = "Corporate / data infra"
    LEGAL_BOOKS = "Legal / Books"
    COMMISSIONING = "Commissioning"
    MIXED = "Mixed"
class DealType(str, Enum):
    """Closed set of recognised deal-type labels.

    Members subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``DealType.PER_UNIT == "per-unit"``).
    """

    AGGREGATE = "aggregate"
    PER_UNIT = "per-unit"
    COMMISSIONING = "commissioning"
    SETTLEMENT = "settlement"
    ACQUISITION = "acquisition"
    COMMONS = "commons"
    IMPLICIT = "implicit"
    HYBRID = "hybrid"
class DealStage(str, Enum):
    """Closed set of recognised deal-stage labels.

    Members subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``DealStage.ANNOUNCED == "announced"``).
    """

    ANNOUNCED = "announced"
    RUMORED = "rumored"
    CONFIRMED = "confirmed"
    SETTLED = "settled"
class DealData(BaseModel):
    """Validated deal data structure.

    Only ``provider`` and ``buyer`` are required; every other field is
    optional or carries a default. Field validators normalise the
    ``modality``, ``deal_type`` and ``deal_stage`` labels against the
    ``Modality``, ``DealType`` and ``DealStage`` enums, and enforce ordering
    of the price range and of the start/end dates.
    """

    # Core fields (required)
    provider: str = Field(..., min_length=1)
    buyer: str = Field(..., min_length=1)
    modality: str = Field(default="Text")
    data_type: Optional[str] = None

    # Pricing
    price_usd: Optional[float] = Field(None, ge=0)
    price_range_min_usd: Optional[float] = Field(None, ge=0)
    price_range_max_usd: Optional[float] = Field(None, ge=0)
    price_currency: str = Field(default="USD")
    reported_terms: Optional[str] = None
    pricing_mechanism: Optional[str] = None
    deal_type: Optional[str] = None

    # Dates
    date: Optional[str] = None  # YYYY-MM-DD or YYYY or YYYY-MM
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
    duration_years: Optional[float] = Field(None, ge=0)

    # Rights
    exclusive: Optional[bool] = None
    creators_compensated: Optional[bool] = None
    creator_split_percentage: Optional[float] = Field(None, ge=0, le=100)
    revenue_share: Optional[bool] = None

    # Rights granted
    training_allowed: Optional[bool] = None
    finetuning_allowed: Optional[bool] = None
    inference_allowed: Optional[bool] = None
    redistribution_allowed: Optional[bool] = None
    deletion_required: Optional[bool] = None

    # Provenance
    sources: List[str] = Field(default_factory=list)
    source_primary: Optional[str] = None
    discovered_via: Optional[str] = None
    exa_query: Optional[str] = None
    exa_score: Optional[float] = None
    exa_retrieved_at: Optional[str] = None

    # Extraction metadata
    extraction_metadata: Optional[Dict[str, Any]] = None
    raw_text_snippets: List[str] = Field(default_factory=list)
    regex_confidence: Optional[str] = None
    llm_confidence: Optional[str] = None
    last_extracted: Optional[str] = None

    # Linkages
    linkages_metadata: Optional[Dict[str, Any]] = None
    notes: Optional[str] = None
    deal_stage: str = Field(default="announced")
    confidence_score: float = Field(default=0.5, ge=0, le=1)
    version: Optional[str] = None

    # NOTE(review): the decorators below were missing, so none of these
    # functions was ever registered with Pydantic. ``pre=True`` on the three
    # normalisers is an assumption (it lets ``None``/unknown input fall back
    # to the default label instead of failing type validation) - confirm.

    @validator("modality", pre=True)
    def validate_modality(cls, v):
        """Normalize modality: keep known ``Modality`` values, else "Text"."""
        if v in [m.value for m in Modality]:
            return v
        return "Text"  # Default

    @validator("deal_type", pre=True)
    def validate_deal_type(cls, v):
        """Normalize deal type: keep known ``DealType`` values, else None."""
        if not v:
            return None
        if v in [d.value for d in DealType]:
            return v
        return None

    @validator("deal_stage", pre=True)
    def validate_deal_stage(cls, v):
        """Normalize deal stage: keep known ``DealStage`` values, else "announced"."""
        if v in [s.value for s in DealStage]:
            return v
        return "announced"

    @validator("price_range_max_usd")
    def validate_price_range(cls, v, values):
        """Ensure max >= min.

        Raises:
            ValueError: if both bounds are present and max is below min.
        """
        # ``values`` only holds fields declared earlier that validated
        # successfully, so a missing/invalid minimum simply skips the check.
        lower = values.get("price_range_min_usd")
        # Compare against None explicitly: a maximum of 0 is falsy but must
        # still be rejected when the minimum is positive.
        if v is not None and lower is not None and v < lower:
            raise ValueError("price_range_max_usd must be >= price_range_min_usd")
        return v

    @validator("end_date")
    def validate_end_date(cls, v, values):
        """Ensure end_date >= start_date.

        Raises:
            ValueError: if both dates are present and end precedes start.
        """
        start = values.get("start_date")
        if v is not None and start is not None and v < start:
            raise ValueError("end_date must be >= start_date")
        return v

    class Config:
        use_enum_values = True
class DealValidator:
    """Validator for deal data.

    Both methods are static: they take the raw deal dictionary directly and
    hold no instance state.
    """

    @staticmethod
    def validate(deal_dict: dict) -> tuple[bool, Optional[DealData], Optional[str]]:
        """
        Validate deal data

        Args:
            deal_dict: raw deal fields, passed to ``DealData`` as keyword
                arguments.

        Returns:
            (is_valid, DealData object, error_message) - on success the
            error message is None; on failure the DealData object is None
            and the message is ``str()`` of the raised exception.
        """
        try:
            deal_data = DealData(**deal_dict)
        except Exception as e:
            # Deliberately broad: this is the validation boundary, and any
            # failure to build the model is reported to the caller as an
            # error string rather than raised.
            return False, None, str(e)
        return True, deal_data, None

    @staticmethod
    def is_valid_deal(deal_dict: dict) -> bool:
        """Check if deal is valid"""
        is_valid, _, _ = DealValidator.validate(deal_dict)
        return is_valid