Spaces:
Running
Running
| """SQLModel tables for PolyglotAlpha v2 (README §5.35 — 10 base tables + 5 | |
| corpus/ground-truth/backtest tables for T7/T9 ingestion pipeline). | |
| All tables use JSON columns via SQLAlchemy's JSON type, so they work with | |
| both SQLite (default) and PostgreSQL (DATABASE_URL override). | |
| """ | |
| from __future__ import annotations | |
| import enum | |
| from datetime import datetime, timezone | |
| from typing import Any, Optional | |
| from sqlalchemy import CheckConstraint, Column, Index, JSON | |
| from sqlmodel import Field, SQLModel | |
| # --------------------------------------------------------------------------- | |
| # Enums (stored as plain strings for cross-DB portability) | |
| # --------------------------------------------------------------------------- | |
| class EventStatus(str, enum.Enum): | |
| PENDING = "PENDING" | |
| AUCTION_OPEN = "AUCTION_OPEN" | |
| AUCTION_SETTLED = "AUCTION_SETTLED" | |
| TRANSLATING = "TRANSLATING" | |
| EVALUATING = "EVALUATING" | |
| REJECTED = "REJECTED" | |
| COMMITTED = "COMMITTED" | |
| SUBMITTED = "SUBMITTED" | |
| FAILED = "FAILED" | |
| class JudgeVerdict(str, enum.Enum): | |
| PASS = "PASS" | |
| FAIL = "FAIL" | |
| PENDING = "PENDING" | |
| class PolymarketStatus(str, enum.Enum): | |
| PENDING = "PENDING" | |
| SUBMITTED = "SUBMITTED" | |
| LIVE = "LIVE" | |
| FAILED = "FAILED" | |
| SIMULATED = "SIMULATED" | |
| class SourceStatus(str, enum.Enum): | |
| ACTIVE = "ACTIVE" | |
| DISABLED = "DISABLED" | |
| ERROR = "ERROR" | |
| def _utcnow() -> datetime: | |
| return datetime.now(timezone.utc) | |
| # --------------------------------------------------------------------------- | |
| # 1. events | |
| # --------------------------------------------------------------------------- | |
| class Event(SQLModel, table=True): | |
| __tablename__ = "events" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| content_hash: str = Field(index=True, unique=True) | |
| sources: list[dict[str, Any]] = Field( | |
| default_factory=list, sa_column=Column(JSON, nullable=False) | |
| ) | |
| language: str = Field(default="en", index=True) | |
| triggered_at: datetime = Field(default_factory=_utcnow, index=True) | |
| status: str = Field(default=EventStatus.PENDING.value, index=True) | |
| title: Optional[str] = None | |
| # Set by the legacy ingestion dispatcher when it calls | |
| # ``TranslationAuction.openAuction``. The orchestrator stores its own | |
| # auction tx hashes in :class:`Auction.settlement_tx_hash`. | |
| tx_hash: Optional[str] = None | |
| # Execution mode for this lifecycle. ``"live"`` (default) runs real | |
| # LLM + real Arc tx + real RSS + real judges. ``"mock"`` short-circuits | |
| # the LLM, news fetch, judge panel, and chain calls with deterministic | |
| # fixtures so the demo button can produce a reproducible result without | |
| # external network calls. Indexed because the leaderboard / reputation | |
| # aggregates filter on ``mode='live'`` to keep mock events out of | |
| # public stats. | |
| mode: str = Field(default="live", nullable=False, index=True) | |
| # --------------------------------------------------------------------------- | |
| # 2. bids | |
| # --------------------------------------------------------------------------- | |
| class Bid(SQLModel, table=True): | |
| __tablename__ = "bids" | |
| __table_args__ = ( | |
| CheckConstraint( | |
| "bid_amount > 0 AND bid_amount < 1000000", | |
| name="bid_amount_positive_sane", | |
| ), | |
| CheckConstraint( | |
| "length(agent_address) > 0", | |
| name="agent_address_nonempty", | |
| ), | |
| ) | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| event_id: int = Field(foreign_key="events.id", index=True) | |
| agent_address: str = Field(index=True) | |
| bid_amount: float | |
| stake_amount: float = 5.0 | |
| candidate_hash: Optional[str] = None | |
| tx_hash: Optional[str] = None | |
| # Reputation snapshot at bid-time (0-1). Populated from BidRecord so the | |
| # historical view of "what reputation did this bidder have when they | |
| # bid?" survives even if AgentReputation rolls forward later. | |
| reputation: float = 1.0 | |
| submitted_at: datetime = Field(default_factory=_utcnow, index=True) | |
| # --------------------------------------------------------------------------- | |
| # 3. auctions | |
| # --------------------------------------------------------------------------- | |
| class Auction(SQLModel, table=True): | |
| __tablename__ = "auctions" | |
| event_id: int = Field(foreign_key="events.id", primary_key=True) | |
| winner_address: Optional[str] = Field(default=None, index=True) | |
| winning_bid: Optional[float] = None | |
| settlement_tx_hash: Optional[str] = None | |
| settled_at: Optional[datetime] = None | |
| # --------------------------------------------------------------------------- | |
| # 4. translations | |
| # --------------------------------------------------------------------------- | |
| class Translation(SQLModel, table=True): | |
| __tablename__ = "translations" | |
| event_id: int = Field(foreign_key="events.id", primary_key=True) | |
| translator_address: str = Field(index=True) | |
| pipeline_trace_ipfs: Optional[str] = None | |
| final_question_json: dict[str, Any] = Field( | |
| default_factory=dict, sa_column=Column(JSON, nullable=False) | |
| ) | |
| completed_at: datetime = Field(default_factory=_utcnow) | |
| # --------------------------------------------------------------------------- | |
| # 5. quality_scores | |
| # --------------------------------------------------------------------------- | |
| class QualityScore(SQLModel, table=True): | |
| __tablename__ = "quality_scores" | |
| __table_args__ = ( | |
| CheckConstraint( | |
| "overall_score >= 0 AND overall_score <= 1", | |
| name="overall_score_unit", | |
| ), | |
| CheckConstraint( | |
| "verdict IN ('PASS', 'FAIL', 'PENDING', 'BORDERLINE')", | |
| name="verdict_enum", | |
| ), | |
| ) | |
| event_id: int = Field(foreign_key="events.id", primary_key=True) | |
| translation_scores: dict[str, Any] = Field( | |
| default_factory=dict, sa_column=Column(JSON, nullable=False) | |
| ) | |
| style_alignment_passes: dict[str, Any] = Field( | |
| default_factory=dict, sa_column=Column(JSON, nullable=False) | |
| ) | |
| overall_score: float = 0.0 | |
| verdict: str = Field(default=JudgeVerdict.PENDING.value, index=True) | |
| evaluated_at: datetime = Field(default_factory=_utcnow) | |
| # --------------------------------------------------------------------------- | |
| # 6. questions | |
| # --------------------------------------------------------------------------- | |
| class Question(SQLModel, table=True): | |
| __tablename__ = "questions" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| event_id: int = Field(foreign_key="events.id", index=True) | |
| question_id_onchain: Optional[str] = Field(default=None, index=True) | |
| title_hash: Optional[str] = None | |
| builder_code: Optional[str] = None | |
| reasoning_ipfs: Optional[str] = None | |
| committed_at: datetime = Field(default_factory=_utcnow) | |
| # Arc commit transaction hash returned by QuestionRegistry.commitQuestion | |
| # (or the deterministic mock hash in mock mode). Surfaced by the UI on | |
| # the On-chain Anchor phase as a TxLink to testnet.arcscan.app. | |
| tx_hash: Optional[str] = None | |
| # --------------------------------------------------------------------------- | |
| # 7. polymarket_submissions | |
| # --------------------------------------------------------------------------- | |
| class PolymarketSubmission(SQLModel, table=True): | |
| __tablename__ = "polymarket_submissions" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| event_id: int = Field(foreign_key="events.id", index=True) | |
| market_id: Optional[str] = Field(default=None, index=True) | |
| submitted_at: datetime = Field(default_factory=_utcnow) | |
| market_url: Optional[str] = None | |
| status: str = Field(default=PolymarketStatus.PENDING.value, index=True) | |
| is_simulated: bool = False | |
| # Rich submission metadata so the UI can surface what was actually | |
| # sent to the Polymarket V2 builder API (and what the response / | |
| # builder-fee linkage looks like). Added 2026-05-26 — backfilled | |
| # with NULL on existing rows by ``_migrate_polymarket_submissions``. | |
| mode: Optional[str] = Field(default=None) | |
| fees_estimate_usdc: Optional[float] = Field(default=None) | |
| payload: Optional[dict[str, Any]] = Field(default=None, sa_column=Column(JSON)) | |
| # --------------------------------------------------------------------------- | |
| # 8. builder_fee_events | |
| # --------------------------------------------------------------------------- | |
| class BuilderFeeEvent(SQLModel, table=True): | |
| __tablename__ = "builder_fee_events" | |
| __table_args__ = ( | |
| CheckConstraint("fill_amount >= 0", name="fill_nonneg"), | |
| CheckConstraint( | |
| "fee_amount >= 0 AND fee_amount <= fill_amount", | |
| name="fee_within_fill", | |
| ), | |
| ) | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| market_id: str = Field(index=True) | |
| fill_amount: float | |
| fee_amount: float | |
| translator_address: str = Field(index=True) | |
| arc_tx_hash: Optional[str] = None | |
| timestamp: datetime = Field(default_factory=_utcnow, index=True) | |
| is_simulated: bool = False | |
| # --------------------------------------------------------------------------- | |
| # 9. agent_reputation | |
| # --------------------------------------------------------------------------- | |
| class AgentReputation(SQLModel, table=True): | |
| __tablename__ = "agent_reputation" | |
| __table_args__ = ( | |
| CheckConstraint("total_wins <= total_bids", name="wins_le_bids"), | |
| CheckConstraint("cumulative_fees >= 0", name="fees_nonneg"), | |
| CheckConstraint( | |
| "avg_quality >= 0 AND avg_quality <= 1", | |
| name="avg_quality_unit", | |
| ), | |
| # Hot-path leaderboard query sorts by cumulative_fees DESC. | |
| Index( | |
| "ix_agent_reputation_cumulative_fees_desc", | |
| "cumulative_fees", | |
| postgresql_using="btree", | |
| ), | |
| ) | |
| agent_address: str = Field(primary_key=True) | |
| total_bids: int = 0 | |
| total_wins: int = 0 | |
| avg_quality: float = 0.0 | |
| cumulative_fees: float = 0.0 | |
| last_updated: datetime = Field(default_factory=_utcnow) | |
| # --------------------------------------------------------------------------- | |
| # 10. sources | |
| # --------------------------------------------------------------------------- | |
| class Source(SQLModel, table=True): | |
| __tablename__ = "sources" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| name: str = Field(index=True, unique=True) | |
| url: str | |
| # ``language`` + ``status`` previously had per-column indexes that were | |
| # never queried — dropped to reduce write overhead (DB integrity report). | |
| language: str = "en" | |
| last_fetched: Optional[datetime] = None | |
| status: str = SourceStatus.ACTIVE.value | |
| # --------------------------------------------------------------------------- | |
| # Corpus / ground-truth / backtest tables (extension for T7/T9 ingestion). | |
| # --------------------------------------------------------------------------- | |
| class FewShotRole(str, enum.Enum): | |
| POSITIVE_EXAMPLE = "POSITIVE_EXAMPLE" | |
| NEGATIVE_EXAMPLE = "NEGATIVE_EXAMPLE" | |
| EDGE_CASE = "EDGE_CASE" | |
| class CorpusMarketState(str, enum.Enum): | |
| ACTIVE = "active" | |
| CLOSED = "closed" | |
| RESOLVED = "resolved" | |
| DISPUTED = "disputed" | |
| ARCHIVED = "archived" | |
| # --------------------------------------------------------------------------- | |
| # 11. corpus_markets | |
| # --------------------------------------------------------------------------- | |
| class CorpusMarket(SQLModel, table=True): | |
| """All Polymarket markets (open + resolved) — source-of-truth corpus.""" | |
| __tablename__ = "corpus_markets" | |
| __table_args__ = ( | |
| CheckConstraint( | |
| "state != 'resolved' OR outcome IS NOT NULL", | |
| name="resolved_has_outcome", | |
| ), | |
| CheckConstraint( | |
| "end_date IS NULL OR created_at IS NULL OR end_date >= created_at", | |
| name="time_order", | |
| ), | |
| ) | |
| market_id: str = Field(primary_key=True) | |
| question: str | |
| category: Optional[str] = Field(default=None, index=True) | |
| subcategory: Optional[str] = None | |
| tags: Optional[list[str]] = Field(default=None, sa_column=Column(JSON)) | |
| created_at: Optional[datetime] = None | |
| end_date: Optional[datetime] = None | |
| resolved_at: Optional[datetime] = Field(default=None, index=True) | |
| state: str = Field(default=CorpusMarketState.ACTIVE.value, index=True) | |
| outcome: Optional[str] = Field(default=None, index=True) | |
| outcome_prices: Optional[list[float]] = Field(default=None, sa_column=Column(JSON)) | |
| total_volume_usdc: Optional[float] = None | |
| uma_dispute: bool = False | |
| resolution_source: Optional[str] = None | |
| is_community_created: bool = False | |
| embedding_idx: Optional[int] = Field(default=None, index=True) | |
| framing_pattern: Optional[str] = Field(default=None, index=True) | |
| # --------------------------------------------------------------------------- | |
| # 12. few_shot_exemplars | |
| # --------------------------------------------------------------------------- | |
| class FewShotExemplar(SQLModel, table=True): | |
| """Exemplars used by LLM judges for in-context learning.""" | |
| __tablename__ = "few_shot_exemplars" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| market_id: Optional[str] = Field( | |
| default=None, foreign_key="corpus_markets.market_id", index=True | |
| ) | |
| judge_dimension: str = Field(index=True) | |
| role: str = Field(default=FewShotRole.POSITIVE_EXAMPLE.value, index=True) | |
| question_text: str | |
| explanation: str | |
| weight: float = 1.0 | |
| # --------------------------------------------------------------------------- | |
| # 13. style_rules | |
| # --------------------------------------------------------------------------- | |
| class StyleRule(SQLModel, table=True): | |
| """Distilled style-guide bullets — LLM-distilled from corpus.""" | |
| __tablename__ = "style_rules" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| rule_text: str | |
| dimension: Optional[str] = Field(default=None, index=True) | |
| source: str = "llm_distilled" | |
| confidence: float = 1.0 | |
| # --------------------------------------------------------------------------- | |
| # 14. reference_translations | |
| # --------------------------------------------------------------------------- | |
| class ReferenceTranslation(SQLModel, table=True): | |
| """Human-verified reference translations for demo samples (D-judge gold).""" | |
| __tablename__ = "reference_translations" | |
| sample_id: int = Field(primary_key=True) | |
| source_chinese: str | |
| primary_translation: str | |
| alternative_phrasings: list[str] = Field( | |
| default_factory=list, sa_column=Column(JSON, nullable=False) | |
| ) | |
| k5_framing_variants: list[str] = Field( | |
| default_factory=list, sa_column=Column(JSON, nullable=False) | |
| ) | |
| expected_bleu_threshold: float = 25.0 | |
| expected_comet_threshold: float = 0.55 | |
| polymarket_shape_validation: Optional[dict[str, Any]] = Field( | |
| default=None, sa_column=Column(JSON) | |
| ) | |
| annotator_notes: Optional[str] = None | |
| # --------------------------------------------------------------------------- | |
| # 15. backtest_results | |
| # --------------------------------------------------------------------------- | |
| class BacktestResult(SQLModel, table=True): | |
| """Per-event backtest record: agent prediction vs resolved outcome.""" | |
| __tablename__ = "backtest_results" | |
| id: Optional[int] = Field(default=None, primary_key=True) | |
| market_id: str = Field(foreign_key="corpus_markets.market_id", index=True) | |
| agent_address: str = Field(index=True) | |
| predicted_outcome: Optional[str] = None | |
| actual_outcome: str | |
| correct: bool = False | |
| estimated_profit_usdc: float = 0.0 | |
| # ``judge_verdict`` + ``backtested_at`` previously had per-column indexes | |
| # that were never queried — dropped to reduce write overhead. | |
| judge_verdict: str = JudgeVerdict.PENDING.value | |
| judge_score: float = 0.0 | |
| notes: Optional[str] = None | |
| backtested_at: datetime = Field(default_factory=_utcnow) | |
| __all__ = [ | |
| "AgentReputation", | |
| "Auction", | |
| "BacktestResult", | |
| "Bid", | |
| "BuilderFeeEvent", | |
| "CorpusMarket", | |
| "CorpusMarketState", | |
| "Event", | |
| "EventStatus", | |
| "FewShotExemplar", | |
| "FewShotRole", | |
| "JudgeVerdict", | |
| "PolymarketStatus", | |
| "PolymarketSubmission", | |
| "QualityScore", | |
| "Question", | |
| "ReferenceTranslation", | |
| "Source", | |
| "SourceStatus", | |
| "StyleRule", | |
| "Translation", | |
| ] | |