licaomeng
deploy: main@8970ffb → HF Spaces (2026-05-27T05:19Z)
88d2f2a
"""SQLModel tables for PolyglotAlpha v2 (README §5.35 — 10 base tables + 5
corpus/ground-truth/backtest tables for T7/T9 ingestion pipeline).
All tables use JSON columns via SQLAlchemy's JSON type, so they work with
both SQLite (default) and PostgreSQL (DATABASE_URL override).
"""
from __future__ import annotations
import enum
from datetime import datetime, timezone
from typing import Any, Optional
from sqlalchemy import CheckConstraint, Column, Index, JSON
from sqlmodel import Field, SQLModel
# ---------------------------------------------------------------------------
# Enums (stored as plain strings for cross-DB portability)
# ---------------------------------------------------------------------------
class EventStatus(str, enum.Enum):
PENDING = "PENDING"
AUCTION_OPEN = "AUCTION_OPEN"
AUCTION_SETTLED = "AUCTION_SETTLED"
TRANSLATING = "TRANSLATING"
EVALUATING = "EVALUATING"
REJECTED = "REJECTED"
COMMITTED = "COMMITTED"
SUBMITTED = "SUBMITTED"
FAILED = "FAILED"
class JudgeVerdict(str, enum.Enum):
PASS = "PASS"
FAIL = "FAIL"
PENDING = "PENDING"
class PolymarketStatus(str, enum.Enum):
PENDING = "PENDING"
SUBMITTED = "SUBMITTED"
LIVE = "LIVE"
FAILED = "FAILED"
SIMULATED = "SIMULATED"
class SourceStatus(str, enum.Enum):
ACTIVE = "ACTIVE"
DISABLED = "DISABLED"
ERROR = "ERROR"
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
# ---------------------------------------------------------------------------
# 1. events
# ---------------------------------------------------------------------------
class Event(SQLModel, table=True):
__tablename__ = "events"
id: Optional[int] = Field(default=None, primary_key=True)
content_hash: str = Field(index=True, unique=True)
sources: list[dict[str, Any]] = Field(
default_factory=list, sa_column=Column(JSON, nullable=False)
)
language: str = Field(default="en", index=True)
triggered_at: datetime = Field(default_factory=_utcnow, index=True)
status: str = Field(default=EventStatus.PENDING.value, index=True)
title: Optional[str] = None
# Set by the legacy ingestion dispatcher when it calls
# ``TranslationAuction.openAuction``. The orchestrator stores its own
# auction tx hashes in :class:`Auction.settlement_tx_hash`.
tx_hash: Optional[str] = None
# Execution mode for this lifecycle. ``"live"`` (default) runs real
# LLM + real Arc tx + real RSS + real judges. ``"mock"`` short-circuits
# the LLM, news fetch, judge panel, and chain calls with deterministic
# fixtures so the demo button can produce a reproducible result without
# external network calls. Indexed because the leaderboard / reputation
# aggregates filter on ``mode='live'`` to keep mock events out of
# public stats.
mode: str = Field(default="live", nullable=False, index=True)
# ---------------------------------------------------------------------------
# 2. bids
# ---------------------------------------------------------------------------
class Bid(SQLModel, table=True):
__tablename__ = "bids"
__table_args__ = (
CheckConstraint(
"bid_amount > 0 AND bid_amount < 1000000",
name="bid_amount_positive_sane",
),
CheckConstraint(
"length(agent_address) > 0",
name="agent_address_nonempty",
),
)
id: Optional[int] = Field(default=None, primary_key=True)
event_id: int = Field(foreign_key="events.id", index=True)
agent_address: str = Field(index=True)
bid_amount: float
stake_amount: float = 5.0
candidate_hash: Optional[str] = None
tx_hash: Optional[str] = None
# Reputation snapshot at bid-time (0-1). Populated from BidRecord so the
# historical view of "what reputation did this bidder have when they
# bid?" survives even if AgentReputation rolls forward later.
reputation: float = 1.0
submitted_at: datetime = Field(default_factory=_utcnow, index=True)
# ---------------------------------------------------------------------------
# 3. auctions
# ---------------------------------------------------------------------------
class Auction(SQLModel, table=True):
__tablename__ = "auctions"
event_id: int = Field(foreign_key="events.id", primary_key=True)
winner_address: Optional[str] = Field(default=None, index=True)
winning_bid: Optional[float] = None
settlement_tx_hash: Optional[str] = None
settled_at: Optional[datetime] = None
# ---------------------------------------------------------------------------
# 4. translations
# ---------------------------------------------------------------------------
class Translation(SQLModel, table=True):
__tablename__ = "translations"
event_id: int = Field(foreign_key="events.id", primary_key=True)
translator_address: str = Field(index=True)
pipeline_trace_ipfs: Optional[str] = None
final_question_json: dict[str, Any] = Field(
default_factory=dict, sa_column=Column(JSON, nullable=False)
)
completed_at: datetime = Field(default_factory=_utcnow)
# ---------------------------------------------------------------------------
# 5. quality_scores
# ---------------------------------------------------------------------------
class QualityScore(SQLModel, table=True):
__tablename__ = "quality_scores"
__table_args__ = (
CheckConstraint(
"overall_score >= 0 AND overall_score <= 1",
name="overall_score_unit",
),
CheckConstraint(
"verdict IN ('PASS', 'FAIL', 'PENDING', 'BORDERLINE')",
name="verdict_enum",
),
)
event_id: int = Field(foreign_key="events.id", primary_key=True)
translation_scores: dict[str, Any] = Field(
default_factory=dict, sa_column=Column(JSON, nullable=False)
)
style_alignment_passes: dict[str, Any] = Field(
default_factory=dict, sa_column=Column(JSON, nullable=False)
)
overall_score: float = 0.0
verdict: str = Field(default=JudgeVerdict.PENDING.value, index=True)
evaluated_at: datetime = Field(default_factory=_utcnow)
# ---------------------------------------------------------------------------
# 6. questions
# ---------------------------------------------------------------------------
class Question(SQLModel, table=True):
__tablename__ = "questions"
id: Optional[int] = Field(default=None, primary_key=True)
event_id: int = Field(foreign_key="events.id", index=True)
question_id_onchain: Optional[str] = Field(default=None, index=True)
title_hash: Optional[str] = None
builder_code: Optional[str] = None
reasoning_ipfs: Optional[str] = None
committed_at: datetime = Field(default_factory=_utcnow)
# Arc commit transaction hash returned by QuestionRegistry.commitQuestion
# (or the deterministic mock hash in mock mode). Surfaced by the UI on
# the On-chain Anchor phase as a TxLink to testnet.arcscan.app.
tx_hash: Optional[str] = None
# ---------------------------------------------------------------------------
# 7. polymarket_submissions
# ---------------------------------------------------------------------------
class PolymarketSubmission(SQLModel, table=True):
__tablename__ = "polymarket_submissions"
id: Optional[int] = Field(default=None, primary_key=True)
event_id: int = Field(foreign_key="events.id", index=True)
market_id: Optional[str] = Field(default=None, index=True)
submitted_at: datetime = Field(default_factory=_utcnow)
market_url: Optional[str] = None
status: str = Field(default=PolymarketStatus.PENDING.value, index=True)
is_simulated: bool = False
# Rich submission metadata so the UI can surface what was actually
# sent to the Polymarket V2 builder API (and what the response /
# builder-fee linkage looks like). Added 2026-05-26 — backfilled
# with NULL on existing rows by ``_migrate_polymarket_submissions``.
mode: Optional[str] = Field(default=None)
fees_estimate_usdc: Optional[float] = Field(default=None)
payload: Optional[dict[str, Any]] = Field(default=None, sa_column=Column(JSON))
# ---------------------------------------------------------------------------
# 8. builder_fee_events
# ---------------------------------------------------------------------------
class BuilderFeeEvent(SQLModel, table=True):
__tablename__ = "builder_fee_events"
__table_args__ = (
CheckConstraint("fill_amount >= 0", name="fill_nonneg"),
CheckConstraint(
"fee_amount >= 0 AND fee_amount <= fill_amount",
name="fee_within_fill",
),
)
id: Optional[int] = Field(default=None, primary_key=True)
market_id: str = Field(index=True)
fill_amount: float
fee_amount: float
translator_address: str = Field(index=True)
arc_tx_hash: Optional[str] = None
timestamp: datetime = Field(default_factory=_utcnow, index=True)
is_simulated: bool = False
# ---------------------------------------------------------------------------
# 9. agent_reputation
# ---------------------------------------------------------------------------
class AgentReputation(SQLModel, table=True):
__tablename__ = "agent_reputation"
__table_args__ = (
CheckConstraint("total_wins <= total_bids", name="wins_le_bids"),
CheckConstraint("cumulative_fees >= 0", name="fees_nonneg"),
CheckConstraint(
"avg_quality >= 0 AND avg_quality <= 1",
name="avg_quality_unit",
),
# Hot-path leaderboard query sorts by cumulative_fees DESC.
Index(
"ix_agent_reputation_cumulative_fees_desc",
"cumulative_fees",
postgresql_using="btree",
),
)
agent_address: str = Field(primary_key=True)
total_bids: int = 0
total_wins: int = 0
avg_quality: float = 0.0
cumulative_fees: float = 0.0
last_updated: datetime = Field(default_factory=_utcnow)
# ---------------------------------------------------------------------------
# 10. sources
# ---------------------------------------------------------------------------
class Source(SQLModel, table=True):
__tablename__ = "sources"
id: Optional[int] = Field(default=None, primary_key=True)
name: str = Field(index=True, unique=True)
url: str
# ``language`` + ``status`` previously had per-column indexes that were
# never queried — dropped to reduce write overhead (DB integrity report).
language: str = "en"
last_fetched: Optional[datetime] = None
status: str = SourceStatus.ACTIVE.value
# ---------------------------------------------------------------------------
# Corpus / ground-truth / backtest tables (extension for T7/T9 ingestion).
# ---------------------------------------------------------------------------
class FewShotRole(str, enum.Enum):
POSITIVE_EXAMPLE = "POSITIVE_EXAMPLE"
NEGATIVE_EXAMPLE = "NEGATIVE_EXAMPLE"
EDGE_CASE = "EDGE_CASE"
class CorpusMarketState(str, enum.Enum):
ACTIVE = "active"
CLOSED = "closed"
RESOLVED = "resolved"
DISPUTED = "disputed"
ARCHIVED = "archived"
# ---------------------------------------------------------------------------
# 11. corpus_markets
# ---------------------------------------------------------------------------
class CorpusMarket(SQLModel, table=True):
"""All Polymarket markets (open + resolved) — source-of-truth corpus."""
__tablename__ = "corpus_markets"
__table_args__ = (
CheckConstraint(
"state != 'resolved' OR outcome IS NOT NULL",
name="resolved_has_outcome",
),
CheckConstraint(
"end_date IS NULL OR created_at IS NULL OR end_date >= created_at",
name="time_order",
),
)
market_id: str = Field(primary_key=True)
question: str
category: Optional[str] = Field(default=None, index=True)
subcategory: Optional[str] = None
tags: Optional[list[str]] = Field(default=None, sa_column=Column(JSON))
created_at: Optional[datetime] = None
end_date: Optional[datetime] = None
resolved_at: Optional[datetime] = Field(default=None, index=True)
state: str = Field(default=CorpusMarketState.ACTIVE.value, index=True)
outcome: Optional[str] = Field(default=None, index=True)
outcome_prices: Optional[list[float]] = Field(default=None, sa_column=Column(JSON))
total_volume_usdc: Optional[float] = None
uma_dispute: bool = False
resolution_source: Optional[str] = None
is_community_created: bool = False
embedding_idx: Optional[int] = Field(default=None, index=True)
framing_pattern: Optional[str] = Field(default=None, index=True)
# ---------------------------------------------------------------------------
# 12. few_shot_exemplars
# ---------------------------------------------------------------------------
class FewShotExemplar(SQLModel, table=True):
"""Exemplars used by LLM judges for in-context learning."""
__tablename__ = "few_shot_exemplars"
id: Optional[int] = Field(default=None, primary_key=True)
market_id: Optional[str] = Field(
default=None, foreign_key="corpus_markets.market_id", index=True
)
judge_dimension: str = Field(index=True)
role: str = Field(default=FewShotRole.POSITIVE_EXAMPLE.value, index=True)
question_text: str
explanation: str
weight: float = 1.0
# ---------------------------------------------------------------------------
# 13. style_rules
# ---------------------------------------------------------------------------
class StyleRule(SQLModel, table=True):
"""Distilled style-guide bullets — LLM-distilled from corpus."""
__tablename__ = "style_rules"
id: Optional[int] = Field(default=None, primary_key=True)
rule_text: str
dimension: Optional[str] = Field(default=None, index=True)
source: str = "llm_distilled"
confidence: float = 1.0
# ---------------------------------------------------------------------------
# 14. reference_translations
# ---------------------------------------------------------------------------
class ReferenceTranslation(SQLModel, table=True):
"""Human-verified reference translations for demo samples (D-judge gold)."""
__tablename__ = "reference_translations"
sample_id: int = Field(primary_key=True)
source_chinese: str
primary_translation: str
alternative_phrasings: list[str] = Field(
default_factory=list, sa_column=Column(JSON, nullable=False)
)
k5_framing_variants: list[str] = Field(
default_factory=list, sa_column=Column(JSON, nullable=False)
)
expected_bleu_threshold: float = 25.0
expected_comet_threshold: float = 0.55
polymarket_shape_validation: Optional[dict[str, Any]] = Field(
default=None, sa_column=Column(JSON)
)
annotator_notes: Optional[str] = None
# ---------------------------------------------------------------------------
# 15. backtest_results
# ---------------------------------------------------------------------------
class BacktestResult(SQLModel, table=True):
"""Per-event backtest record: agent prediction vs resolved outcome."""
__tablename__ = "backtest_results"
id: Optional[int] = Field(default=None, primary_key=True)
market_id: str = Field(foreign_key="corpus_markets.market_id", index=True)
agent_address: str = Field(index=True)
predicted_outcome: Optional[str] = None
actual_outcome: str
correct: bool = False
estimated_profit_usdc: float = 0.0
# ``judge_verdict`` + ``backtested_at`` previously had per-column indexes
# that were never queried — dropped to reduce write overhead.
judge_verdict: str = JudgeVerdict.PENDING.value
judge_score: float = 0.0
notes: Optional[str] = None
backtested_at: datetime = Field(default_factory=_utcnow)
__all__ = [
"AgentReputation",
"Auction",
"BacktestResult",
"Bid",
"BuilderFeeEvent",
"CorpusMarket",
"CorpusMarketState",
"Event",
"EventStatus",
"FewShotExemplar",
"FewShotRole",
"JudgeVerdict",
"PolymarketStatus",
"PolymarketSubmission",
"QualityScore",
"Question",
"ReferenceTranslation",
"Source",
"SourceStatus",
"StyleRule",
"Translation",
]