""" SQLAlchemy models for PDF storage and prediction tracking. """ from datetime import datetime from typing import Dict, Any, List import json import pickle import numpy as np from sqlalchemy import ( Column, Integer, String, Float, DateTime, Boolean, Text, LargeBinary, ForeignKey, Index ) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship Base = declarative_base() class PDFSnapshot(Base): """ Stores historical PDF snapshots with all associated data. Each snapshot represents the option-implied probability distribution at a specific point in time for a specific expiration. """ __tablename__ = 'pdf_snapshots' # Primary key id = Column(Integer, primary_key=True, autoincrement=True) # Timestamp and identification timestamp = Column(DateTime, nullable=False, index=True) ticker = Column(String(10), nullable=False, index=True) # Market data spot_price = Column(Float, nullable=False) days_to_expiry = Column(Integer, nullable=False) expiration_date = Column(DateTime, nullable=False) # Risk-free rate risk_free_rate = Column(Float, nullable=False) # PDF data (stored as binary) strikes = Column(LargeBinary, nullable=False) # NumPy array pickled pdf_values = Column(LargeBinary, nullable=False) # NumPy array pickled # SABR parameters (if used) sabr_alpha = Column(Float, nullable=True) sabr_rho = Column(Float, nullable=True) sabr_nu = Column(Float, nullable=True) sabr_beta = Column(Float, nullable=True) interpolation_method = Column(String(20), nullable=True) # 'sabr' or 'spline' # Statistics (stored as JSON) statistics = Column(Text, nullable=False) # JSON string # AI interpretation interpretation = Column(Text, nullable=True) interpretation_mode = Column(String(20), nullable=True) # 'standard', 'conservative', etc. model_used = Column(String(50), nullable=True) # 'ollama' or 'fallback' # Relationships pattern_matches = relationship('PatternMatch', foreign_keys='PatternMatch.current_snapshot_id', back_populates='current_snapshot') predictions = relationship('Prediction', back_populates='snapshot') # Indexes for common queries __table_args__ = ( Index('idx_ticker_timestamp', 'ticker', 'timestamp'), Index('idx_ticker_expiry', 'ticker', 'days_to_expiry'), ) def __repr__(self): return f"" def get_strikes(self) -> np.ndarray: """Deserialize strikes from binary.""" return pickle.loads(self.strikes) def set_strikes(self, strikes: np.ndarray): """Serialize strikes to binary.""" self.strikes = pickle.dumps(strikes) def get_pdf_values(self) -> np.ndarray: """Deserialize PDF values from binary.""" return pickle.loads(self.pdf_values) def set_pdf_values(self, pdf_values: np.ndarray): """Serialize PDF values to binary.""" self.pdf_values = pickle.dumps(pdf_values) def get_statistics(self) -> Dict[str, Any]: """Deserialize statistics from JSON.""" return json.loads(self.statistics) def set_statistics(self, stats: Dict[str, Any]): """Serialize statistics to JSON.""" self.statistics = json.dumps(stats) def to_dict(self) -> Dict[str, Any]: """Convert snapshot to dictionary.""" return { 'id': self.id, 'timestamp': self.timestamp.isoformat(), 'ticker': self.ticker, 'spot_price': self.spot_price, 'days_to_expiry': self.days_to_expiry, 'expiration_date': self.expiration_date.isoformat(), 'risk_free_rate': self.risk_free_rate, 'strikes': self.get_strikes().tolist(), 'pdf_values': self.get_pdf_values().tolist(), 'sabr_params': { 'alpha': self.sabr_alpha, 'rho': self.sabr_rho, 'nu': self.sabr_nu, 'beta': self.sabr_beta, }, 'interpolation_method': self.interpolation_method, 'statistics': self.get_statistics(), 'interpretation': self.interpretation, 'interpretation_mode': self.interpretation_mode, 'model_used': self.model_used, } class Prediction(Base): """ Tracks predictions made from PDFs and their outcomes. Used to evaluate the accuracy of option-implied probabilities by comparing forecasted probabilities with actual outcomes. """ __tablename__ = 'predictions' # Primary key id = Column(Integer, primary_key=True, autoincrement=True) # Link to PDF snapshot snapshot_id = Column(Integer, ForeignKey('pdf_snapshots.id'), nullable=False) # Forecast details forecast_date = Column(DateTime, nullable=False, index=True) target_date = Column(DateTime, nullable=False, index=True) ticker = Column(String(10), nullable=False) # Prediction condition = Column(String(20), nullable=False) # 'above', 'below', 'between' target_level = Column(Float, nullable=False) # Strike or price level target_level_upper = Column(Float, nullable=True) # For 'between' condition predicted_probability = Column(Float, nullable=False) # 0.0 to 1.0 # Actual outcome evaluation_date = Column(DateTime, nullable=True) # When we evaluated actual_price = Column(Float, nullable=True) # Actual price at target_date actual_outcome = Column(Boolean, nullable=True) # True if condition met # Accuracy metrics accuracy_score = Column(Float, nullable=True) # Brier score or similar # Additional context notes = Column(Text, nullable=True) # Relationship snapshot = relationship('PDFSnapshot', back_populates='predictions') # Indexes __table_args__ = ( Index('idx_ticker_target_date', 'ticker', 'target_date'), ) def __repr__(self): return f"" def calculate_brier_score(self) -> float: """ Calculate Brier score for this prediction. Brier Score = (predicted_prob - actual_outcome)^2 Lower is better (0 = perfect forecast) """ if self.actual_outcome is None: return None actual = 1.0 if self.actual_outcome else 0.0 brier = (self.predicted_probability - actual) ** 2 self.accuracy_score = brier return brier def to_dict(self) -> Dict[str, Any]: """Convert prediction to dictionary.""" return { 'id': self.id, 'snapshot_id': self.snapshot_id, 'forecast_date': self.forecast_date.isoformat(), 'target_date': self.target_date.isoformat(), 'ticker': self.ticker, 'condition': self.condition, 'target_level': self.target_level, 'target_level_upper': self.target_level_upper, 'predicted_probability': self.predicted_probability, 'evaluation_date': self.evaluation_date.isoformat() if self.evaluation_date else None, 'actual_price': self.actual_price, 'actual_outcome': self.actual_outcome, 'accuracy_score': self.accuracy_score, 'notes': self.notes, } class PatternMatch(Base): """ Stores pattern matching results between current and historical PDFs. Used to find similar market conditions and compare outcomes. """ __tablename__ = 'pattern_matches' # Primary key id = Column(Integer, primary_key=True, autoincrement=True) # Current snapshot being analyzed current_snapshot_id = Column(Integer, ForeignKey('pdf_snapshots.id'), nullable=False) # Historical snapshot that matched historical_snapshot_id = Column(Integer, ForeignKey('pdf_snapshots.id'), nullable=False) # Match timestamp match_timestamp = Column(DateTime, nullable=False, default=datetime.utcnow) # Similarity scores overall_similarity = Column(Float, nullable=False) # Combined score shape_similarity = Column(Float, nullable=False) # Cosine similarity stats_similarity = Column(Float, nullable=False) # Statistical features similarity # Match rank (1 = best match) match_rank = Column(Integer, nullable=False) # Description of historical pattern description = Column(Text, nullable=True) # Relationship current_snapshot = relationship('PDFSnapshot', foreign_keys=[current_snapshot_id], back_populates='pattern_matches') historical_snapshot = relationship('PDFSnapshot', foreign_keys=[historical_snapshot_id]) # Indexes __table_args__ = ( Index('idx_current_snapshot', 'current_snapshot_id'), Index('idx_similarity', 'overall_similarity'), ) def __repr__(self): return f"" def to_dict(self) -> Dict[str, Any]: """Convert pattern match to dictionary.""" return { 'id': self.id, 'current_snapshot_id': self.current_snapshot_id, 'historical_snapshot_id': self.historical_snapshot_id, 'match_timestamp': self.match_timestamp.isoformat(), 'overall_similarity': self.overall_similarity, 'shape_similarity': self.shape_similarity, 'stats_similarity': self.stats_similarity, 'match_rank': self.match_rank, 'description': self.description, } if __name__ == "__main__": # Print schema for documentation print("=" * 80) print("PDF VISUALIZER DATABASE SCHEMA") print("=" * 80) print("\nTABLE: pdf_snapshots") print("-" * 80) print("Stores historical PDF snapshots with all associated data") print("\nColumns:") for column in PDFSnapshot.__table__.columns: print(f" - {column.name}: {column.type} {'(PK)' if column.primary_key else ''}") print("\n\nTABLE: predictions") print("-" * 80) print("Tracks predictions and their outcomes for accuracy evaluation") print("\nColumns:") for column in Prediction.__table__.columns: print(f" - {column.name}: {column.type} {'(PK)' if column.primary_key else ''}") print("\n\nTABLE: pattern_matches") print("-" * 80) print("Stores pattern matching results between PDFs") print("\nColumns:") for column in PatternMatch.__table__.columns: print(f" - {column.name}: {column.type} {'(PK)' if column.primary_key else ''}") print("\n" + "=" * 80)