Spaces:
Running
Running
File size: 9,192 Bytes
aceffd1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | import os
import sqlite3
import time
import pandas as pd
from typing import Optional
from sqlalchemy import create_engine, Column, String, Float, Date, UniqueConstraint, DateTime, JSON, Integer
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.dialects.postgresql import insert
from config import logger, OUTPUT_DIR
Base = declarative_base()
class DailyPrice(Base):
__tablename__ = 'daily_prices'
ticker = Column(String, primary_key=True)
date = Column(Date, primary_key=True)
close_price = Column(Float, nullable=False)
__table_args__ = (UniqueConstraint('ticker', 'date', name='uq_daily_prices_ticker_date'),)
class DailyYield(Base):
__tablename__ = 'daily_yields'
ticker = Column(String, primary_key=True)
date = Column(Date, primary_key=True)
yield_pct = Column(Float)
__table_args__ = (UniqueConstraint('ticker', 'date', name='uq_daily_yields_ticker_date'),)
class StitchMetadata(Base):
__tablename__ = 'stitch_metadata'
ticker = Column(String, primary_key=True)
date = Column(Date, primary_key=True)
source = Column(String) # 'direct', 'proxy_stitched', 'synthetic'
proxy_used = Column(String)
adjustment_factor = Column(Float)
__table_args__ = (UniqueConstraint('ticker', 'date', name='uq_stitch_metadata_ticker_date'),)
import uuid
import datetime
class AssetFundamentals(Base):
__tablename__ = "asset_fundamentals"
ticker = Column(String, primary_key=True, index=True)
pe_ratio = Column(Float, nullable=True)
fcf_yield = Column(Float, nullable=True)
operating_margin = Column(Float, nullable=True)
implied_volatility = Column(Float, nullable=True)
updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow)
class SavedPortfolio(Base):
__tablename__ = "saved_portfolios"
id = Column(Integer, primary_key=True, index=True)
username = Column(String, index=True)
name = Column(String, nullable=False)
tickers = Column(JSON, nullable=False)
weights = Column(JSON, nullable=False)
html_report = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow)
class BacktestHistory(Base):
__tablename__ = "backtest_history"
id = Column(Integer, primary_key=True, index=True)
username = Column(String, index=True)
model_used = Column(String, nullable=False)
return_pct = Column(Float, nullable=False)
sharpe_ratio = Column(Float, nullable=False)
max_drawdown = Column(Float, nullable=False)
tickers = Column(JSON, nullable=True)
weights = Column(JSON, nullable=True)
html_report = Column(String, nullable=True)
executed_at = Column(DateTime, default=datetime.datetime.utcnow)
class UserMemory(Base):
__tablename__ = "user_memory"
id = Column(Integer, primary_key=True, index=True)
username = Column(String, index=True, unique=True)
memory_text = Column(String, nullable=False)
updated_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow)
class WebhookConfig(Base):
__tablename__ = "webhook_configs"
id = Column(Integer, primary_key=True, index=True)
username = Column(String, unique=True, index=True)
webhook_url = Column(String, nullable=True)
api_secret_key = Column(String, unique=True, nullable=False)
class AuditLog(Base):
__tablename__ = 'audit_log'
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
user_id = Column(String, nullable=True)
endpoint = Column(String, nullable=False)
request_hash = Column(String, nullable=True)
request_body = Column(JSON, nullable=True)
response_weights = Column(JSON, nullable=True)
timestamp = Column(DateTime, default=datetime.datetime.utcnow)
ip_address = Column(String, nullable=True)
_ENGINE = None
def with_db_retry(max_retries=3):
"""Decorator to retry database operations on transient failures."""
def decorator(func):
def wrapper(*args, **kwargs):
for attempt in range(max_retries):
try:
return func(*args, **kwargs)
except SQLAlchemyError as e:
if attempt == max_retries - 1:
raise
logger.warning(f"Database operation failed: {e}. Retrying ({attempt + 1}/{max_retries})...")
time.sleep(1.0 * (2 ** attempt)) # Exponential backoff
return wrapper
return decorator
def get_pg_engine():
"""
Creates and returns a singleton SQLAlchemy engine for PostgreSQL.
Expects DATABASE_URL to be set in the environment, falling back to local defaults if missing.
"""
global _ENGINE
if _ENGINE is not None:
return _ENGINE
import time
from sqlalchemy import text
db_url = os.getenv("DATABASE_URL")
if not db_url:
db_url = f"sqlite:///{os.path.join(OUTPUT_DIR, 'portfolio_db.sqlite3')}"
if db_url.startswith("sqlite"):
_ENGINE = create_engine(db_url, echo=False)
return _ENGINE
retries = 3
for attempt in range(retries):
try:
_ENGINE = create_engine(db_url, echo=False, pool_size=10, max_overflow=20, pool_pre_ping=True, pool_recycle=3600, connect_args={'connect_timeout': 5})
with _ENGINE.connect() as conn:
conn.execute(text("SELECT 1"))
return _ENGINE
except Exception as e:
logger.warning(f"Database connection attempt {attempt + 1} failed: {e}")
if attempt == retries - 1:
logger.error("All PostgreSQL connection attempts failed. Falling back to SQLite.")
_ENGINE = create_engine(f"sqlite:///{os.path.join(OUTPUT_DIR, 'portfolio_db.sqlite3')}", echo=False)
return _ENGINE
time.sleep(2 * (attempt + 1))
def init_db():
"""Initializes the database schema (Creates tables if they don't exist)."""
engine = get_pg_engine()
Base.metadata.create_all(engine)
logger.info("PostgreSQL Database schema initialized.")
def migrate_sqlite_to_postgres(sqlite_path: Optional[str] = None):
"""
Reads the legacy SQLite finance database and bulk inserts all historical
price and yield records into the new PostgreSQL database.
"""
if sqlite_path is None:
sqlite_path = os.path.join(OUTPUT_DIR, "finance_data.db")
if not os.path.exists(sqlite_path):
logger.warning(f"Legacy SQLite database not found at {sqlite_path}. Nothing to migrate.")
return
logger.info(f"Starting migration from SQLite ({sqlite_path}) to PostgreSQL...")
# 1. Connect to SQLite
sqlite_conn = sqlite3.connect(sqlite_path)
# 2. Extract Data
try:
prices_df = pd.read_sql("SELECT ticker, date, close_price FROM daily_prices", sqlite_conn)
logger.info(f"Extracted {len(prices_df)} records from SQLite daily_prices.")
except Exception as e:
logger.warning(f"Could not read daily_prices from SQLite: {e}")
prices_df = pd.DataFrame()
try:
yields_df = pd.read_sql("SELECT ticker, date, yield_pct FROM daily_yields", sqlite_conn)
logger.info(f"Extracted {len(yields_df)} records from SQLite daily_yields.")
except Exception as e:
logger.warning(f"Could not read daily_yields from SQLite: {e}")
yields_df = pd.DataFrame()
sqlite_conn.close()
# 3. Connect to Postgres & Initialize schema
init_db()
pg_engine = get_pg_engine()
# 4. Transform and Load
Session = sessionmaker(bind=pg_engine)
session = Session()
try:
# We use pd.DataFrame.to_sql for massive bulk insert performance.
# Convert date strings to actual dates first
def insert_on_conflict_nothing(table, conn, keys, data_iter):
data = [dict(zip(keys, row)) for row in data_iter]
stmt = insert(table.table).values(data).on_conflict_do_nothing()
result = conn.execute(stmt)
return result.rowcount
if not prices_df.empty:
prices_df['date'] = pd.to_datetime(prices_df['date']).dt.date
prices_df.to_sql('daily_prices', pg_engine, if_exists='append', index=False, method=insert_on_conflict_nothing, chunksize=10000)
logger.info("Successfully migrated daily_prices to PostgreSQL.")
if not yields_df.empty:
yields_df['date'] = pd.to_datetime(yields_df['date']).dt.date
yields_df.to_sql('daily_yields', pg_engine, if_exists='append', index=False, method=insert_on_conflict_nothing, chunksize=10000)
logger.info("Successfully migrated daily_yields to PostgreSQL.")
except Exception as e:
logger.error(f"Migration failed during PostgreSQL insertion: {e}")
session.rollback()
finally:
session.close()
logger.info("Migration routine complete.")
if __name__ == "__main__":
# If run standalone, execute the migration
migrate_sqlite_to_postgres()
|