Spaces:

ckharche
/

ProfitBook

Sleeping

App Files Files Community

ProfitBook / trade_analysis /deploy.py

ckharche

Upload 14 files

57a6af0 verified 5 months ago

raw

history blame contribute delete

9.6 kB

	# trade_analysis/deploy.py
	"""
	Deployment configuration for different environments
	Run the same code on HPC, local, or cloud
	"""

	import asyncio
	import os
	from dataclasses import dataclass
	from datetime import datetime
	from enum import Enum
	from typing import Optional

	import torch

	class DeploymentMode(Enum):
	    """Closed set of environments this code can be deployed to."""

	    # Full models, unlimited resources
	    HPC = "hpc"
	    # Quantized models, CPU/small GPU
	    LOCAL = "local"
	    # RunPod/Colab, medium resources
	    CLOUD = "cloud"
	    # Lambda/Vercel, minimal
	    SERVERLESS = "serverless"

	@dataclass
	class DeploymentConfig:
	    """Resource settings chosen for one deployment environment.

	    Attributes:
	        mode: Environment class that was selected.
	        device: Torch device string, ``"cuda"`` or ``"cpu"``.
	        max_gpu_memory: GPU memory budget in GB; ``None`` whenever no CUDA
	            device is usable (i.e. whenever ``device`` is ``"cpu"``).
	        quantization: Whether models should be loaded quantized.
	        batch_size: Batch size to use in this environment.
	        cache_dir: Directory in which model weights are cached.
	    """

	    mode: DeploymentMode
	    device: str
	    max_gpu_memory: Optional[int]  # GB
	    quantization: bool
	    batch_size: int
	    cache_dir: str

	    @staticmethod
	    def _gpu_memory_gb() -> int:
	        """Return the total memory of CUDA device 0 in whole (decimal) GB.

	        Must only be called when CUDA is available.
	        """
	        # Integer floor division keeps the value an int; the previous
	        # `// 1e9` silently produced a float.
	        return int(torch.cuda.get_device_properties(0).total_memory // 10**9)

	    @classmethod
	    def auto_detect(cls) -> "DeploymentConfig":
	        """Inspect the environment and return a matching configuration.

	        Detection order: HPC (``/scratch`` directory or ``SLURM_JOB_ID``),
	        Colab (``COLAB_GPU``), RunPod (``RUNPOD_POD_ID``), otherwise a local
	        machine. ``DeploymentMode.SERVERLESS`` is never auto-detected.

	        Returns:
	            A populated ``DeploymentConfig``. ``device`` is ``"cuda"`` only
	            when ``torch.cuda.is_available()`` reports a usable GPU.
	        """
	        # Probe CUDA once; every branch derives its device from this so a
	        # CPU-only node is never told to use "cuda".
	        has_gpu = torch.cuda.is_available()
	        device = "cuda" if has_gpu else "cpu"

	        # Check for HPC markers
	        if os.path.exists("/scratch") or "SLURM_JOB_ID" in os.environ:
	            return cls(
	                mode=DeploymentMode.HPC,
	                device=device,
	                max_gpu_memory=80 if has_gpu else None,  # H100 has 80GB
	                quantization=False,
	                batch_size=32,
	                cache_dir="/scratch/models",
	            )

	        # Check for Colab
	        if "COLAB_GPU" in os.environ:
	            return cls(
	                mode=DeploymentMode.CLOUD,
	                device=device,
	                max_gpu_memory=15 if has_gpu else None,  # T4 has 15GB
	                quantization=True,
	                batch_size=8,
	                cache_dir="/content/models",
	            )

	        # RunPod and local machines report the measured size of GPU 0.
	        gpu_mem = cls._gpu_memory_gb() if has_gpu else None

	        # Check for RunPod
	        if "RUNPOD_POD_ID" in os.environ:
	            return cls(
	                mode=DeploymentMode.CLOUD,
	                device=device,
	                max_gpu_memory=gpu_mem,
	                # Quantize if less than 24GB (or if there is no GPU at all)
	                quantization=gpu_mem is None or gpu_mem < 24,
	                batch_size=16,
	                cache_dir="/workspace/models",
	            )

	        # Local machine
	        return cls(
	            mode=DeploymentMode.LOCAL,
	            device=device,
	            max_gpu_memory=gpu_mem,
	            quantization=True,  # Always quantize locally
	            batch_size=4,
	            cache_dir="./models",
	        )

	class ScalableModels:
	    """Load models sized to the detected deployment environment."""

	    def __init__(self):
	        """Auto-detect the environment and print the chosen settings."""
	        self.config = DeploymentConfig.auto_detect()
	        print(f"🔧 Deployment Mode: {self.config.mode.value}")
	        print(f"🔧 Device: {self.config.device}")
	        print(f"🔧 Quantization: {self.config.quantization}")

	    def load_llm(self):
	        """Load a causal LLM that fits the current environment.

	        Returns:
	            The loaded transformers model, or ``None`` when the environment
	            should use an external runtime instead (llama.cpp on local
	            machines without a GPU of at least 6 GB, a hosted inference API
	            for serverless).
	        """
	        mode = self.config.mode
	        cache_dir = self.config.cache_dir

	        if mode == DeploymentMode.HPC:
	            # Large model in half precision (float16)
	            from transformers import AutoModelForCausalLM

	            return AutoModelForCausalLM.from_pretrained(
	                "mistralai/Mistral-Nemo-Instruct-2407",  # 12B model
	                torch_dtype=torch.float16,
	                device_map="auto",
	                cache_dir=cache_dir,
	            )

	        if mode == DeploymentMode.CLOUD:
	            # Medium model; quantized only when the config asks for it
	            from transformers import AutoModelForCausalLM

	            model_id = "mistralai/Mistral-7B-Instruct-v0.2"  # 7B model

	            if not self.config.quantization:
	                # The detected GPU is large enough for plain float16.
	                return AutoModelForCausalLM.from_pretrained(
	                    model_id,
	                    torch_dtype=torch.float16,
	                    device_map="auto",
	                    cache_dir=cache_dir,
	                )

	            from transformers import BitsAndBytesConfig

	            quantization_config = BitsAndBytesConfig(
	                load_in_4bit=True,
	                bnb_4bit_compute_dtype=torch.float16,
	                bnb_4bit_quant_type="nf4",
	            )
	            return AutoModelForCausalLM.from_pretrained(
	                model_id,
	                quantization_config=quantization_config,
	                device_map="auto",
	                cache_dir=cache_dir,
	            )

	        if mode == DeploymentMode.LOCAL:
	            # Small, efficient models
	            # `max_gpu_memory` may be None without a GPU; treat that as 0.
	            gpu_gb = self.config.max_gpu_memory or 0
	            if self.config.device == "cuda" and gpu_gb >= 6:
	                from transformers import AutoModelForCausalLM

	                # phi-2 is small enough for GPUs with 6 GB or more
	                return AutoModelForCausalLM.from_pretrained(
	                    "microsoft/phi-2",  # 2.7B model
	                    torch_dtype=torch.float16,
	                    device_map="auto",
	                    cache_dir=cache_dir,
	                )

	            # CPU-only: Use GGUF quantized models with llama.cpp
	            print("💡 For CPU, use llama.cpp with GGUF models")
	            return None

	        # SERVERLESS: use API endpoints instead of loading weights
	        print("💡 Use HuggingFace Inference API for serverless")
	        return None

	    def load_sentiment_models(self):
	        """Load the sentiment classifiers appropriate for the environment.

	        HPC and cloud load three models; every other mode loads only
	        ``ProsusAI/finbert``. A model that fails to load is skipped with a
	        printed warning, so the returned list may be shorter than requested
	        (or empty).

	        Returns:
	            List of loaded models, already moved to ``self.config.device``.
	        """
	        if self.config.mode in [DeploymentMode.HPC, DeploymentMode.CLOUD]:
	            # Load the full set of three models
	            model_ids = [
	                'ProsusAI/finbert',
	                'yiyanghkust/finbert-tone',
	                'cardiffnlp/twitter-roberta-base-sentiment-latest',
	            ]
	        else:
	            # Load only the best model
	            model_ids = ['ProsusAI/finbert']

	        from transformers import AutoModelForSequenceClassification

	        models = []
	        for model_id in model_ids:
	            try:
	                model = AutoModelForSequenceClassification.from_pretrained(
	                    model_id,
	                    cache_dir=self.config.cache_dir,
	                ).to(self.config.device)
	            except Exception as exc:
	                # Best effort: keep going with the remaining models, but say
	                # which one was dropped instead of failing silently.
	                print(f"⚠️ Skipping sentiment model {model_id}: {exc}")
	            else:
	                models.append(model)

	        return models

	    def load_tft_model(self, symbol: str):
	        """Create a ``GapPredictionTFT`` and load its weights when cached.

	        Args:
	            symbol: Ticker symbol; weights are looked up at
	                ``<cache_dir>/tft_<symbol>.pth``.

	        Returns:
	            The model, with pretrained weights loaded only if that file
	            exists.
	        """
	        from .tft_model import GapPredictionTFT

	        model = GapPredictionTFT()

	        # Adjust model size based on resources
	        if self.config.mode == DeploymentMode.LOCAL:
	            # NOTE(review): these attributes are assigned after construction;
	            # confirm GapPredictionTFT reads them lazily, otherwise they do
	            # not change the network that was already built.
	            model.model.hidden_size = 64  # Halve the hidden size
	            model.model.lstm_layers = 1  # Reduce LSTM layers

	        # Try to load pretrained
	        model_path = os.path.join(self.config.cache_dir, f"tft_{symbol}.pth")
	        if os.path.exists(model_path):
	            model.load_pretrained(path=model_path)

	        return model

	# Lightweight agent for production
	class ProductionAgent:
	    """Minimal agent that works everywhere.

	    Polls a short list of symbols on a mode-dependent interval and appends
	    high-confidence signals to a text file for manual review.
	    """

	    # Symbols scanned when `analyze_markets` is called without arguments.
	    DEFAULT_SYMBOLS = ('QQQ', 'SPY', 'NVDA')
	    # A signal is reported only when its confidence is strictly above this.
	    CONFIDENCE_THRESHOLD = 75
	    # Append-only log of reported signals (timestamp,symbol,action,confidence).
	    SIGNALS_FILE = 'signals.txt'

	    def __init__(self):
	        """Build the model loader and reuse its detected configuration."""
	        self.models = ScalableModels()
	        self.config = self.models.config

	    async def run_on_schedule(self):
	        """Run `analyze_markets` forever at a mode-dependent interval.

	        The interval is 5 minutes on HPC, 15 minutes on cloud and 30 minutes
	        otherwise. No market-hours gating is implemented; the loop runs
	        around the clock. A failing cycle is reported and the loop continues;
	        task cancellation still propagates.
	        """
	        if self.config.mode == DeploymentMode.HPC:
	            interval = 300
	        elif self.config.mode == DeploymentMode.CLOUD:
	            interval = 900
	        else:
	            interval = 1800

	        while True:
	            try:
	                await self.analyze_markets()
	            except asyncio.CancelledError:
	                # On Python 3.7 CancelledError is an Exception subclass;
	                # never swallow a cancellation request.
	                raise
	            except Exception as exc:
	                # One bad cycle (e.g. a network error) must not stop the
	                # scheduler for good.
	                print(f"⚠️ Market analysis failed, retrying next cycle: {exc}")
	            await asyncio.sleep(interval)

	    async def analyze_markets(self, symbols=None):
	        """Check each symbol and record signals above the threshold.

	        Args:
	            symbols: Iterable of ticker symbols; defaults to
	                ``DEFAULT_SYMBOLS`` when omitted.
	        """
	        if symbols is None:
	            symbols = self.DEFAULT_SYMBOLS

	        for symbol in symbols:
	            # Quick signal check using yfinance only
	            signal = await self.quick_signal(symbol)

	            if signal['confidence'] > self.CONFIDENCE_THRESHOLD:
	                print(f"🎯 SIGNAL: {symbol} - {signal['action']} ({signal['confidence']}%)")

	                # Save to file for manual review
	                with open(self.SIGNALS_FILE, 'a', encoding='utf-8') as f:
	                    f.write(f"{datetime.now()},{symbol},{signal['action']},{signal['confidence']}\n")

	    async def quick_signal(self, symbol: str):
	        """Fetch today's 5-minute bars for `symbol` and score them.

	        Returns:
	            Dict with ``'action'`` (``'CALLS'``, ``'PUTS'`` or ``'HOLD'``)
	            and ``'confidence'``; ``HOLD`` with confidence 0 when no data
	            came back.
	        """
	        import yfinance as yf

	        # Get recent data
	        df = yf.Ticker(symbol).history(period='1d', interval='5m')
	        if df.empty:
	            return {'action': 'HOLD', 'confidence': 0}

	        return self._score_bars(df['Close'], df['Volume'])

	    @staticmethod
	    def _score_bars(close, volume):
	        """Turn closing prices and volumes into an action/confidence dict."""
	        # Simple momentum: change versus the bar 10 positions from the end;
	        # neutral when fewer than 10 bars exist.
	        momentum = (close.iloc[-1] / close.iloc[-10] - 1) if len(close) >= 10 else 0

	        # Volume check: last bar relative to the average bar. A zero or NaN
	        # average gives no usable ratio, so treat it as "no surge".
	        avg_volume = volume.mean()
	        vol_ratio = volume.iloc[-1] / avg_volume if avg_volume > 0 else 0.0

	        # Decision: needs both a move beyond 0.5% and a volume surge.
	        if vol_ratio > 1.5 and abs(momentum) > 0.005:
	            action = 'CALLS' if momentum > 0 else 'PUTS'
	            # 70 base plus up to 30 for the size of the move; plain float so
	            # the result does not carry a numpy scalar.
	            confidence = float(70 + min(30, abs(momentum) * 1000))
	            return {'action': action, 'confidence': confidence}

	        return {'action': 'HOLD', 'confidence': 50}