File size: 9,604 Bytes
57a6af0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# trade_analysis/deploy.py
"""

Deployment configuration for different environments.

Run the same code on HPC, a local machine, or the cloud.

"""

import asyncio
import os
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional

import torch

class DeploymentMode(Enum):
    """Environments the code can be deployed to.

    Each member's value is the lowercase string name of the environment.
    """

    # Full models, unlimited resources
    HPC = "hpc"

    # Quantized models, CPU/small GPU
    LOCAL = "local"

    # RunPod/Colab, medium resources
    CLOUD = "cloud"

    # Lambda/Vercel, minimal
    SERVERLESS = "serverless"

@dataclass
class DeploymentConfig:
    """Adaptive configuration based on environment.

    Instances are normally built by :meth:`auto_detect`, which selects one
    preset per environment (HPC, Colab, RunPod, or a local machine).
    """

    mode: DeploymentMode            # environment preset that was selected
    device: str                     # torch device string: "cuda" or "cpu"
    max_gpu_memory: Optional[int]   # GB; None on a local machine without CUDA
    quantization: bool              # whether quantized weights are preferred
    batch_size: int                 # preset batch size for the environment
    cache_dir: str                  # directory handed to model loaders as cache

    @staticmethod
    def _gpu_memory_gb() -> int:
        """Return total memory of CUDA device 0 in whole decimal GB (0 without CUDA)."""
        if not torch.cuda.is_available():
            return 0
        return int(torch.cuda.get_device_properties(0).total_memory // 10**9)

    @classmethod
    def auto_detect(cls) -> "DeploymentConfig":
        """Automatically detect and configure environment.

        Detection order (first match wins):

        1. HPC    - ``/scratch`` exists or ``SLURM_JOB_ID`` is set.
        2. Colab  - ``COLAB_GPU`` is set.
        3. RunPod - ``RUNPOD_POD_ID`` is set.
        4. Local  - anything else.

        Returns:
            A ``DeploymentConfig`` filled with the preset for the detected
            environment. ``device`` is ``"cuda"`` only when
            ``torch.cuda.is_available()`` reports a usable GPU.
        """
        has_gpu = torch.cuda.is_available()
        device = "cuda" if has_gpu else "cpu"

        # Check for HPC markers
        if os.path.exists("/scratch") or "SLURM_JOB_ID" in os.environ:
            return cls(
                mode=DeploymentMode.HPC,
                # The markers also match login / CPU-only nodes, so probe
                # CUDA like the other branches instead of assuming a GPU.
                device=device,
                max_gpu_memory=80,  # H100 has 80GB
                quantization=False,
                batch_size=32,
                cache_dir="/scratch/models",
            )

        # Check for Colab
        if "COLAB_GPU" in os.environ:
            return cls(
                mode=DeploymentMode.CLOUD,
                device=device,
                max_gpu_memory=15,  # T4 has 15GB
                quantization=True,
                batch_size=8,
                cache_dir="/content/models",
            )

        gpu_mem = cls._gpu_memory_gb()

        # Check for RunPod
        if "RUNPOD_POD_ID" in os.environ:
            return cls(
                mode=DeploymentMode.CLOUD,
                device=device,
                max_gpu_memory=gpu_mem,
                quantization=gpu_mem < 24,  # Quantize if less than 24GB
                batch_size=16,
                cache_dir="/workspace/models",
            )

        # Local machine
        return cls(
            mode=DeploymentMode.LOCAL,
            device=device,
            max_gpu_memory=gpu_mem if has_gpu else None,
            quantization=True,  # Always quantize locally
            batch_size=4,
            cache_dir="./models",
        )

class ScalableModels:
    """Load models based on available resources.

    The deployment configuration is detected once at construction time;
    every loader then chooses what to load (or declines to load anything)
    based on ``self.config``.
    """

    def __init__(self):
        """Detect the deployment environment and print a short summary."""
        self.config = DeploymentConfig.auto_detect()
        print(f"🔧 Deployment Mode: {self.config.mode.value}")
        print(f"🔧 Device: {self.config.device}")
        print(f"🔧 Quantization: {self.config.quantization}")

    def load_llm(self):
        """Load LLM based on available resources.

        Selection by ``self.config.mode``:

        * HPC        - ``mistralai/Mistral-Nemo-Instruct-2407`` in float16.
        * CLOUD      - ``mistralai/Mistral-7B-Instruct-v0.2`` in 4-bit NF4.
        * LOCAL      - ``microsoft/phi-2`` in float16 when running on CUDA
          with at least 6 GB of GPU memory; otherwise nothing is loaded.
        * SERVERLESS - nothing is loaded.

        NOTE(review): ``self.config.quantization`` is not consulted here;
        the precision is fixed per mode.

        Returns:
            The loaded causal-LM model, or ``None`` when no in-process model
            is appropriate (a hint is printed in that case).
        """

        if self.config.mode == DeploymentMode.HPC:
            # Full precision, large models
            from transformers import AutoModelForCausalLM

            model_id = "mistralai/Mistral-Nemo-Instruct-2407"  # 12B model
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.float16,
                device_map="auto",
                cache_dir=self.config.cache_dir
            )

        elif self.config.mode == DeploymentMode.CLOUD:
            # Quantized medium models
            from transformers import AutoModelForCausalLM, BitsAndBytesConfig

            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4"
            )

            model_id = "mistralai/Mistral-7B-Instruct-v0.2"  # 7B model
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                quantization_config=quantization_config,
                device_map="auto",
                cache_dir=self.config.cache_dir
            )

        elif self.config.mode == DeploymentMode.LOCAL:
            # Small, efficient models
            from transformers import AutoModelForCausalLM

            # max_gpu_memory may be None on a hand-built config; treat that
            # as "no usable GPU memory" instead of raising TypeError.
            gpu_gb = self.config.max_gpu_memory or 0
            if self.config.device == "cuda" and gpu_gb >= 6:
                # Use Phi-2 for small GPUs
                model_id = "microsoft/phi-2"  # 2.7B model
                model = AutoModelForCausalLM.from_pretrained(
                    model_id,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    cache_dir=self.config.cache_dir
                )
            else:
                # CPU-only: Use GGUF quantized models with llama.cpp
                print("💡 For CPU, use llama.cpp with GGUF models")
                return None

        else:  # SERVERLESS
            # Use API endpoints instead
            print("💡 Use HuggingFace Inference API for serverless")
            return None

        return model

    def load_sentiment_models(self):
        """Load sentiment models based on resources.

        HPC and CLOUD deployments load the full set of three sentiment
        classifiers; every other mode loads only ``ProsusAI/finbert``.
        Loading is best-effort: a model that fails to load is reported and
        skipped so the remaining ones are still returned.

        Returns:
            List of successfully loaded models moved to
            ``self.config.device``; may be empty.
        """

        models = []

        if self.config.mode in [DeploymentMode.HPC, DeploymentMode.CLOUD]:
            # Load the full ensemble
            model_ids = [
                'ProsusAI/finbert',
                'yiyanghkust/finbert-tone',
                'cardiffnlp/twitter-roberta-base-sentiment-latest'
            ]
        else:
            # Load only the best model
            model_ids = ['ProsusAI/finbert']

        from transformers import AutoModelForSequenceClassification

        for model_id in model_ids:
            try:
                model = AutoModelForSequenceClassification.from_pretrained(
                    model_id,
                    cache_dir=self.config.cache_dir
                ).to(self.config.device)
            except Exception as exc:
                # Keep the best-effort behaviour, but say what was skipped
                # and let KeyboardInterrupt/SystemExit propagate.
                print(f"⚠️ Skipping sentiment model {model_id}: {exc}")
                continue
            models.append(model)

        return models

    def load_tft_model(self, symbol: str):
        """Load TFT with appropriate settings.

        Args:
            symbol: Ticker symbol; used to locate the checkpoint
                ``<cache_dir>/tft_<symbol>.pth``.

        Returns:
            A ``GapPredictionTFT`` instance, with pretrained weights loaded
            when the checkpoint file exists.
        """
        from .tft_model import GapPredictionTFT

        model = GapPredictionTFT()

        # Adjust model size based on resources
        if self.config.mode == DeploymentMode.LOCAL:
            # Reduce model size for local.
            # NOTE(review): these attributes are assigned after the model has
            # been constructed; whether that actually resizes the network
            # depends on GapPredictionTFT - confirm.
            model.model.hidden_size = 64
            model.model.lstm_layers = 1

        # Try to load pretrained
        model_path = f"{self.config.cache_dir}/tft_{symbol}.pth"
        if os.path.exists(model_path):
            model.load_pretrained(path=model_path)

        return model

# Lightweight agent for production
class ProductionAgent:
    """Minimal agent that works everywhere.

    Polls a fixed list of symbols on a schedule, prints every signal whose
    confidence exceeds the threshold and appends it to a text file for
    manual review.
    """

    # Symbols checked on every analysis pass.
    SYMBOLS = ('QQQ', 'SPY', 'NVDA')
    # A signal is reported only when its confidence is strictly above this.
    CONFIDENCE_THRESHOLD = 75
    # File (relative to the working directory) reported signals are appended to.
    SIGNALS_FILE = 'signals.txt'

    def __init__(self):
        """Create the model loader and reuse its detected configuration."""
        self.models = ScalableModels()
        self.config = self.models.config

    async def run_on_schedule(self):
        """Run analysis forever at an interval chosen by deployment mode.

        The interval is 5 minutes on HPC, 15 minutes in the cloud and
        30 minutes otherwise. There is no market-hours gating: the loop runs
        unconditionally and never returns.
        """

        if self.config.mode == DeploymentMode.HPC:
            # Run every 5 minutes
            interval = 300
        elif self.config.mode == DeploymentMode.CLOUD:
            # Run every 15 minutes
            interval = 900
        else:
            # Run every 30 minutes locally
            interval = 1800

        while True:
            await self.analyze_markets()
            await asyncio.sleep(interval)

    async def analyze_markets(self):
        """Lightweight market analysis.

        Calls :meth:`quick_signal` for each symbol in ``SYMBOLS``. Signals
        with confidence above ``CONFIDENCE_THRESHOLD`` are printed and
        appended to ``SIGNALS_FILE`` as
        ``<timestamp>,<symbol>,<action>,<confidence>``.
        """

        for symbol in self.SYMBOLS:
            # Quick signal check using yfinance only
            signal = await self.quick_signal(symbol)

            if signal['confidence'] <= self.CONFIDENCE_THRESHOLD:
                continue

            print(f"🎯 SIGNAL: {symbol} - {signal['action']} ({signal['confidence']}%)")

            # Save to file for manual review; explicit encoding keeps the
            # log readable regardless of the platform default.
            with open(self.SIGNALS_FILE, 'a', encoding='utf-8') as f:
                f.write(f"{datetime.now()},{symbol},{signal['action']},{signal['confidence']}\n")

    async def quick_signal(self, symbol: str):
        """Ultra-light signal generation.

        Uses one day of 5-minute bars from yfinance and combines a short
        momentum reading with a volume check.

        Args:
            symbol: Ticker symbol to evaluate.

        Returns:
            Dict with keys ``'action'`` (``'CALLS'``, ``'PUTS'`` or
            ``'HOLD'``) and ``'confidence'``: 0 when no data was returned,
            50 for a plain HOLD, otherwise ``70 + min(30, |return| * 1000)``.
        """

        import yfinance as yf
        ticker = yf.Ticker(symbol)

        # Get recent data
        df = ticker.history(period='1d', interval='5m')
        if df.empty:
            return {'action': 'HOLD', 'confidence': 0}

        # Simple momentum: latest close relative to the close 9 bars earlier
        # (treated as flat when fewer than 10 bars are available).
        close = df['Close']
        returns = (close.iloc[-1] / close.iloc[-10] - 1) if len(close) >= 10 else 0

        # Volume check: latest bar's volume against the session average
        vol_ratio = df['Volume'].iloc[-1] / df['Volume'].mean()

        # Decision: a move beyond +/-0.5% counts only on elevated volume
        if returns > 0.005 and vol_ratio > 1.5:
            return {'action': 'CALLS', 'confidence': 70 + min(30, returns * 1000)}
        elif returns < -0.005 and vol_ratio > 1.5:
            return {'action': 'PUTS', 'confidence': 70 + min(30, abs(returns) * 1000)}
        else:
            return {'action': 'HOLD', 'confidence': 50}