OmidSakaki committed on
Commit
ef8a89d
·
verified ·
1 Parent(s): 2965337

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -707
app.py CHANGED
@@ -5,717 +5,19 @@ import torch.nn as nn
5
  import torch.optim as optim
6
  from collections import deque
7
  import random
8
- from pathlib import Path
9
  from typing import Dict, Tuple, Any, List, Optional
10
  import plotly.graph_objects as go
11
  from plotly.subplots import make_subplots
12
- import yaml
13
 
14
- # Configuration class for trading parameters
15
class TradingConfig:
    """
    Single place holding every tunable value of the demo.

    The environment reads the market/portfolio settings and the DQN agent
    reads the learning settings; callers change behaviour by overwriting
    attributes on an instance.
    """

    def __init__(self):
        # --- Trading environment -------------------------------------
        self.initial_balance = 10_000.0
        self.max_steps = 1_000
        self.transaction_cost = 1e-3
        self.risk_level = "Medium"
        self.asset_type = "Crypto"

        # --- DQN agent -----------------------------------------------
        self.learning_rate = 1e-4
        self.gamma = 0.99  # discount factor
        self.epsilon_start = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.memory_size = 10_000
        self.target_update = 100
        self.hidden_size = 128

        # --- Scaling factor looked up by risk level --------------------
        # A fresh dict per instance, so editing one config never leaks
        # into another.
        self.risk_multipliers = dict(Low=0.5, Medium=1.0, High=2.0)
45
-
46
class AdvancedTradingEnvironment:
    """
    Synthetic single-asset trading environment with a reset/step interface.

    Prices, volumes and a sentiment score are produced from NumPy random
    draws.  Volatility is chosen by ``config.asset_type`` ("Crypto", "Stock"
    or "Forex") and scaled by the multiplier that ``config.risk_level``
    selects from ``config.risk_multipliers``.

    Actions: 0 = hold, 1 = buy, 2 = sell part of the position,
    3 = close the whole position.  Observations are float32 vectors of
    length 15.
    """

    def __init__(self, config: "TradingConfig"):
        """
        Build the environment from ``config``.

        Reads ``initial_balance``, ``max_steps``, ``transaction_cost``,
        ``risk_level``, ``risk_multipliers`` and ``asset_type``.

        Raises:
            KeyError: if ``config.risk_level`` is not a key of
                ``config.risk_multipliers``.
        """
        self.config = config
        self.initial_balance = config.initial_balance
        self.balance = self.initial_balance
        self.position = 0.0
        self.current_price = 100.0
        self.step_count = 0
        self.max_steps = config.max_steps
        self.transaction_cost = config.transaction_cost

        # Market data history
        self.price_history: List[float] = []
        self.volume_history: List[float] = []
        self.sentiment_history: List[float] = []

        # Risk adjustment
        self.risk_multiplier = config.risk_multipliers[config.risk_level]

        # Seed the histories with synthetic data
        self._initialize_market_data()

        # Action count and observation shape
        self.action_space = 4  # 0: Hold, 1: Buy, 2: Sell, 3: Close Position
        self.observation_space = (15,)

        # Portfolio tracking (filled by step())
        self.portfolio_history: List[float] = []
        self.action_history: List[int] = []

    def _initialize_market_data(self):
        """Fill the three histories with 200 synthetic samples."""
        n_points = 200

        # Per-sample noise level depends on the asset type
        volatility_map = {
            "Crypto": 0.03,
            "Stock": 0.015,
            "Forex": 0.008
        }
        volatility = volatility_map.get(self.config.asset_type, 0.02)
        base_price = 100.0

        for i in range(n_points):
            # Slow sine wave plus Gaussian noise around the base price
            momentum = np.sin(i * 0.05) * 2
            noise = np.random.normal(0, volatility)
            price = base_price * (1 + momentum * 0.01 + noise)
            price = max(10.0, price)  # price floor
            self.price_history.append(price)

            # Volume grows with the distance of the price from the base price
            volume = 1000 + abs(price - base_price) * 50 + np.random.normal(0, 200)
            self.volume_history.append(max(100, volume))

            # Sentiment is a clipped random walk started near 0.5
            if i > 0:
                sentiment = self.sentiment_history[-1] + np.random.normal(0, 0.08)
            else:
                sentiment = 0.5 + np.random.normal(0, 0.1)
            self.sentiment_history.append(np.clip(sentiment, 0.0, 1.0))

        self.current_price = self.price_history[-1]

    def _calculate_technical_indicators(self) -> List[float]:
        """
        Derive six indicators from the last 50 prices and last 10 volumes.

        Returns:
            ``[sma_short/100, sma_long/100, rsi/100, volatility, momentum,
            volume_trend]``, or six zeros when fewer than two prices exist.
        """
        prices = np.array(self.price_history[-50:])

        if len(prices) < 2:
            return [0.0] * 6

        returns = np.diff(prices) / prices[:-1]

        # Simple moving averages (fall back to the last price on short history)
        sma_short = np.mean(prices[-10:]) if len(prices) >= 10 else prices[-1]
        sma_long = np.mean(prices[-20:]) if len(prices) >= 20 else prices[-1]

        # RSI-style ratio of average gain to average loss
        if len(returns) >= 14:
            gains = returns[returns > 0]
            losses = -returns[returns < 0]
            # 0.001 stands in for an empty side so the ratio stays defined
            avg_gain = np.mean(gains[-14:]) if len(gains) > 0 else 0.001
            avg_loss = np.mean(losses[-14:]) if len(losses) > 0 else 0.001
            rsi = 100 - (100 / (1 + avg_gain / avg_loss))
        else:
            rsi = 50.0

        # Standard deviation of returns scaled by sqrt(252)
        volatility = np.std(returns) * np.sqrt(252) if len(returns) > 1 else 0.1

        # Relative price change over the last five samples
        momentum = (prices[-1] / prices[-5] - 1) if len(prices) >= 5 else 0.0

        # Mean of the last 5 volumes relative to the mean of the last 10
        volumes = np.array(self.volume_history[-10:])
        if len(volumes) >= 10:
            volume_trend = np.mean(volumes[-5:]) / np.mean(volumes[-10:]) - 1
        else:
            volume_trend = 0.0

        return [sma_short / 100, sma_long / 100, rsi / 100, volatility, momentum, volume_trend]

    def reset(self) -> Tuple[np.ndarray, Dict]:
        """
        Restore the starting portfolio and restart the market histories.

        Returns:
            ``(observation, info)`` for the fresh episode.
        """
        self.balance = self.initial_balance
        self.position = 0.0
        self.step_count = 0
        self.portfolio_history = []
        self.action_history = []

        # Each history restarts with a single random sample
        self.price_history = [100.0 + np.random.normal(0, 5)]
        self.volume_history = [1000 + np.random.normal(0, 200)]
        self.sentiment_history = [0.5 + np.random.normal(0, 0.1)]
        self.current_price = self.price_history[-1]

        return self._get_observation(), self._get_info()

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """
        Advance the market by one sample and apply ``action``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is set when the cash balance is not positive or
            ``max_steps`` is reached; ``truncated`` is always ``False``.
        """
        self.step_count += 1

        # Value the portfolio BEFORE the price moves so that the reward
        # includes the gain or loss on the open position, not only fees.
        prev_net_worth = self.balance + self.position * self.current_price

        self._update_market_data()
        reward = self._execute_action(action, prev_net_worth)

        terminated = self.balance <= 0 or self.step_count >= self.max_steps
        truncated = False

        obs = self._get_observation()
        info = self._get_info()

        self.portfolio_history.append(info['net_worth'])
        self.action_history.append(action)

        return obs, reward, terminated, truncated, info

    def _update_market_data(self):
        """Append one new price, volume and sentiment sample."""
        # Momentum is the mean of the last four simple returns.  It is
        # computed without testing the array for truthiness, which is
        # ambiguous (ValueError) for arrays with more than one element.
        if len(self.price_history) >= 6:
            window = np.asarray(self.price_history[-5:], dtype=float)
            momentum = float(np.mean(np.diff(window) / window[:-1]))
        else:
            momentum = 0.0

        volatility_map = {
            "Crypto": 0.025,
            "Stock": 0.012,
            "Forex": 0.006
        }
        base_volatility = volatility_map.get(self.config.asset_type, 0.015)

        # Noise level scales with the configured risk level
        volatility = base_volatility * self.risk_multiplier
        price_change = momentum * 0.3 + np.random.normal(0, volatility)

        self.current_price = max(10.0, self.current_price * (1 + price_change))
        self.price_history.append(self.current_price)

        # Volume rises with the size of the price move
        base_volume = 1000
        volume_noise = np.random.normal(0, 200)
        new_volume = max(100, base_volume + abs(price_change) * 5000 + volume_noise)
        self.volume_history.append(new_volume)

        # Sentiment is pulled back toward 0.5 and perturbed
        current_sentiment = self.sentiment_history[-1]
        sentiment_reversion = (0.5 - current_sentiment) * 0.1
        sentiment_noise = np.random.normal(0, 0.08)
        new_sentiment = current_sentiment + sentiment_reversion + sentiment_noise
        self.sentiment_history.append(np.clip(new_sentiment, 0.0, 1.0))

    def _execute_action(self, action: int, prev_net_worth: Optional[float] = None) -> float:
        """
        Apply ``action`` at the current price and return the reward.

        Args:
            action: 0 hold, 1 buy, 2 sell a fraction, 3 close the position.
                Any other value trades nothing.
            prev_net_worth: portfolio value the reward is measured against.
                When omitted, the value at the current price before the
                trade is used, so the reward then reflects fees only.

        Returns:
            Net-worth change as a percentage of the initial balance, minus a
            penalty once net worth is below 80% of the initial balance.
        """
        if prev_net_worth is None:
            prev_net_worth = self.balance + self.position * self.current_price

        # Fraction of cash (buy) or of the position (sell) traded per action
        trade_size_multiplier = 0.2 * self.risk_multiplier

        if action == 1:  # Buy
            if self.balance > 0:
                trade_amount = min(self.balance * trade_size_multiplier, self.balance)
                cost = trade_amount * (1 + self.transaction_cost)
                if cost <= self.balance:
                    self.position += trade_amount / self.current_price
                    self.balance -= cost

        elif action == 2:  # Sell
            if self.position > 0:
                shares_to_sell = min(self.position * trade_size_multiplier, self.position)
                proceeds = shares_to_sell * self.current_price * (1 - self.transaction_cost)
                self.position -= shares_to_sell
                self.balance += proceeds

        elif action == 3:  # Close position
            if self.position > 0:
                proceeds = self.position * self.current_price * (1 - self.transaction_cost)
                self.balance += proceeds
                self.position = 0

        new_net_worth = self.balance + self.position * self.current_price
        raw_reward = (new_net_worth - prev_net_worth) / self.initial_balance * 100

        # Extra penalty while the drawdown exceeds 20% of the initial balance
        risk_penalty = 0.0
        if new_net_worth < self.initial_balance * 0.8:
            risk_penalty = (self.initial_balance - new_net_worth) / self.initial_balance * 10

        return raw_reward - risk_penalty

    def _get_observation(self) -> np.ndarray:
        """
        Assemble the float32 observation vector.

        Layout: 4 price features, 3 portfolio features, 3 sentiment features
        and the technical indicators.  That is 16 values; only the first 15
        are kept, so the last indicator (volume trend) is dropped.
        """
        if len(self.price_history) >= 20:
            recent_prices = self.price_history[-20:]
        else:
            recent_prices = [self.current_price] * 20

        lowest = np.min(recent_prices)
        price_range = np.max(recent_prices) - lowest
        # A flat window (always the case right after reset) has zero range;
        # report the midpoint instead of dividing 0 by 0 and emitting NaN.
        if price_range > 0:
            range_position = (self.current_price - lowest) / price_range
        else:
            range_position = 0.5

        price_features = [
            self.current_price / 100.0,
            np.mean(recent_prices) / 100.0,
            np.std(recent_prices) / 100.0,
            range_position
        ]

        portfolio_features = [
            self.balance / self.initial_balance,
            self.position * self.current_price / self.initial_balance,
            self.step_count / self.max_steps
        ]

        if len(self.sentiment_history) >= 10:
            recent_sentiments = self.sentiment_history[-10:]
        else:
            recent_sentiments = [0.5] * 10
        sentiment_features = [
            np.mean(recent_sentiments),
            np.std(recent_sentiments),
            recent_sentiments[-1]  # latest sentiment
        ]

        technical_features = self._calculate_technical_indicators()

        all_features = price_features + portfolio_features + sentiment_features + technical_features

        # Truncate to the fixed observation size
        return np.array(all_features[:15], dtype=np.float32)

    def _get_info(self) -> Dict[str, Any]:
        """Return a snapshot of portfolio and market values for logging."""
        position_value = self.position * self.current_price
        net_worth = self.balance + position_value
        return_total = (net_worth - self.initial_balance) / self.initial_balance * 100

        return {
            'net_worth': net_worth,
            'return_percent': return_total,
            'position_value': position_value,
            'cash_balance': self.balance,
            'current_price': self.current_price,
            'steps': self.step_count
        }
322
-
323
class DQNAgent:
    """
    Epsilon-greedy Deep Q-Network agent.

    Keeps an online Q-network, a target copy that is re-synced every
    ``target_update`` updates, and a bounded replay buffer from which
    minibatches are drawn for each update.
    """

    def __init__(self, state_dim: int, action_dim: int, config: "TradingConfig", device: str = 'cpu'):
        """
        Create the networks, optimizer and replay buffer.

        Args:
            state_dim: length of the observation vector.
            action_dim: number of discrete actions.
            config: source of the learning hyper-parameters.
            device: torch device string the networks are placed on.
        """
        self.device = torch.device(device)
        self.state_dim, self.action_dim = state_dim, action_dim
        self.config = config

        # Online network first, then a target network that starts as a copy
        self.q_network = self._build_network(state_dim, action_dim)
        self.target_network = self._build_network(state_dim, action_dim)
        self.target_network.load_state_dict(self.q_network.state_dict())

        # Only the online network is optimized
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=config.learning_rate)
        self.criterion = nn.MSELoss()

        # Replay buffer; the oldest transitions fall out once it is full
        self.memory = deque(maxlen=config.memory_size)

        # Exploration schedule
        self.epsilon = config.epsilon_start
        self.epsilon_min = config.epsilon_min
        self.epsilon_decay = config.epsilon_decay

        # Update settings and counter of performed updates
        self.batch_size = config.batch_size
        self.gamma = config.gamma
        self.target_update = config.target_update
        self.steps = 0

    def _build_network(self, state_dim: int, action_dim: int) -> nn.Sequential:
        """Return a four-layer ReLU MLP mapping a state to one Q-value per action."""
        width = self.config.hidden_size
        layers = [
            nn.Linear(state_dim, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, width // 2),
            nn.ReLU(),
            nn.Linear(width // 2, action_dim),
        ]
        return nn.Sequential(*layers).to(self.device)

    def select_action(self, state: np.ndarray, training: bool = True) -> int:
        """
        Choose an action for ``state``.

        With ``training`` set, a uniformly random action is returned with
        probability ``epsilon``; otherwise the action with the highest
        predicted Q-value is returned.
        """
        explore = training and random.random() < self.epsilon
        if explore:
            return random.randint(0, self.action_dim - 1)

        batch_of_one = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            best = self.q_network(batch_of_one).argmax(1)
        return best.item()

    def store_transition(self, state: np.ndarray, action: int, reward: float,
                         next_state: np.ndarray, done: bool):
        """Append one ``(state, action, reward, next_state, done)`` tuple to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def update(self) -> float:
        """
        Perform one gradient step on a sampled minibatch.

        Returns:
            The loss value, or ``0.0`` when the buffer holds fewer than
            ``batch_size`` transitions and nothing was updated.
        """
        if len(self.memory) < self.batch_size:
            return 0.0

        minibatch = random.sample(self.memory, self.batch_size)
        state_b, action_b, reward_b, next_state_b, done_b = zip(*minibatch)

        state_t = torch.FloatTensor(np.array(state_b)).to(self.device)
        action_t = torch.LongTensor(action_b).to(self.device)
        reward_t = torch.FloatTensor(reward_b).to(self.device)
        next_state_t = torch.FloatTensor(np.array(next_state_b)).to(self.device)
        done_t = torch.BoolTensor(done_b).to(self.device)

        # Q-value the online network assigns to the action actually taken
        predicted = self.q_network(state_t).gather(1, action_t.unsqueeze(1)).squeeze(1)

        # Bootstrapped target from the target network; finished
        # transitions contribute their reward only
        with torch.no_grad():
            bootstrap = self.target_network(next_state_t).max(1)[0]
            continuing = (~done_t).float()
            target = reward_t + self.gamma * bootstrap * continuing

        loss = self.criterion(predicted, target)

        self.optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to 1.0 before stepping
        torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 1.0)
        self.optimizer.step()

        # Periodic hard sync of the target network
        self.steps += 1
        if self.steps % self.target_update == 0:
            self.target_network.load_state_dict(self.q_network.state_dict())

        # Multiplicative epsilon decay, floored at epsilon_min
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        return loss.item()

    def save(self, path: str):
        """Write both networks, the optimizer state, epsilon and the step count to ``path``."""
        checkpoint = {
            'q_network_state_dict': self.q_network.state_dict(),
            'target_network_state_dict': self.target_network.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon': self.epsilon,
            'steps': self.steps
        }
        torch.save(checkpoint, path)

    def load(self, path: str):
        """Restore everything written by :meth:`save` from ``path``."""
        checkpoint = torch.load(path, map_location=self.device)
        self.q_network.load_state_dict(checkpoint['q_network_state_dict'])
        self.target_network.load_state_dict(checkpoint['target_network_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.epsilon = checkpoint['epsilon']
        self.steps = checkpoint['steps']
449
-
450
class TradingDemo:
    """
    Glue between the trading environment, the DQN agent and the UI.

    Offers three entry points: ``initialize`` (build environment and agent),
    ``train`` (generator yielding progress text and plots) and ``simulate``
    (run the greedy policy and plot the outcome).
    """

    def __init__(self):
        """Create the default config and pick CUDA when torch reports it available."""
        self.config = TradingConfig()
        self.env = None
        self.agent = None
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")

        # Per-episode training statistics
        self.training_history = self._empty_history()

    @staticmethod
    def _empty_history() -> Dict[str, List[float]]:
        """Return a fresh, empty training-history record."""
        return {
            'episode_rewards': [],
            'episode_losses': [],
            'epsilon_history': []
        }

    @staticmethod
    def _moving_average(values: List[float], window: int) -> List[float]:
        """Return the trailing mean of ``values`` over at most ``window`` samples."""
        window = max(1, int(window))
        return [float(np.mean(values[max(0, i - window + 1):i + 1]))
                for i in range(len(values))]

    def initialize(self, balance: float, risk: str, asset: str) -> str:
        """
        Build a new environment and agent from the given settings.

        Returns:
            A status message; failures are reported in the message instead
            of being raised, because the result is shown directly in the UI.
        """
        try:
            self.config.initial_balance = float(balance)
            self.config.risk_level = risk
            self.config.asset_type = asset

            self.env = AdvancedTradingEnvironment(self.config)
            # 15 observation features, 4 actions
            self.agent = DQNAgent(15, 4, self.config, self.device)

            self.training_history = self._empty_history()

            return f"✅ System initialized! Balance: ${balance}, Risk: {risk}, Asset: {asset}"

        except Exception as e:
            return f"❌ Initialization failed: {str(e)}"

    def train(self, episodes: int):
        """
        Train the agent, yielding ``(progress_text, plot_or_None)`` per episode.

        A plot is produced every 10th episode and for the last one; a final
        completion message with a plot is yielded after the loop.  Errors
        are yielded as a message rather than raised.
        """
        if self.env is None or self.agent is None:
            yield "❌ Please initialize the system first!", None
            return

        try:
            episodes = int(episodes)
            for episode in range(episodes):
                obs, info = self.env.reset()
                total_reward = 0
                episode_loss = 0
                update_count = 0
                done = False

                while not done:
                    action = self.agent.select_action(obs)
                    next_obs, reward, terminated, truncated, info = self.env.step(action)
                    # Either flag ends the episode
                    done = terminated or truncated

                    self.agent.store_transition(obs, action, reward, next_obs, done)
                    loss = self.agent.update()

                    # update() reports 0.0 while the replay buffer is warming up
                    if loss > 0:
                        episode_loss += loss
                        update_count += 1

                    total_reward += reward
                    obs = next_obs

                avg_loss = episode_loss / max(update_count, 1)

                self.training_history['episode_rewards'].append(total_reward)
                self.training_history['episode_losses'].append(avg_loss)
                self.training_history['epsilon_history'].append(self.agent.epsilon)

                progress = f"Episode {episode+1}/{episodes} | " \
                           f"Reward: {total_reward:.2f} | " \
                           f"Loss: {avg_loss:.4f} | " \
                           f"Epsilon: {self.agent.epsilon:.3f} | " \
                           f"Net Worth: ${info['net_worth']:.2f}"

                # Plotting is comparatively slow, so do it only periodically
                if (episode + 1) % 10 == 0 or episode == episodes - 1:
                    yield progress, self._create_training_plot()
                else:
                    yield progress, None

            yield "✅ Training completed successfully!", self._create_training_plot()

        except Exception as e:
            yield f"❌ Training error: {str(e)}", None

    def simulate(self, steps: int):
        """
        Run up to ``steps`` greedy steps and return ``(result_text, figure)``.

        The figure is ``None`` when the system is not initialized, when
        ``steps`` is not positive, or when an error occurs.
        """
        if self.env is None or self.agent is None:
            return "❌ Please initialize and train the system first!", None

        try:
            steps = int(steps)
            if steps <= 0:
                return "❌ Simulation error: steps must be a positive integer", None

            obs, _ = self.env.reset()

            prices = []
            actions = []
            net_worths = []
            portfolio_values = []
            cash_balances = []

            for _step in range(steps):
                action = self.agent.select_action(obs, training=False)
                next_obs, reward, terminated, truncated, info = self.env.step(action)

                prices.append(self.env.current_price)
                actions.append(action)
                net_worths.append(info['net_worth'])
                portfolio_values.append(info['position_value'])
                cash_balances.append(info['cash_balance'])

                obs = next_obs
                if terminated or truncated:
                    break

            fig = self._create_simulation_plot(prices, actions, net_worths, portfolio_values, cash_balances)

            final_return = (net_worths[-1] - self.config.initial_balance) / self.config.initial_balance * 100
            result_text = f"✅ Simulation completed! Final Return: {final_return:.2f}% | " \
                          f"Final Net Worth: ${net_worths[-1]:.2f}"

            return result_text, fig

        except Exception as e:
            return f"❌ Simulation error: {str(e)}", None

    def _create_training_plot(self):
        """Return a 2x2 figure of the training history, or ``None`` when it is empty."""
        rewards = self.training_history['episode_rewards']
        if not rewards:
            return None

        episodes = list(range(1, len(rewards) + 1))

        fig = make_subplots(rows=2, cols=2,
                            subplot_titles=('Episode Rewards', 'Training Loss',
                                            'Epsilon Decay', 'Moving Average Reward'),
                            vertical_spacing=0.12)

        # Rewards
        fig.add_trace(
            go.Scatter(x=episodes, y=rewards,
                       mode='lines', name='Reward', line=dict(color='blue')),
            row=1, col=1
        )

        # Loss
        fig.add_trace(
            go.Scatter(x=episodes, y=self.training_history['episode_losses'],
                       mode='lines', name='Loss', line=dict(color='red')),
            row=1, col=2
        )

        # Epsilon
        fig.add_trace(
            go.Scatter(x=episodes, y=self.training_history['epsilon_history'],
                       mode='lines', name='Epsilon', line=dict(color='green')),
            row=2, col=1
        )

        # Moving average reward over the last (at most) 20 episodes
        moving_avg = self._moving_average(rewards, min(20, len(episodes)))
        fig.add_trace(
            go.Scatter(x=episodes, y=moving_avg,
                       mode='lines', name='MA Reward', line=dict(color='orange', width=2)),
            row=2, col=2
        )

        fig.update_layout(height=600, showlegend=True, title_text="Training Progress")
        return fig

    def _create_simulation_plot(self, prices, actions, net_worths, portfolio_values, cash_balances):
        """
        Return a 2x2 figure summarizing one simulation run.

        Panels: price with action markers, net worth with return % on a
        secondary axis, position value vs. cash, and action counts.
        """
        # The top-right panel needs a declared secondary y-axis so that the
        # return curve (percent) does not share a scale with net worth ($).
        fig = make_subplots(rows=2, cols=2,
                            subplot_titles=('Price & Actions', 'Portfolio Performance',
                                            'Portfolio Composition', 'Action Distribution'),
                            specs=[[{}, {"secondary_y": True}],
                                   [{}, {}]],
                            vertical_spacing=0.12,
                            horizontal_spacing=0.1)

        steps = list(range(len(prices)))

        # Price line
        fig.add_trace(
            go.Scatter(x=steps, y=prices, mode='lines', name='Price', line=dict(color='blue')),
            row=1, col=1
        )

        # One marker series per action type
        action_colors = ['gray', 'green', 'red', 'orange']  # Hold, Buy, Sell, Close
        action_names = ['Hold', 'Buy', 'Sell', 'Close']
        for action, (label, color) in enumerate(zip(action_names, action_colors)):
            action_indices = [i for i, a in enumerate(actions) if a == action]
            if action_indices:
                fig.add_trace(
                    go.Scatter(x=action_indices, y=[prices[i] for i in action_indices],
                               mode='markers', name=label,
                               marker=dict(color=color, size=8)),
                    row=1, col=1
                )

        # Portfolio performance
        initial_balance = self.config.initial_balance
        returns = [(nw - initial_balance) / initial_balance * 100 for nw in net_worths]

        fig.add_trace(
            go.Scatter(x=steps, y=net_worths, mode='lines', name='Net Worth', line=dict(color='purple')),
            row=1, col=2, secondary_y=False
        )
        fig.add_trace(
            go.Scatter(x=steps, y=returns, mode='lines', name='Return %', line=dict(color='orange')),
            row=1, col=2, secondary_y=True
        )

        # Portfolio composition
        fig.add_trace(
            go.Scatter(x=steps, y=portfolio_values, mode='lines', name='Portfolio Value', line=dict(color='green')),
            row=2, col=1
        )
        fig.add_trace(
            go.Scatter(x=steps, y=cash_balances, mode='lines', name='Cash Balance', line=dict(color='blue')),
            row=2, col=1
        )

        # Action distribution
        action_counts = [actions.count(i) for i in range(4)]
        fig.add_trace(
            go.Bar(x=action_names, y=action_counts,
                   marker_color=action_colors, name='Action Count'),
            row=2, col=2
        )

        fig.update_layout(height=700, showlegend=True, title_text="Trading Simulation Results")
        fig.update_yaxes(title_text="Return (%)", row=1, col=2, secondary_y=True)
        fig.update_yaxes(title_text="Value ($)", row=1, col=2, secondary_y=False)

        return fig
708
-
709
- # Create and launch Gradio interface
710
  def create_interface():
711
- """Create Gradio interface for the trading demo"""
712
  demo = TradingDemo()
713
-
714
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
715
  gr.Markdown("""
716
  # 🤖 Advanced AI Trading Demo
717
  **Deep Reinforcement Learning for Financial Markets**
718
-
719
  This demo shows a DQN agent learning to trade in simulated financial markets.
720
  The agent learns optimal trading strategies through reinforcement learning.
721
  """)
@@ -723,7 +25,6 @@ def create_interface():
723
  with gr.Row():
724
  with gr.Column(scale=1):
725
  gr.Markdown("## 🎯 Configuration")
726
-
727
  balance = gr.Slider(1000, 50000, 10000, step=1000, label="Initial Balance ($)")
728
  risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
729
  asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
@@ -771,17 +72,13 @@ def create_interface():
771
  2. **Initialize**: Click 'Initialize System' to set up the trading environment
772
  3. **Train**: Start training the AI agent (recommended: 100+ episodes)
773
  4. **Simulate**: Run a trading simulation to see the trained agent in action
774
-
775
  ## 🎮 Actions:
776
  - **0: Hold** - Maintain current position
777
  - **1: Buy** - Purchase asset (20% of balance)
778
  - **2: Sell** - Sell portion of position (20%)
779
  - **3: Close** - Liquidate entire position
780
  """)
781
-
782
  return interface
783
 
784
- # Launch the application
785
- if __name__ == "__main__":
786
- interface = create_interface()
787
- interface.launch(share=True, server_name="0.0.0.0", server_port=7860)
 
5
  import torch.optim as optim
6
  from collections import deque
7
  import random
 
8
  from typing import Dict, Tuple, Any, List, Optional
9
  import plotly.graph_objects as go
10
  from plotly.subplots import make_subplots
 
11
 
12
+ # (All of the TradingConfig, AdvancedTradingEnvironment, DQNAgent and TradingDemo classes are preserved exactly as they were...)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # This function is the same as before
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def create_interface():
 
16
  demo = TradingDemo()
 
17
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Trading Demo") as interface:
18
  gr.Markdown("""
19
  # 🤖 Advanced AI Trading Demo
20
  **Deep Reinforcement Learning for Financial Markets**
 
21
  This demo shows a DQN agent learning to trade in simulated financial markets.
22
  The agent learns optimal trading strategies through reinforcement learning.
23
  """)
 
25
  with gr.Row():
26
  with gr.Column(scale=1):
27
  gr.Markdown("## 🎯 Configuration")
 
28
  balance = gr.Slider(1000, 50000, 10000, step=1000, label="Initial Balance ($)")
29
  risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk Level")
30
  asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset Type")
 
72
  2. **Initialize**: Click 'Initialize System' to set up the trading environment
73
  3. **Train**: Start training the AI agent (recommended: 100+ episodes)
74
  4. **Simulate**: Run a trading simulation to see the trained agent in action
 
75
  ## 🎮 Actions:
76
  - **0: Hold** - Maintain current position
77
  - **1: Buy** - Purchase asset (20% of balance)
78
  - **2: Sell** - Sell portion of position (20%)
79
  - **3: Close** - Liquidate entire position
80
  """)
 
81
  return interface
82
 
83
+ # Important: only this line should be executed, and the variable must be named demo
84
+ demo = create_interface()