Spaces:

OmidSakaki
/

VisualTradingAI

Sleeping

App Files Files Community

OmidSakaki committed on Oct 16, 2025

Commit

dd46062

verified ·

1 Parent(s): cb87d3a

Update app.py

Browse files

Files changed (1) hide show

app.py +246 -625

app.py CHANGED Viewed

@@ -1,658 +1,279 @@
 import gradio as gr
 import numpy as np
-import pandas as pd
 import torch
-import time
-import sys
-import os
-import threading
-import logging
-from datetime import datetime, timedelta
-from typing import Dict, Any, Optional, Tuple
-import warnings
-warnings.filterwarnings('ignore')
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Create directories safely
-def setup_directories():
-    """Setup project directories with error handling"""
-    directories = ['src', 'src/environments', 'src/agents', 'src/sentiment', 'src/visualizers', 'src/utils']
-    for dir_path in directories:
-        try:
-            os.makedirs(dir_path, exist_ok=True)
-            init_file = os.path.join(dir_path, '__init__.py')
-            if not os.path.exists(init_file):
-                with open(init_file, 'w') as f:
-                    f.write('# Auto-generated init file\n')
-        except Exception as e:
-            logger.warning(f"Could not create directory {dir_path}: {e}")
-setup_directories()
-# Add src to path safely
-if 'src' not in sys.path:
-    sys.path.insert(0, 'src')
-# Safe imports with fallbacks
-try:
-    from src.environments.advanced_trading_env import AdvancedTradingEnvironment
-    from src.agents.advanced_agent import AdvancedTradingAgent
-    from src.utils.config import TradingConfig
-    from src.visualizers.chart_renderer import ChartRenderer
-    CUSTOM_MODULES_AVAILABLE = True
-except ImportError as e:
-    logger.warning(f"Custom modules not available: {e}. Using fallback mode.")
-    CUSTOM_MODULES_AVAILABLE = False
-    # Fallback imports will be defined below
-class SafeTradingDemo:
-    """Safe trading demo with comprehensive error handling"""
-    def __init__(self):
-        self.env = None
-        self.agent = None
-        self.config = TradingConfig() if CUSTOM_MODULES_AVAILABLE else None
-        self.renderer = ChartRenderer() if CUSTOM_MODULES_AVAILABLE else None
-        self.current_state = None
-        self.is_training = False
-        self.training_complete = False
-        self.live_trading = False
-        self.trading_thread = None
-        self.lock = threading.Lock()
-        self.live_data: list = []
-        self.performance_data: list = []
-        self.action_history: list = []
-        self.training_history: list = []
-        self.initialized = False
-        self.start_time = None
-        self.last_update = None
-        # Fallback environment and agent if custom modules unavailable
-        if not CUSTOM_MODULES_AVAILABLE:
-            self._setup_fallback_components()
-    def _setup_fallback_components(self):
-        """Setup basic fallback components"""
-        class FallbackEnvironment:
-            def __init__(self, initial_balance, risk_level, asset_type):
-                self.initial_balance = initial_balance
-                self.current_balance = initial_balance
-                self.position = 0
-                self.current_price = 100.0
-            def reset(self):
-                self.current_balance = self.initial_balance
-                self.position = 0
-                self.current_price = 100.0 + np.random.normal(0, 5)
-                return np.random.rand(84, 84, 4).astype(np.float32)
-            def step(self, action):
-                self.current_price += np.random.normal(0, 1)
-                reward = np.random.normal(0, 10)
-                self.current_balance += reward * 0.1
-                done = False
-                info = {'net_worth': self.current_balance}
-                next_state = np.random.rand(84, 84, 4).astype(np.float32)
-                return next_state, reward, done, info
-        class FallbackAgent:
-            def __init__(self, state_dim, action_dim):
-                self.epsilon = 1.0
-                self.action_dim = action_dim
-            def select_action(self, state):
-                if np.random.random() < self.epsilon:
-                    return np.random.randint(0, self.action_dim)
-                return 0
-            def store_transition(self, *args):
-                pass
-            def update(self):
-                self.epsilon = max(0.01, self.epsilon * 0.999)
-                return np.random.random()
-        self.FallbackEnvironment = FallbackEnvironment
-        self.FallbackAgent = FallbackAgent
-    def initialize_environment(self, initial_balance: float, risk_level: str,
-                             asset_type: str) -> str:
-        """Initialize trading environment with comprehensive validation"""
-        try:
-            with self.lock:
-                if self.live_trading:
-                    return "⚠️ لطفاً ابتدا معاملات را متوقف کنید"
-                # Validate inputs
-                if initial_balance < 1000:
-                    return "❌ سرمایه اولیه باید حداقل 1000 دلار باشد"
-                if risk_level not in ["Low", "Medium", "High"]:
-                    return "❌ سطح ریسک نامعتبر"
-                if asset_type not in ["Crypto", "Stock", "Forex"]:
-                    return "❌ نوع دارایی نامعتبر"
-                logger.info(f"Initializing environment: balance={initial_balance}, "
-                          f"risk={risk_level}, asset={asset_type}")
-                if CUSTOM_MODULES_AVAILABLE:
-                    self.env = AdvancedTradingEnvironment(
-                        initial_balance=float(initial_balance),
-                        risk_level=risk_level,
-                        asset_type=asset_type,
-                        use_sentiment=False  # Disable for demo stability
-                    )
-                    self.agent = AdvancedTradingAgent(
-                        state_dim=(84, 84, 4),
-                        action_dim=4,
-                        learning_rate=self.config.learning_rate
-                    )
-                else:
-                    self.env = self.FallbackEnvironment(initial_balance, risk_level, asset_type)
-                    self.agent = self.FallbackAgent((84, 84, 4), 4)
-                self.current_state = self.env.reset()
-                self._reset_data()
-                self.initialized = True
-                self.start_time = datetime.now()
-                return (f"✅ محیط معاملاتی با موفقیت راه‌اندازی شد!\n\n"
-                       f"💰 سرمایه: ${initial_balance:,.2f}\n"
-                       f"🎯 نوع دارایی: {asset_type}\n"
-                       f"⚡ سطح ریسک: {risk_level}\n\n"
-                       f"🚀 آماده برای آموزش...")
-        except Exception as e:
-            logger.error(f"Environment initialization error: {e}", exc_info=True)
-            return f"❌ خطا در راه‌اندازی: {str(e)}"
-    def _reset_data(self):
-        """Reset all data structures"""
-        self.live_data.clear()
-        self.performance_data.clear()
-        self.action_history.clear()
-        self.training_history.clear()
-        self.training_complete = False
-        self.live_trading = False
-    def train_agent(self, num_episodes: int):
-        """Train agent with progress updates and safety checks"""
-        if not self.initialized:
-            yield "❌ ابتدا محیط را راه‌اندازی کنید", None
-            return
-        if self.live_trading:
-            yield "⚠️ ابتدا معاملات را متوقف کنید", None
-            return
-        try:
-            num_episodes = max(1, min(100, int(num_episodes)))  # Limit episodes
-            self.is_training = True
-            for episode in range(num_episodes):
-                if not self.is_training:
-                    break
-                episode_start = time.time()
-                state = self.env.reset()
-                episode_reward = 0.0
-                done = False
-                step_count = 0
-                max_steps = 200  # Safety limit
-                while not done and step_count < max_steps:
-                    action = self.agent.select_action(state)
-                    next_state, reward, done, info = self.env.step(action)
-                    try:
-                        self.agent.store_transition(state, action, reward, next_state, done)
-                    except:
-                        pass  # Ignore storage errors in demo
-                    state = next_state
-                    episode_reward += reward
-                    step_count += 1
-                # Update agent
-                try:
-                    loss = self.agent.update()
-                except:
-                    loss = 0.0
-                # Store episode data
-                self.training_history.append({
-                    'episode': episode,
-                    'reward': episode_reward,
-                    'net_worth': info.get('net_worth', 10000),
-                    'loss': loss,
-                    'steps': step_count,
-                    'duration': time.time() - episode_start
-                })
-                # Create progress visualization
-                try:
-                    progress_fig = self._create_training_chart()
-                except:
-                    progress_fig = None
-                # Progress status
-                progress = (episode + 1) / num_episodes * 100
-                status = (f"🔄 آموزش در حال انجام...\n"
-                         f"📊 اپیزود {episode+1}/{num_episodes} ({progress:.1f}%)\n"
-                         f"🎯 پاداش: {episode_reward:.2f}\n"
-                         f"💰 پرتفولیو: ${info.get('net_worth', 0):.2f}\n"
-                         f"📉 Loss: {loss:.4f}")
-                yield status, progress_fig
-                time.sleep(0.05)  # Brief pause for UI responsiveness
-            self.training_complete = True
-            final_stats = self._calculate_training_stats()
-            yield final_stats, self._create_training_chart()
-        except Exception as e:
-            logger.error(f"Training error: {e}", exc_info=True)
-            self.is_training = False
-            yield f"❌ خطا در آموزش: {str(e)}", None
-        finally:
-            self.is_training = False
-    def _calculate_training_stats(self) -> str:
-        """Calculate and format training statistics"""
-        if not self.training_history:
-            return "آمار آموزش در دسترس نیست"
-        rewards = [h['reward'] for h in self.training_history]
-        net_worths = [h['net_worth'] for h in self.training_history]
-        return (f"✅ آموزش تکمیل شد!\n\n"
-               f"📊 آمار نهایی:\n"
-               f"• اپیزودها: {len(rewards)}\n"
-               f"• میانگین پاداش: {np.mean(rewards):.2f}\n"
-               f"• پاداش نهایی: {rewards[-1]:.2f}\n"
-               f"• ارزش نهایی: ${net_worths[-1]:.2f}\n"
-               f"🚀 آماده معامله Real-Time!")
-    def _create_training_chart(self):
-        """Create training progress chart"""
-        try:
-            if not self.training_history:
-                return None
-            import plotly.graph_objects as go
-            from plotly.subplots import make_subplots
-            episodes = [h['episode'] for h in self.training_history]
-            rewards = [h['reward'] for h in self.training_history]
-            net_worths = [h['net_worth'] for h in self.training_history]
-            fig = make_subplots(rows=2, cols=1, subplot_titles=['پاداش اپیزود', 'ارزش پرتفولیو'])
-            fig.add_trace(go.Scatter(x=episodes, y=rewards, mode='lines+markers',
-                                   name='پاداش', line=dict(color='blue')), row=1, col=1)
-            fig.add_trace(go.Scatter(x=episodes, y=net_worths, mode='lines+markers',
-                                   name='پرتفولیو', line=dict(color='green')), row=2, col=1)
-            fig.update_layout(height=400, title="📈 پیشرفت آموزش", template="plotly_white")
-            return fig
-        except:
-            return None
-    def start_live_trading(self) -> Tuple[str, Any, Any, Any]:
-        """Start live trading with safety checks"""
-        try:
-            with self.lock:
-                if not self.training_complete and CUSTOM_MODULES_AVAILABLE:
-                    return "⚠️ لطفاً ابتدا آموزش را کامل کنید", None, None, None
-                if self.live_trading:
-                    return "⚠️ معاملات در حال اجراست", None, None, None
-                self.live_trading = True
-                self._reset_data()
-                self._initialize_demo_data()
-                # Start trading thread
-                self.trading_thread = threading.Thread(target=self._trading_loop, daemon=True)
-                self.trading_thread.start()
-                time.sleep(0.5)  # Allow thread to initialize
-                return self._get_live_status()
-        except Exception as e:
-            logger.error(f"Live trading start error: {e}")
-            return f"❌ خطا در شروع معاملات: {str(e)}", None, None, None
-    def _trading_loop(self):
-        """Safe trading loop with error handling"""
-        max_steps = 500
-        step = 0
-        while self.live_trading and step < max_steps:
-            try:
-                with self.lock:
-                    if not self.initialized or self.env is None:
-                        break
-                    # Get action
-                    action = self.agent.select_action(self.current_state)
-                    # Execute step
-                    next_state, reward, done, info = self.env.step(action)
-                    self.current_state = next_state
-                    # Generate demo data
-                    self._generate_demo_step(action, reward, info)
-                step += 1
-                time.sleep(1)  # 1 second intervals
-            except Exception as e:
-                logger.error(f"Trading loop error: {e}")
-                time.sleep(2)
-                continue
-        self.live_trading = False
-    def _generate_demo_step(self, action: int, reward: float, info: Dict):
-        """Generate realistic demo data"""
-        current_time = datetime.now()
-        last_price = self.live_data[-1]['price'] if self.live_data else 100.0
-        # Simulate price movement
-        base_change = np.random.normal(0, 0.5)
-        action_bias = {0: 0, 1: 0.3, 2: -0.3, 3: 0}[action]
-        new_price = max(50, last_price + base_change + action_bias)
-        # Update net worth
-        net_worth = info.get('net_worth', self.env.initial_balance + reward * 10)
-        self.live_data.append({
-            'timestamp': current_time,
-            'price': new_price,
-            'action': action,
-            'net_worth': net_worth,
-            'reward': reward,
-            'volume': np.random.randint(1000, 10000)
-        })
-        # Keep recent data only
-        if len(self.live_data) > 100:
-            self.live_data.pop(0)
-        self.action_history.append({
-            'step': len(self.action_history),
-            'action': action,
-            'reward': reward,
-            'price': new_price,
-            'timestamp': current_time
-        })
-    def _initialize_demo_data(self):
-        """Initialize demo data"""
-        base_price = 100.0
-        for i in range(10):
-            self.live_data.append({
-                'timestamp': datetime.now() - timedelta(seconds=10-i),
-                'price': base_price + np.random.normal(0, 2),
-                'action': 0,
-                'net_worth': self.env.initial_balance if self.env else 10000,
-                'reward': 0,
-                'volume': np.random.randint(1000, 5000)
-            })
-    def _get_live_status(self) -> Tuple[str, Any, Any, pd.DataFrame]:
-        """Get current live trading status"""
-        try:
-            if not self.live_data:
-                return "📊 در حال آماده‌سازی...", None, None, self._create_empty_stats()
-            current = self.live_data[-1]
-            initial = self.env.initial_balance if self.env else 10000
-            profit = current['net_worth'] - initial
-            profit_pct = (profit / initial) * 100
-            action_names = ["نگهداری", "خرید", "فروش", "بستن"]
-            status = (f"🎯 معاملات Real-Time فعال\n"
-                     f"💰 قیمت: ${current['price']:.2f}\n"
-                     f"🎪 اقدام: {action_names[current['action']]}\n"
-                     f"💼 پرتفولیو: ${current['net_worth']:.2f}\n"
-                     f"📈 P&L: ${profit:+.2f} ({profit_pct:+.2f}%)")
-            live_fig = self._create_live_chart()
-            perf_fig = self._create_performance_chart()
-            stats_df = self._create_stats_table()
-            return status, live_fig, perf_fig, stats_df
-        except Exception as e:
-            logger.error(f"Status update error: {e}")
-            return "❌ خطا در به‌روزرسانی", None, None, self._create_empty_stats()
-    def get_live_update(self) -> Tuple[str, Any, Any, pd.DataFrame]:
-        """Manual live update trigger"""
-        return self._get_live_status()
-    def stop_live_trading(self) -> Tuple[str, Any, Any, pd.DataFrame]:
-        """Stop live trading safely"""
-        try:
-            with self.lock:
-                self.live_trading = False
-                if self.trading_thread and self.trading_thread.is_alive():
-                    self.trading_thread.join(timeout=2.0)
-            if self.live_data:
-                final = self.live_data[-1]
-                initial = self.env.initial_balance if self.env else 10000
-                profit = final['net_worth'] - initial
-                profit_pct = (profit / initial) * 100
-                actions = [h['action'] for h in self.action_history]
-                action_counts = {i: actions.count(i) for i in range(4)}
-                status = (f"🛑 معاملات متوقف شد\n\n"
-                         f"📊 نتایج نهایی:\n"
-                         f"• سرمایه نهایی: ${final['net_worth']:.2f}\n"
-                         f"• سود/زیان: ${profit:+.2f} ({profit_pct:+.2f}%)\n"
-                         f"• کل اقدامات: {len(actions)}\n"
-                         f"• خرید: {action_counts[1]} | فروش: {action_counts[2]}")
-            else:
-                status = "معاملات متوقف شد - داده‌ای ثبت نشده"
-            return status, self._create_live_chart(), self._create_performance_chart(), self._create_stats_table()
-        except Exception as e:
-            logger.error(f"Stop trading error: {e}")
-            return f"❌ خطا در توقف: {str(e)}", None, None, self._create_empty_stats()
-    def _create_live_chart(self):
-        """Create live price chart"""
-        try:
-            if not self.live_data:
-                import plotly.graph_objects as go
-                fig = go.Figure()
-                fig.update_layout(title="در حال آماده‌سازی...", height=400)
-                return fig
-            import plotly.graph_objects as go
-            from plotly.subplots import make_subplots
-            data = self.live_data[-50:]  # Last 50 points
-            times = [d['timestamp'] for d in data]
-            prices = [d['price'] for d in data]
-            volumes = [d['volume'] for d in data]
-            fig = make_subplots(rows=2, cols=1, row_heights=[0.7, 0.3],
-                              subplot_titles=['قیمت', 'حجم'])
-            fig.add_trace(go.Scatter(x=times, y=prices, mode='lines', name='قیمت',
-                                   line=dict(color='cyan', width=2)), row=1, col=1)
-            # Action markers
-            for action, color, name in [(1, 'green', 'خرید'), (2, 'red', 'فروش')]:
-                action_times = [d['timestamp'] for d in data if d['action'] == action]
-                action_prices = [d['price'] for d in data if d['action'] == action]
-                if action_times:
-                    fig.add_trace(go.Scatter(x=action_times, y=action_prices, mode='markers',
-                                           marker=dict(color=color, size=10),
-                                           name=name), row=1, col=1)
-            fig.add_trace(go.Bar(x=times, y=volumes, name='حجم', marker_color='blue',
-                               opacity=0.6), row=2, col=1)
-            fig.update_layout(height=450, template="plotly_dark", showlegend=True)
-            return fig
-        except:
-            return None
-    def _create_performance_chart(self):
-        """Create performance chart"""
-        try:
-            if not self.live_data:
-                import plotly.graph_objects as go
-                fig = go.Figure()
-                fig.update_layout(title="در حال آماده‌سازی...", height=300)
-                return fig
-            import plotly.graph_objects as go
-            times = [d['timestamp'] for d in self.live_data]
-            net_worths = [d['net_worth'] for d in self.live_data]
-            fig = go.Figure()
-            fig.add_trace(go.Scatter(x=times, y=net_worths, mode='lines', name='پرتفولیو',
-                                   line=dict(color='green', width=3)))
-            initial = self.env.initial_balance if self.env else 10000
-            fig.add_hline(y=initial, line_dash="dash", line_color="red",
-                         annotation_text=f"سرمایه اولیه: ${initial:.2f}")
-            fig.update_layout(height=350, title="عملکرد پرتفولیو", template="plotly_dark")
-            return fig
-        except:
-            return None
-    def _create_stats_table(self) -> pd.DataFrame:
-        """Create statistics table"""
-        try:
-            if not self.live_data:
-                return self._create_empty_stats()
-            current = self.live_data[-1]
-            initial = self.env.initial_balance if self.env else 10000
-            profit = current['net_worth'] - initial
-            profit_pct = (profit / initial) * 100
-            stats = {
-                'متریک': ['💰 قیمت فعلی', '💼 پرتفولیو', '📈 P&L', '🎯 اقدام اخیر', '⏰ گام‌ها'],
-                'مقدار': [
-                    f"${current['price']:.2f}",
-                    f"${current['net_worth']:.2f}",
-                    f"${profit:+.2f} ({profit_pct:+.2f}%)",
-                    {0: 'نگهداری', 1: 'خرید', 2: 'فروش', 3: 'بستن'}[current['action']],
-                    str(len(self.action_history))
-                ]
-            }
-            return pd.DataFrame(stats)
-        except:
-            return self._create_empty_stats()
-    def _create_empty_stats(self) -> pd.DataFrame:
-        """Create empty stats table"""
-        return pd.DataFrame({
-            'متریک': ['وضعیت'],
-            'مقدار': ['در حال آماده‌سازی...']
-        })
-def create_interface():
-    """Create Gradio interface with proper error handling"""
-    demo = SafeTradingDemo()
-    with gr.Blocks(theme=gr.themes.Soft(), title="🤖 AI Trading Demo") as interface:
-        gr.Markdown("# 🚀 هوش مصنوعی معامله‌گر هوشمند\n**آموزش و معاملات Real-Time**")
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("## ⚙️ تنظیمات")
-                balance = gr.Slider(1000, 50000, value=10000, step=1000, label="سرمایه اولیه ($)")
-                risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="سطح ریسک")
-                asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="نوع دارایی")
-                init_btn = gr.Button("🚀 راه‌اندازی", variant="primary")
-                init_status = gr.Textbox(label="وضعیت", interactive=False)
-            with gr.Column(scale=2):
-                status = gr.Textbox(label="وضعیت کلی", interactive=False, lines=4)
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("## 🎓 آموزش")
-                episodes = gr.Slider(10, 100, value=20, step=5, label="اپیزودها")
-                train_btn = gr.Button("🤖 شروع آموزش", variant="primary")
-            with gr.Column(scale=2):
-                train_plot = gr.Plot(label="پیشرفت آموزش")
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("## 🎯 معاملات زنده")
-                start_btn = gr.Button("▶️ شروع معاملات", variant="secondary")
-                update_btn = gr.Button("🔄 به‌روزرسانی", variant="secondary")
-                stop_btn = gr.Button("⏹️ توقف", variant="stop")
-            with gr.Column(scale=3):
-                live_chart = gr.Plot(label="نمودار زنده")
-        with gr.Row():
-            perf_chart = gr.Plot(label="عملکرد")
-            stats_table = gr.DataFrame(label="آمار", headers=["متریک", "مقدار"])
-        # Event handlers
-        init_btn.click(
-            demo.initialize_environment,
-            inputs=[balance, risk, asset],
-            outputs=[init_status]
-        )
-        train_btn.click(
-            demo.train_agent,
-            inputs=[episodes],
-            outputs=[status, train_plot]
-        )
-        start_btn.click(
-            demo.start_live_trading,
-            outputs=[status, live_chart, perf_chart, stats_table]
-        )
-        update_btn.click(
-            demo.get_live_update,
-            outputs=[status, live_chart, perf_chart, stats_table]
-        )
-        stop_btn.click(
-            demo.stop_live_trading,
-            outputs=[status, live_chart, perf_chart, stats_table]
-        )
-    return interface, demo
-if __name__ == "__main__":
-    logger.info("Starting AI Trading Demo...")
-    interface, demo = create_interface()
-    try:
-        interface.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            share=False,
-            show_error=True,
-            quiet=False
-        )
-    except Exception as e:
-        logger.error(f"Failed to launch interface: {e}")
-        print(f"خطا در راه‌اندازی: {e}")

import random
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Tuple, Any

import gradio as gr
import numpy as np
import torch
import yaml
from gymnasium import spaces
from loguru import logger
@dataclass
class TradingConfig:
    """Default settings shared by the trading environment and the DQN agent.

    Every field has a default, so ``TradingConfig()`` yields the stock
    configuration. Values may be overridden by keyword at construction time
    or reassigned on the instance afterwards.
    """

    # --- environment settings ---
    initial_balance: float = 10000.0  # starting cash of an episode
    max_steps: int = 1000  # an episode ends once this many steps were taken
    transaction_cost: float = 0.001  # NOTE(review): not read by the environment code visible here
    risk_level: str = "Medium"
    asset_type: str = "Crypto"

    # --- agent settings ---
    learning_rate: float = 0.0001  # Adam step size
    gamma: float = 0.99  # discount factor of the Q-learning target
    epsilon_start: float = 1.0  # initial exploration probability
    epsilon_min: float = 0.01  # floor of the exploration probability
    epsilon_decay: float = 0.9995  # multiplicative decay per learning step
    batch_size: int = 32  # transitions sampled per learning step
    memory_size: int = 10000  # capacity of the replay buffer
    target_update: int = 100  # learning steps between target-network syncs
class AdvancedTradingEnvironment:
    """Synthetic single-asset trading simulator with a Gymnasium-style API.

    Prices follow a multiplicative random walk and a "sentiment" series is
    simulated next to them; no market data is loaded.

    Actions (``Discrete(4)``):
        0 -- hold
        1 -- buy with 20% of the available cash
        2 -- sell 20% of the held position
        3 -- close the whole position

    Observations are 12-element ``float32`` vectors, see
    ``_get_observation``.
    """

    TRADE_FRACTION = 0.2  # share of cash / position moved by one buy or sell
    MIN_PRICE = 10.0  # floor applied to every simulated price

    def __init__(self, config):
        """Create the environment from *config*.

        Args:
            config: object exposing ``initial_balance`` and ``max_steps``.
        """
        self.initial_balance = config.initial_balance
        self.balance = self.initial_balance
        self.position = 0.0
        self.current_price = 100.0
        self.step_count = 0
        self.max_steps = config.max_steps
        self.price_history = []
        self.sentiment_history = []
        self._initialize_data()
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=-2.0, high=2.0, shape=(12,), dtype=np.float32)

    def _initialize_data(self):
        """Seed the histories with 100 synthetic price/sentiment points."""
        n_points = 100
        base_price = 100.0
        for i in range(n_points):
            # Sine wave plus Gaussian noise, floored so the price stays positive.
            price = base_price + np.sin(i * 0.1) * 10 + np.random.normal(0, 2)
            self.price_history.append(max(self.MIN_PRICE, price))
            sentiment = 0.5 + np.random.normal(0, 0.1)
            self.sentiment_history.append(np.clip(sentiment, 0.0, 1.0))
        self.current_price = self.price_history[-1]

    def reset(self):
        """Start a new episode.

        Returns:
            ``(observation, info)`` for the first step of the episode.
        """
        self.balance = self.initial_balance
        self.position = 0.0
        self.step_count = 0
        start_price = max(self.MIN_PRICE, float(100.0 + np.random.normal(0, 5)))
        self.price_history = [start_price]
        # Keep the traded price in sync with the fresh history; otherwise the
        # new episode would continue from the previous episode's last price.
        self.current_price = start_price
        self.sentiment_history = [0.5]
        obs = self._get_observation()
        info = self._get_info()
        return obs, info

    def step(self, action):
        """Advance the simulation by one step.

        The price moves first, then *action* is executed at the new price.
        The reward is the change in net worth over the whole step (price
        move included), so holding a position through a price change is
        rewarded or penalised.

        Args:
            action: integer in ``[0, 3]``, see the class docstring.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        self.step_count += 1
        # Snapshot the portfolio value BEFORE the price moves; taking it
        # afterwards makes every reward zero because trades are value-neutral.
        prev_net_worth = self.balance + self.position * self.current_price

        price_change = np.random.normal(0, 0.02)
        self.current_price = max(self.MIN_PRICE, float(self.current_price * (1 + price_change)))
        self.price_history.append(self.current_price)

        sentiment_change = np.random.normal(0, 0.05)
        new_sentiment = np.clip(self.sentiment_history[-1] + sentiment_change, 0.0, 1.0)
        self.sentiment_history.append(new_sentiment)

        reward = self._execute_action(action, prev_net_worth)

        # NOTE(review): reaching max_steps is reported as `terminated`, not
        # `truncated`; left unchanged because callers outside this block may
        # only look at `terminated`.
        terminated = bool(self.balance <= 0 or self.step_count >= self.max_steps)
        truncated = False
        obs = self._get_observation()
        info = self._get_info()
        return obs, reward, terminated, truncated, info

    def _execute_action(self, action, prev_net_worth=None):
        """Apply *action* at the current price and return the reward.

        Args:
            action: integer in ``[0, 3]``; unknown values behave like hold.
            prev_net_worth: portfolio value the reward is measured against.
                When omitted, the value at the current price is used.

        Returns:
            Net-worth change since *prev_net_worth*, expressed in percent of
            the initial balance.
        """
        if prev_net_worth is None:
            prev_net_worth = self.balance + self.position * self.current_price

        if action == 1:  # Buy
            if self.balance > 0:
                cost = self.balance * self.TRADE_FRACTION
                self.position += cost / self.current_price
                self.balance -= cost
        elif action == 2:  # Sell
            if self.position > 0:
                sell_amount = self.position * self.TRADE_FRACTION
                self.position -= sell_amount
                self.balance += sell_amount * self.current_price
        elif action == 3:  # Close
            if self.position > 0:
                self.balance += self.position * self.current_price
                self.position = 0.0

        net_worth = self.balance + self.position * self.current_price
        return (net_worth - prev_net_worth) / self.initial_balance * 100

    def _get_observation(self):
        """Build the 12-element feature vector describing the current state.

        Layout: cash ratio, position-value ratio, scaled price, mean and std
        of the recent prices, mean and std of the recent sentiment, episode
        progress, then four zero padding slots.
        """
        # Until ten points exist, fall back to a flat window.
        if len(self.price_history) >= 10:
            recent_prices = self.price_history[-10:]
        else:
            recent_prices = [self.current_price] * 10
        if len(self.sentiment_history) >= 10:
            recent_sentiments = self.sentiment_history[-10:]
        else:
            recent_sentiments = [0.5] * 10

        # NOTE(review): values are not clipped to the [-2, 2] bounds declared
        # in observation_space.
        features = [
            self.balance / self.initial_balance,
            self.position * self.current_price / self.initial_balance,
            self.current_price / 100.0,
            np.mean(recent_prices) / 100.0,
            np.std(recent_prices) / 100.0,
            np.mean(recent_sentiments),
            np.std(recent_sentiments),
            self.step_count / self.max_steps,
            0.0, 0.0, 0.0, 0.0,  # Padding
        ]
        return np.array(features, dtype=np.float32)

    def _get_info(self):
        """Return the auxiliary info dict: cash plus marked-to-market position."""
        net_worth = self.balance + self.position * self.current_price
        return {'net_worth': net_worth}
class DQNAgent:
    """Deep Q-Network agent with a replay buffer and a target network.

    Actions are chosen epsilon-greedily; ``update`` performs one gradient
    step on a uniformly sampled mini-batch and decays epsilon.
    """

    def __init__(self, state_dim, action_dim, config, device='cpu'):
        """Build the networks, optimizer and replay buffer.

        Args:
            state_dim: length of the flat observation vector.
            action_dim: number of discrete actions.
            config: object exposing ``learning_rate``, ``memory_size``,
                ``gamma``, ``epsilon_start``, ``epsilon_min``,
                ``epsilon_decay``, ``batch_size`` and ``target_update``.
            device: torch device string the networks live on.
        """
        self.device = torch.device(device)
        self.action_dim = action_dim
        self.q_network = self._build_network(state_dim, action_dim).to(self.device)
        self.target_network = self._build_network(state_dim, action_dim).to(self.device)
        # The target network starts as an exact copy of the online network.
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=config.learning_rate)
        self.memory = deque(maxlen=config.memory_size)
        self.gamma = config.gamma
        self.epsilon = config.epsilon_start
        self.epsilon_min = config.epsilon_min
        self.epsilon_decay = config.epsilon_decay
        self.batch_size = config.batch_size
        self.target_update = config.target_update
        self.steps = 0

    @staticmethod
    def _build_network(state_dim, action_dim):
        """Return the two-hidden-layer MLP used for both Q-networks."""
        return torch.nn.Sequential(
            torch.nn.Linear(state_dim, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, action_dim),
        )

    def select_action(self, state, training=True):
        """Pick an action for *state*.

        Args:
            state: flat observation (sequence or array of ``state_dim`` floats).
            training: when true, a uniformly random action is returned with
                probability ``epsilon``; otherwise the choice is greedy.

        Returns:
            The chosen action index as an ``int`` in ``[0, action_dim)``.
        """
        # Explore over the configured action count, not a hard-coded range.
        if training and random.random() < self.epsilon:
            return random.randrange(self.action_dim)
        state_t = torch.as_tensor(
            np.asarray(state, dtype=np.float32), device=self.device
        ).unsqueeze(0)
        with torch.no_grad():
            return self.q_network(state_t).argmax(1).item()

    def store_transition(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer (oldest entries drop out)."""
        self.memory.append((state, action, reward, next_state, done))

    def update(self):
        """Run one learning step on a random mini-batch.

        Returns:
            The mini-batch MSE loss as a ``float``, or ``0.0`` when the
            buffer holds fewer than ``batch_size`` transitions (nothing is
            learned and epsilon is left untouched in that case).
        """
        if len(self.memory) < self.batch_size:
            return 0.0

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        states_t = torch.as_tensor(np.asarray(states, dtype=np.float32), device=self.device)
        actions_t = torch.as_tensor(np.asarray(actions, dtype=np.int64), device=self.device)
        rewards_t = torch.as_tensor(np.asarray(rewards, dtype=np.float32), device=self.device)
        next_states_t = torch.as_tensor(np.asarray(next_states, dtype=np.float32), device=self.device)
        dones_t = torch.as_tensor(np.asarray(dones, dtype=np.float32), device=self.device)

        current_q = self.q_network(states_t).gather(1, actions_t.unsqueeze(1)).squeeze(1)
        # The bootstrap target is a constant for this step: no gradient may
        # flow into the target network.
        with torch.no_grad():
            next_q = self.target_network(next_states_t).max(1)[0]
            target_q = rewards_t + self.gamma * next_q * (1 - dones_t)

        loss = torch.nn.MSELoss()(current_q, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.steps += 1
        if self.steps % self.target_update == 0:
            self.target_network.load_state_dict(self.q_network.state_dict())
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        return loss.item()
+class TradingDemo:
+    """Holds the environment and agent that the UI callbacks operate on.
+
+    ``env`` and ``agent`` stay ``None`` until ``initialize`` is called;
+    ``train`` and ``simulate`` report that state instead of failing.
+    """
+    NOT_READY_MESSAGE = "⚠️ Please initialize the demo first."
+    def __init__(self):
+        self.config = TradingConfig()
+        self.env = None
+        self.agent = None
+        self.device = 'cpu'
+    def initialize(self, balance, risk, asset):
+        """Store the chosen settings on the config and build a fresh env and agent.
+
+        The agent is created with 12 state features and 4 actions.
+        Returns a status string.
+        """
+        self.config.initial_balance = balance
+        self.config.risk_level = risk
+        self.config.asset_type = asset
+        self.env = AdvancedTradingEnvironment(self.config)
+        self.agent = DQNAgent(12, 4, self.config, self.device)
+        return "✅ Initialized!"
+    def _is_ready(self):
+        """Tell whether ``initialize`` has created both the env and the agent."""
+        return self.env is not None and self.agent is not None
+    def train(self, episodes):
+        """Train for ``episodes`` episodes, yielding ``(status, None)`` after each.
+
+        ``episodes`` may arrive as a float or ``None`` from a numeric UI
+        field; it is coerced to ``int`` before use.
+        """
+        if not self._is_ready():
+            yield self.NOT_READY_MESSAGE, None
+            return
+        episodes = int(episodes or 0)
+        for ep in range(episodes):
+            obs, _ = self.env.reset()
+            total_reward = 0
+            done = False
+            while not done:
+                action = self.agent.select_action(obs)
+                next_obs, reward, terminated, truncated, info = self.env.step(action)
+                # Only the terminal flag is stored, as before.
+                self.agent.store_transition(obs, action, reward, next_obs, terminated)
+                obs = next_obs
+                total_reward += reward
+                # Assumes the 4th element of step() is a gymnasium-style
+                # ``truncated`` flag; honouring it stops the loop when the
+                # env ends an episode that way. TODO confirm.
+                done = terminated or truncated
+            # NOTE(review): a single optimisation step per episode; confirm
+            # this is intended rather than one step per environment step.
+            self.agent.update()
+            yield f"Episode {ep+1}/{episodes} | Reward: {total_reward:.2f}", None
+        yield "✅ Training complete!", None
+    def simulate(self, steps):
+        """Run the greedy policy for up to ``steps`` steps and plot the result.
+
+        Returns ``(status, figure)``. The figure has one line for price and
+        one for net worth. ``figure`` is ``None`` when the demo has not been
+        initialized.
+        """
+        if not self._is_ready():
+            return self.NOT_READY_MESSAGE, None
+        steps = int(steps or 0)
+        obs, _ = self.env.reset()
+        prices = []
+        net_worths = []
+        for _ in range(steps):
+            action = self.agent.select_action(obs, training=False)
+            obs, _reward, terminated, truncated, info = self.env.step(action)
+            prices.append(self.env.current_price)
+            net_worths.append(info['net_worth'])
+            if terminated or truncated:
+                break
+        import plotly.graph_objects as go
+        fig = go.Figure()
+        fig.add_trace(go.Scatter(y=prices, mode='lines', name='Price'))
+        fig.add_trace(go.Scatter(y=net_worths, mode='lines', name='Net Worth'))
+        return "✅ Simulation complete!", fig
+# Module-level demo object; every callback below is a bound method of it.
+demo = TradingDemo()
+with gr.Blocks() as interface:
+    gr.Markdown("# Trading AI Demo")
+    with gr.Row():
+        balance = gr.Slider(1000, 50000, 10000, label="Balance")
+        risk = gr.Radio(["Low", "Medium", "High"], value="Medium", label="Risk")
+        asset = gr.Radio(["Crypto", "Stock", "Forex"], value="Crypto", label="Asset")
+        init_btn = gr.Button("Initialize")
+    status = gr.Textbox(label="Status")
+    # precision=0 makes Gradio pass integers to the callbacks; the default
+    # float value (50.0 / 100.0) cannot be used directly in range().
+    episodes = gr.Number(value=50, label="Episodes", precision=0)
+    train_btn = gr.Button("Train")
+    train_plot = gr.Plot()
+    steps = gr.Number(value=100, label="Simulation Steps", precision=0)
+    sim_btn = gr.Button("Simulate")
+    sim_plot = gr.Plot()
+    # Wire each button to its handler: inputs first, then the outputs it fills.
+    init_btn.click(demo.initialize, [balance, risk, asset], status)
+    train_btn.click(demo.train, episodes, [status, train_plot])
+    sim_btn.click(demo.simulate, steps, [status, sim_plot])
+interface.launch()