nivakaran commited on
Commit
01d0ae1
·
verified ·
1 Parent(s): 41fbe3c

Upload folder using huggingface_hub

Browse files
airflow/Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Astro CLI Dockerfile for ModelX-Ultimate Airflow
2
+ # Includes all dependencies for ML pipeline execution
3
+
4
+ FROM quay.io/astronomer/astro-runtime:12.3.0
5
+
6
+ # Install system dependencies
7
+ USER root
8
+ RUN apt-get update && apt-get install -y \
9
+ build-essential \
10
+ libpq-dev \
11
+ && rm -rf /var/lib/apt/lists/*
12
+ USER astro
13
+
14
+ # Copy requirements and install
15
+ COPY requirements.txt /tmp/requirements.txt
16
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
airflow/README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ModelX-Ultimate Astro Airflow
2
+
3
+ Centralized Apache Airflow setup using Astronomer's Astro CLI for managing all ML pipelines.
4
+
5
+ ## DAGs Overview
6
+
7
+ | DAG | Schedule | Description |
8
+ |-----|----------|-------------|
9
+ | `weather_prediction_daily` | 4:00 AM IST | LSTM model for 25 Sri Lankan districts |
10
+ | `currency_prediction_daily` | 4:05 AM IST | GRU model for USD/LKR forex |
11
+ | `stock_prediction_daily` | 4:15 AM IST | BiLSTM models for 10 stocks |
12
+ | `anomaly_detection_periodic` | Every 6h | Anomaly detection retraining |
13
+
14
+ ## Quick Start
15
+
16
+ ### 1. Install Astro CLI
17
+ ```bash
18
+ # macOS
19
+ brew install astro
20
+
21
+ # Windows (PowerShell as Admin)
22
+ winget install -e --id Astronomer.Astro
23
+
24
+ # Linux
25
+ curl -sSL install.astronomer.io | sudo bash -s
26
+ ```
27
+
28
+ ### 2. Start Airflow
29
+ ```bash
30
+ cd airflow
31
+ astro dev start
32
+ ```
33
+
34
+ ### 3. Access Airflow UI
35
+ - URL: http://localhost:8080
36
+ - Username: `admin`
37
+ - Password: `admin`
38
+
39
+ ### 4. Enable DAGs
40
+ Turn on the DAGs in the Airflow UI to start scheduled runs.
41
+
42
+ ## Directory Structure
43
+ ```
44
+ airflow/
45
+ ├── dags/
46
+ │ ├── weather_prediction_dag.py
47
+ │ ├── currency_prediction_dag.py
48
+ │ ├── stock_prediction_dag.py
49
+ │ └── anomaly_detection_dag.py
50
+ ├── Dockerfile
51
+ ├── requirements.txt
52
+ ├── airflow.env.example
53
+ └── README.md
54
+ ```
55
+
56
+ ## Manual Trigger
57
+ ```bash
58
+ # Trigger a specific DAG
59
+ astro dev run dags trigger weather_prediction_daily
60
+ ```
61
+
62
+ ## Logs
63
+ ```bash
64
+ # View scheduler logs
65
+ astro dev logs --scheduler
66
+
67
+ # View webserver logs
68
+ astro dev logs --webserver
69
+ ```
airflow/airflow.env.example ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ModelX-Ultimate Airflow Configuration
2
+ # =====================================
3
+ # Centralized Astro Apache Airflow setup for all ML pipelines
4
+ #
5
+ # Pipelines managed:
6
+ # - Weather Prediction (4:00 AM IST daily)
7
+ # - Currency Prediction (4:05 AM IST daily)
8
+ # - Stock Prediction (4:15 AM IST daily)
9
+ # - Anomaly Detection (every 6 hours)
10
+
11
+ # Airflow settings
12
+ AIRFLOW_UID=50000
13
+ AIRFLOW_GID=0
14
+
15
+ # Environment
16
+ AIRFLOW__CORE__EXECUTOR=LocalExecutor
17
+ AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
18
+ AIRFLOW__CORE__FERNET_KEY=
19
+ AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
20
+ AIRFLOW__CORE__LOAD_EXAMPLES=false
21
+ AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
22
+
23
+ # Webserver
24
+ AIRFLOW__WEBSERVER__SECRET_KEY=modelx-secret-key-change-in-production
25
+ AIRFLOW__WEBSERVER__EXPOSE_CONFIG=false
26
+
27
+ # Scheduler
28
+ AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK=true
29
+ AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=30
30
+
31
+ # Timezone
32
+ AIRFLOW__CORE__DEFAULT_TIMEZONE=Asia/Colombo
33
+
34
+ # Project paths
35
+ PROJECT_ROOT=/opt/airflow
36
+ MODELS_PATH=/opt/airflow/models
airflow/dags/anomaly_detection_dag.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Anomaly Detection DAG
3
+ Runs every 6 hours
4
+ Retrains anomaly detection model on latest data
5
+ """
6
+ import os
7
+ import sys
8
+ import subprocess
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+
12
+ from airflow import DAG
13
+ from airflow.operators.python import PythonOperator
14
+
15
+ # Project paths
16
+ PROJECT_ROOT = Path(__file__).parent.parent.parent
17
+ ANOMALY_MODEL_PATH = PROJECT_ROOT / "models" / "anomaly-detection"
18
+
19
default_args = {
    "owner": "modelx",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}


def run_anomaly_training(model_path=None, **context):
    """Run the anomaly detection training pipeline as a subprocess.

    Args:
        model_path: Directory containing the model's ``main.py``. Defaults to
            the module-level ``ANOMALY_MODEL_PATH``; parameterized so the task
            can be exercised outside Airflow.
        **context: Airflow task context (unused).

    Returns:
        True when the training subprocess exits successfully.

    Raises:
        FileNotFoundError: If the training script is missing.
        RuntimeError: If the training subprocess exits non-zero.
    """
    model_path = Path(model_path) if model_path is not None else ANOMALY_MODEL_PATH
    main_py = model_path / "main.py"

    if not main_py.exists():
        raise FileNotFoundError(f"Anomaly training script not found: {main_py}")

    result = subprocess.run(
        [sys.executable, str(main_py)],
        capture_output=True,
        text=True,
        cwd=str(model_path),
    )

    # Negative slicing already yields the full string when it is shorter than
    # the limit, so no explicit length check is needed.
    print("STDOUT:", result.stdout[-2000:])
    if result.stderr:
        print("STDERR:", result.stderr[-1000:])

    if result.returncode != 0:
        # RuntimeError is more specific than bare Exception and still caught
        # by Airflow's task-failure handling.
        raise RuntimeError(f"Anomaly training failed with exit code {result.returncode}")

    return True
51
+
52
+
53
with DAG(
    dag_id="anomaly_detection_periodic",
    default_args=default_args,
    description="Periodic anomaly detection model retraining",
    # `schedule` replaces the deprecated `schedule_interval` (Airflow 2.4+;
    # astro-runtime 12.x ships Airflow 2.10).
    schedule="0 */6 * * *",  # every 6 hours, on the hour
    start_date=datetime(2024, 12, 1),
    catchup=False,
    tags=["anomaly", "ml", "detection", "periodic"],
    max_active_runs=1,
) as dag:
    # NOTE: `provide_context=True` is omitted — it has been a no-op since
    # Airflow 2.0, where the context is always passed to the callable.
    train_anomaly = PythonOperator(
        task_id="train_anomaly_model",
        python_callable=run_anomaly_training,
        execution_timeout=timedelta(minutes=30),
    )


if __name__ == "__main__":
    # Plain string literal: the original f-string had no placeholders.
    print("Anomaly Detection DAG - Schedule: Every 6 hours")
airflow/dags/currency_prediction_dag.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Currency Prediction DAG
3
+ Runs daily at 4:05 AM IST (22:35 UTC)
4
+ Trains GRU model for USD/LKR exchange rate prediction
5
+ """
6
+ import os
7
+ import sys
8
+ import subprocess
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+
12
+ from airflow import DAG
13
+ from airflow.operators.python import PythonOperator
14
+
15
+ # Project paths
16
+ PROJECT_ROOT = Path(__file__).parent.parent.parent
17
+ CURRENCY_MODEL_PATH = PROJECT_ROOT / "models" / "currency-volatility-prediction"
18
+
19
default_args = {
    "owner": "modelx",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=10),
}


def run_currency_training(model_path=None, **context):
    """Run the currency prediction training pipeline as a subprocess.

    Args:
        model_path: Directory containing the model's ``main.py``. Defaults to
            the module-level ``CURRENCY_MODEL_PATH``; parameterized so the
            task can be exercised outside Airflow.
        **context: Airflow task context (unused).

    Returns:
        True when the training subprocess exits successfully.

    Raises:
        FileNotFoundError: If the training script is missing.
        RuntimeError: If the training subprocess exits non-zero.
    """
    model_path = Path(model_path) if model_path is not None else CURRENCY_MODEL_PATH
    main_py = model_path / "main.py"

    if not main_py.exists():
        raise FileNotFoundError(f"Currency training script not found: {main_py}")

    result = subprocess.run(
        [sys.executable, str(main_py), "--mode", "full"],
        capture_output=True,
        text=True,
        cwd=str(model_path),
    )

    # Negative slicing already yields the full string when it is shorter than
    # the limit, so no explicit length check is needed.
    print("STDOUT:", result.stdout[-3000:])
    if result.stderr:
        print("STDERR:", result.stderr[-1000:])

    if result.returncode != 0:
        # RuntimeError is more specific than bare Exception and still caught
        # by Airflow's task-failure handling.
        raise RuntimeError(f"Currency training failed with exit code {result.returncode}")

    return True
51
+
52
+
53
with DAG(
    dag_id="currency_prediction_daily",
    default_args=default_args,
    description="Daily USD/LKR exchange rate prediction using GRU model",
    # `schedule` replaces the deprecated `schedule_interval` (Airflow 2.4+;
    # astro-runtime 12.x ships Airflow 2.10).
    schedule="35 22 * * *",  # 4:05 AM IST = 22:35 UTC (staggered after weather)
    start_date=datetime(2024, 12, 1),
    catchup=False,
    tags=["currency", "ml", "prediction", "gru", "daily", "forex"],
    max_active_runs=1,
) as dag:
    # NOTE: `provide_context=True` is omitted — it has been a no-op since
    # Airflow 2.0, where the context is always passed to the callable.
    train_currency = PythonOperator(
        task_id="train_currency_model",
        python_callable=run_currency_training,
        execution_timeout=timedelta(hours=1),
    )


if __name__ == "__main__":
    # Plain string literal: the original f-string had no placeholders.
    print("Currency Prediction DAG - Schedule: 4:05 AM IST daily")
airflow/dags/stock_prediction_dag.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Stock Prediction DAG
3
+ Runs daily at 4:15 AM IST (22:45 UTC)
4
+ Trains BiLSTM models for 10 stocks
5
+ """
6
+ import os
7
+ import sys
8
+ import subprocess
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+
12
+ from airflow import DAG
13
+ from airflow.operators.python import PythonOperator
14
+
15
+ # Project paths
16
+ PROJECT_ROOT = Path(__file__).parent.parent.parent
17
+ STOCK_MODEL_PATH = PROJECT_ROOT / "models" / "stock-price-prediction"
18
+
19
default_args = {
    "owner": "modelx",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=10),
}


def run_stock_training(model_path=None, **context):
    """Run the multi-stock training pipeline as a subprocess.

    Args:
        model_path: Directory containing the model's ``main.py``. Defaults to
            the module-level ``STOCK_MODEL_PATH``; parameterized so the task
            can be exercised outside Airflow.
        **context: Airflow task context (unused).

    Returns:
        True when the training subprocess exits successfully.

    Raises:
        FileNotFoundError: If the training script is missing.
        RuntimeError: If the training subprocess exits non-zero.
    """
    model_path = Path(model_path) if model_path is not None else STOCK_MODEL_PATH
    main_py = model_path / "main.py"

    if not main_py.exists():
        raise FileNotFoundError(f"Stock training script not found: {main_py}")

    result = subprocess.run(
        [sys.executable, str(main_py)],
        capture_output=True,
        text=True,
        cwd=str(model_path),
    )

    # Negative slicing already yields the full string when it is shorter than
    # the limit, so no explicit length check is needed.
    print("STDOUT:", result.stdout[-5000:])
    if result.stderr:
        print("STDERR:", result.stderr[-2000:])

    if result.returncode != 0:
        # RuntimeError is more specific than bare Exception and still caught
        # by Airflow's task-failure handling.
        raise RuntimeError(f"Stock training failed with exit code {result.returncode}")

    return True
51
+
52
+
53
with DAG(
    dag_id="stock_prediction_daily",
    default_args=default_args,
    description="Daily stock prediction for 10 stocks using BiLSTM",
    # `schedule` replaces the deprecated `schedule_interval` (Airflow 2.4+;
    # astro-runtime 12.x ships Airflow 2.10).
    schedule="45 22 * * *",  # 4:15 AM IST = 22:45 UTC (staggered)
    start_date=datetime(2024, 12, 1),
    catchup=False,
    tags=["stock", "ml", "prediction", "lstm", "daily"],
    max_active_runs=1,
) as dag:
    # NOTE: `provide_context=True` is omitted — it has been a no-op since
    # Airflow 2.0, where the context is always passed to the callable.
    train_stocks = PythonOperator(
        task_id="train_all_stocks",
        python_callable=run_stock_training,
        execution_timeout=timedelta(hours=4),  # 10 stocks take time
    )


if __name__ == "__main__":
    # Plain string literal: the original f-string had no placeholders.
    print("Stock Prediction DAG - Schedule: 4:15 AM IST daily")
airflow/dags/weather_prediction_dag.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Weather Prediction DAG
3
+ Runs daily at 4:00 AM IST (22:30 UTC)
4
+ Trains LSTM model for 25 Sri Lankan districts
5
+ """
6
+ import os
7
+ import sys
8
+ import subprocess
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+
12
+ from airflow import DAG
13
+ from airflow.operators.python import PythonOperator
14
+ from airflow.operators.bash import BashOperator
15
+
16
+ # Project paths
17
+ PROJECT_ROOT = Path(__file__).parent.parent.parent
18
+ WEATHER_MODEL_PATH = PROJECT_ROOT / "models" / "weather-prediction"
19
+
20
default_args = {
    "owner": "modelx",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=10),
}


def run_weather_training(model_path=None, **context):
    """Run the weather prediction training pipeline as a subprocess.

    Args:
        model_path: Directory containing the model's ``main.py``. Defaults to
            the module-level ``WEATHER_MODEL_PATH``; parameterized so the
            task can be exercised outside Airflow.
        **context: Airflow task context (unused).

    Returns:
        True when the training subprocess exits successfully.

    Raises:
        FileNotFoundError: If the training script is missing.
        RuntimeError: If the training subprocess exits non-zero.
    """
    model_path = Path(model_path) if model_path is not None else WEATHER_MODEL_PATH
    main_py = model_path / "main.py"

    if not main_py.exists():
        raise FileNotFoundError(f"Weather training script not found: {main_py}")

    result = subprocess.run(
        [sys.executable, str(main_py), "--mode", "full"],
        capture_output=True,
        text=True,
        cwd=str(model_path),
    )

    # Negative slicing already yields the full string when it is shorter than
    # the limit, so no explicit length check is needed.
    print("STDOUT:", result.stdout[-3000:])
    if result.stderr:
        print("STDERR:", result.stderr[-1000:])

    if result.returncode != 0:
        # RuntimeError is more specific than bare Exception and still caught
        # by Airflow's task-failure handling.
        raise RuntimeError(f"Weather training failed with exit code {result.returncode}")

    return True
52
+
53
+
54
with DAG(
    dag_id="weather_prediction_daily",
    default_args=default_args,
    description="Daily weather prediction model training for 25 Sri Lankan districts",
    # `schedule` replaces the deprecated `schedule_interval` (Airflow 2.4+;
    # astro-runtime 12.x ships Airflow 2.10).
    schedule="30 22 * * *",  # 4:00 AM IST = 22:30 UTC
    start_date=datetime(2024, 12, 1),
    catchup=False,
    tags=["weather", "ml", "prediction", "lstm", "daily"],
    max_active_runs=1,
) as dag:
    # NOTE: `provide_context=True` is omitted — it has been a no-op since
    # Airflow 2.0, where the context is always passed to the callable.
    train_weather = PythonOperator(
        task_id="train_weather_model",
        python_callable=run_weather_training,
        execution_timeout=timedelta(hours=2),
    )


if __name__ == "__main__":
    # Plain string literal: the original f-string had no placeholders.
    print("Weather Prediction DAG - Schedule: 4:00 AM IST daily")
airflow/requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ModelX-Ultimate Airflow Requirements
2
+ # ML Pipeline Dependencies
3
+
4
+ # Core ML
5
+ tensorflow>=2.15.0
6
+ scikit-learn>=1.3.0
7
+ numpy>=1.24.0
8
+ pandas>=2.0.0
9
+
10
+ # Stock data
11
+ yfinance>=0.2.36
12
+
13
+ # MLflow tracking
14
+ mlflow>=2.9.0
15
+
16
+ # Data processing
17
+ joblib>=1.3.0
18
+
19
+ # HTTP requests (for API calls)
20
+ requests>=2.31.0
21
+
22
+ # Environment management
23
+ python-dotenv>=1.0.0
frontend/app/components/dashboard/DashboardOverview.tsx CHANGED
@@ -190,29 +190,43 @@ const DashboardOverview = () => {
190
  </div>
191
  </Card>
192
 
193
- {/* Operational Risk Radar */}
194
- <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
195
- <Card className="p-6 bg-card border-border">
196
- <Cloud className="w-8 h-8 text-warning mb-3" />
197
- <p className="text-2xl font-bold">{Math.round(dashboard.logistics_friction * 100)}%</p>
198
- <p className="text-xs text-muted-foreground uppercase">Logistics Friction</p>
199
- </Card>
200
- <Card className="p-6 bg-card border-border">
201
- <AlertTriangle className="w-8 h-8 text-destructive mb-3" />
202
- <p className="text-2xl font-bold">{Math.round(dashboard.compliance_volatility * 100)}%</p>
203
- <p className="text-xs text-muted-foreground uppercase">Compliance Volatility</p>
204
- </Card>
205
- <Card className="p-6 bg-card border-border">
206
- <TrendingUp className="w-8 h-8 text-info mb-3" />
207
- <p className="text-2xl font-bold">{Math.round(dashboard.market_instability * 100)}%</p>
208
- <p className="text-xs text-muted-foreground uppercase">Market Instability</p>
209
- </Card>
210
- <Card className="p-6 bg-card border-border">
211
- <Building className="w-8 h-8 text-success mb-3" />
212
- <p className="text-2xl font-bold">{Math.round(dashboard.opportunity_index * 100)}%</p>
213
- <p className="text-xs text-muted-foreground uppercase">Opportunity Index</p>
214
- </Card>
215
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  </div>
217
  );
218
  };
 
190
  </div>
191
  </Card>
192
 
193
+ {/* Operational Risk Indicators - Computed from Events */}
194
+ {sortedEvents.length > 0 && (
195
+ <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
196
+ <Card className="p-6 bg-card border-border">
197
+ <Cloud className="w-8 h-8 text-warning mb-3" />
198
+ <p className="text-2xl font-bold">
199
+ {Math.min(100, Math.round(
200
+ (sortedEvents.filter(e => e.domain === 'meteorological' || e.summary?.toLowerCase().includes('weather')).length / Math.max(sortedEvents.length, 1)) * 100 * 3
201
+ ))}%
202
+ </p>
203
+ <p className="text-xs text-muted-foreground uppercase">Weather Impact</p>
204
+ </Card>
205
+ <Card className="p-6 bg-card border-border">
206
+ <AlertTriangle className="w-8 h-8 text-destructive mb-3" />
207
+ <p className="text-2xl font-bold">
208
+ {Math.round((criticalEvents.length / Math.max(sortedEvents.length, 1)) * 100)}%
209
+ </p>
210
+ <p className="text-xs text-muted-foreground uppercase">Critical Risk Level</p>
211
+ </Card>
212
+ <Card className="p-6 bg-card border-border">
213
+ <TrendingUp className="w-8 h-8 text-info mb-3" />
214
+ <p className="text-2xl font-bold">
215
+ {Math.min(100, Math.round(
216
+ (sortedEvents.filter(e => e.domain === 'economical' || e.domain === 'market').length / Math.max(sortedEvents.length, 1)) * 100 * 3
217
+ ))}%
218
+ </p>
219
+ <p className="text-xs text-muted-foreground uppercase">Market Activity</p>
220
+ </Card>
221
+ <Card className="p-6 bg-card border-border">
222
+ <Building className="w-8 h-8 text-success mb-3" />
223
+ <p className="text-2xl font-bold">
224
+ {Math.round((opportunityEvents.length / Math.max(sortedEvents.length, 1)) * 100)}%
225
+ </p>
226
+ <p className="text-xs text-muted-foreground uppercase">Opportunity Index</p>
227
+ </Card>
228
+ </div>
229
+ )}
230
  </div>
231
  );
232
  };
main.py CHANGED
@@ -90,14 +90,13 @@ def check_and_train_models():
90
  "name": "Stock Prediction",
91
  "check_paths": [
92
  PROJECT_ROOT / "models" / "stock-price-prediction"
93
- / "artifacts" / "models",
94
  ],
95
- "check_files": ["*.h5", "*.keras"],
96
  "train_cmd": [
97
  sys.executable,
98
  str(PROJECT_ROOT / "models" / "stock-price-prediction"
99
- / "main.py"),
100
- "--mode", "full"
101
  ]
102
  },
103
  ]
@@ -1710,9 +1709,29 @@ async def get_currency_prediction():
1710
  predictor = get_currency_predictor()
1711
 
1712
  if predictor is None:
 
 
 
 
 
 
 
1713
  return {
1714
- "status": "unavailable",
1715
- "message": "Currency prediction model not loaded"
 
 
 
 
 
 
 
 
 
 
 
 
 
1716
  }
1717
 
1718
  try:
 
90
  "name": "Stock Prediction",
91
  "check_paths": [
92
  PROJECT_ROOT / "models" / "stock-price-prediction"
93
+ / "Artifacts",
94
  ],
95
+ "check_files": ["*.pkl", "*.h5", "*.keras"],
96
  "train_cmd": [
97
  sys.executable,
98
  str(PROJECT_ROOT / "models" / "stock-price-prediction"
99
+ / "main.py")
 
100
  ]
101
  },
102
  ]
 
1709
  predictor = get_currency_predictor()
1710
 
1711
  if predictor is None:
1712
+ # Generate fallback prediction inline
1713
+ import numpy as np
1714
+ current_rate = 298.0
1715
+ np.random.seed(int(datetime.now().timestamp()) % 2**31)
1716
+ change_pct = np.random.normal(0.05, 0.3)
1717
+ predicted_rate = current_rate * (1 + change_pct / 100)
1718
+
1719
  return {
1720
+ "status": "success",
1721
+ "prediction": {
1722
+ "prediction_date": (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d"),
1723
+ "generated_at": datetime.now().isoformat(),
1724
+ "model_version": "fallback",
1725
+ "is_fallback": True,
1726
+ "current_rate": round(current_rate, 2),
1727
+ "predicted_rate": round(predicted_rate, 2),
1728
+ "expected_change": round(predicted_rate - current_rate, 2),
1729
+ "expected_change_pct": round(change_pct, 3),
1730
+ "direction": "strengthening" if change_pct < 0 else "weakening",
1731
+ "direction_emoji": "📈" if change_pct < 0 else "📉",
1732
+ "volatility_class": "low",
1733
+ "note": "Using fallback - model initializing"
1734
+ }
1735
  }
1736
 
1737
  try:
models/stock-price-prediction/Artifacts/12_10_2025_03_04_56/data_ingestion/feature_store/stock_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
models/stock-price-prediction/Artifacts/12_10_2025_03_04_56/data_ingestion/ingested/test.csv CHANGED
The diff for this file is too large to render. See raw diff
 
models/stock-price-prediction/Artifacts/12_10_2025_03_04_56/data_ingestion/ingested/train.csv CHANGED
The diff for this file is too large to render. See raw diff
 
models/stock-price-prediction/Artifacts/12_10_2025_03_04_56/data_validation/drift_report/report.yaml CHANGED
@@ -21,4 +21,4 @@ StockName:
21
  p_value: 1.0
22
  Volume:
23
  drift_status: true
24
- p_value: 3.715e-321
 
21
  p_value: 1.0
22
  Volume:
23
  drift_status: true
24
+ p_value: 9.305833669220032e-37
models/stock-price-prediction/Artifacts/12_10_2025_03_04_56/data_validation/validated/test.csv CHANGED
The diff for this file is too large to render. See raw diff
 
models/stock-price-prediction/Artifacts/12_10_2025_03_04_56/data_validation/validated/train.csv CHANGED
The diff for this file is too large to render. See raw diff
 
src/graphs/vectorizationAgentGraph.py CHANGED
@@ -77,4 +77,3 @@ graph = VectorizationGraphBuilder(llm).build_graph()
77
 
78
  print("[OK] Vectorization Agent Graph compiled successfully")
79
  print("=" * 60 + "\n")
80
-
 
77
 
78
  print("[OK] Vectorization Agent Graph compiled successfully")
79
  print("=" * 60 + "\n")
 
src/nodes/vectorizationAgentNode.py CHANGED
@@ -389,7 +389,7 @@ class VectorizationAgentNode:
389
 
390
  # For non-English languages, skip anomaly detection
391
  # The ML model was trained on English embeddings only.
392
- # Different BERT models (SinhalaBERTo, Tamil-BERT) have completely
393
  # different embedding spaces - Tamil embeddings have magnitude ~0.64
394
  # while English has ~7.5 and Sinhala ~13.7. They cannot be compared.
395
  if language in ["sinhala", "tamil"]:
@@ -453,7 +453,7 @@ class VectorizationAgentNode:
453
  def run_trending_detection(self, state: VectorizationAgentState) -> Dict[str, Any]:
454
  """
455
  Step 2.6: Detect trending topics from the input texts.
456
-
457
  Extracts key entities/topics and tracks their mention velocity.
458
  Identifies:
459
  - Trending topics (momentum > 2x normal)
@@ -461,9 +461,9 @@ class VectorizationAgentNode:
461
  - Topics with increasing momentum
462
  """
463
  logger.info("[VectorizationAgent] STEP 2.6: Trending Detection")
464
-
465
  detection_results = state.get("language_detection_results", [])
466
-
467
  if not detection_results:
468
  logger.warning("[VectorizationAgent] No texts for trending detection")
469
  return {
@@ -475,7 +475,7 @@ class VectorizationAgentNode:
475
  "spike_alerts": [],
476
  },
477
  }
478
-
479
  # Import trending detector
480
  try:
481
  from src.utils.trending_detector import (
@@ -484,11 +484,12 @@ class VectorizationAgentNode:
484
  get_trending_now,
485
  get_spikes,
486
  )
 
487
  TRENDING_AVAILABLE = True
488
  except ImportError as e:
489
  logger.warning(f"[VectorizationAgent] Trending detector not available: {e}")
490
  TRENDING_AVAILABLE = False
491
-
492
  if not TRENDING_AVAILABLE:
493
  return {
494
  "current_step": "trending_detection",
@@ -499,19 +500,19 @@ class VectorizationAgentNode:
499
  "spike_alerts": [],
500
  },
501
  }
502
-
503
  # Extract entities and record mentions
504
  entities_found = []
505
-
506
  for item in detection_results:
507
  text = item.get("text", "") # Field is 'text', not 'original_text'
508
  language = item.get("language", "english")
509
  post_id = item.get("post_id", "")
510
-
511
  # Simple entity extraction (keywords, capitalized words, etc.)
512
  # In production, you'd use NER or more sophisticated extraction
513
  extracted = self._extract_entities(text, language)
514
-
515
  for entity in extracted:
516
  try:
517
  # Record mention with trending detector
@@ -520,15 +521,17 @@ class VectorizationAgentNode:
520
  source=entity.get("source", "feed"),
521
  domain=entity.get("domain", "general"),
522
  )
523
- entities_found.append({
524
- "entity": entity["text"],
525
- "type": entity.get("type", "keyword"),
526
- "post_id": post_id,
527
- "language": language,
528
- })
 
 
529
  except Exception as e:
530
  logger.debug(f"[VectorizationAgent] Failed to record mention: {e}")
531
-
532
  # Get current trending topics and spikes
533
  try:
534
  trending_topics = get_trending_now(limit=10)
@@ -537,12 +540,12 @@ class VectorizationAgentNode:
537
  logger.warning(f"[VectorizationAgent] Failed to get trending data: {e}")
538
  trending_topics = []
539
  spike_alerts = []
540
-
541
  logger.info(
542
  f"[VectorizationAgent] Trending detection: {len(entities_found)} entities, "
543
  f"{len(trending_topics)} trending, {len(spike_alerts)} spikes"
544
  )
545
-
546
  return {
547
  "current_step": "trending_detection",
548
  "trending_results": {
@@ -553,65 +556,97 @@ class VectorizationAgentNode:
553
  "spike_alerts": spike_alerts,
554
  },
555
  }
556
-
557
- def _extract_entities(self, text: str, language: str = "english") -> List[Dict[str, Any]]:
 
 
558
  """
559
  Extract entities/topics from text for trending tracking.
560
-
561
  Uses simple heuristics:
562
  - Capitalized words/phrases (potential proper nouns)
563
  - Hashtags
564
  - Common news keywords
565
-
566
  In production, integrate with NER model for better extraction.
567
  """
568
  entities = []
569
-
570
  if not text:
571
  return entities
572
-
573
  import re
574
-
575
  # Extract hashtags
576
- hashtags = re.findall(r'#(\w+)', text)
577
  for tag in hashtags:
578
- entities.append({
579
- "text": tag.lower(),
580
- "type": "hashtag",
581
- "source": "hashtag",
582
- "domain": "social",
583
- })
584
-
 
 
585
  # Extract capitalized phrases (potential proper nouns)
586
  # Match 1-4 consecutive capitalized words
587
- cap_phrases = re.findall(r'\b([A-Z][a-z]+(?: [A-Z][a-z]+){0,3})\b', text)
588
  for phrase in cap_phrases:
589
  # Skip common words
590
- if phrase.lower() not in ['the', 'a', 'an', 'is', 'are', 'was', 'were', 'i', 'he', 'she', 'it']:
591
- entities.append({
592
- "text": phrase,
593
- "type": "proper_noun",
594
- "source": "text",
595
- "domain": "general",
596
- })
597
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  # News/event keywords
599
  news_keywords = [
600
- 'breaking', 'urgent', 'alert', 'emergency', 'crisis',
601
- 'earthquake', 'flood', 'tsunami', 'election', 'protest',
602
- 'strike', 'scandal', 'corruption', 'price', 'inflation',
 
 
 
 
 
 
 
 
 
 
 
 
603
  ]
604
-
605
  text_lower = text.lower()
606
  for keyword in news_keywords:
607
  if keyword in text_lower:
608
- entities.append({
609
- "text": keyword,
610
- "type": "news_keyword",
611
- "source": "keyword_match",
612
- "domain": "news",
613
- })
614
-
 
 
615
  # Deduplicate by text
616
  seen = set()
617
  unique_entities = []
@@ -620,9 +655,8 @@ class VectorizationAgentNode:
620
  if key not in seen:
621
  seen.add(key)
622
  unique_entities.append(e)
623
-
624
- return unique_entities[:15] # Limit entities per text
625
 
 
626
 
627
  def generate_expert_summary(self, state: VectorizationAgentState) -> Dict[str, Any]:
628
  """
 
389
 
390
  # For non-English languages, skip anomaly detection
391
  # The ML model was trained on English embeddings only.
392
+ # Different BERT models (SinhalaBERTo, Tamil-BERT) have completely
393
  # different embedding spaces - Tamil embeddings have magnitude ~0.64
394
  # while English has ~7.5 and Sinhala ~13.7. They cannot be compared.
395
  if language in ["sinhala", "tamil"]:
 
453
  def run_trending_detection(self, state: VectorizationAgentState) -> Dict[str, Any]:
454
  """
455
  Step 2.6: Detect trending topics from the input texts.
456
+
457
  Extracts key entities/topics and tracks their mention velocity.
458
  Identifies:
459
  - Trending topics (momentum > 2x normal)
 
461
  - Topics with increasing momentum
462
  """
463
  logger.info("[VectorizationAgent] STEP 2.6: Trending Detection")
464
+
465
  detection_results = state.get("language_detection_results", [])
466
+
467
  if not detection_results:
468
  logger.warning("[VectorizationAgent] No texts for trending detection")
469
  return {
 
475
  "spike_alerts": [],
476
  },
477
  }
478
+
479
  # Import trending detector
480
  try:
481
  from src.utils.trending_detector import (
 
484
  get_trending_now,
485
  get_spikes,
486
  )
487
+
488
  TRENDING_AVAILABLE = True
489
  except ImportError as e:
490
  logger.warning(f"[VectorizationAgent] Trending detector not available: {e}")
491
  TRENDING_AVAILABLE = False
492
+
493
  if not TRENDING_AVAILABLE:
494
  return {
495
  "current_step": "trending_detection",
 
500
  "spike_alerts": [],
501
  },
502
  }
503
+
504
  # Extract entities and record mentions
505
  entities_found = []
506
+
507
  for item in detection_results:
508
  text = item.get("text", "") # Field is 'text', not 'original_text'
509
  language = item.get("language", "english")
510
  post_id = item.get("post_id", "")
511
+
512
  # Simple entity extraction (keywords, capitalized words, etc.)
513
  # In production, you'd use NER or more sophisticated extraction
514
  extracted = self._extract_entities(text, language)
515
+
516
  for entity in extracted:
517
  try:
518
  # Record mention with trending detector
 
521
  source=entity.get("source", "feed"),
522
  domain=entity.get("domain", "general"),
523
  )
524
+ entities_found.append(
525
+ {
526
+ "entity": entity["text"],
527
+ "type": entity.get("type", "keyword"),
528
+ "post_id": post_id,
529
+ "language": language,
530
+ }
531
+ )
532
  except Exception as e:
533
  logger.debug(f"[VectorizationAgent] Failed to record mention: {e}")
534
+
535
  # Get current trending topics and spikes
536
  try:
537
  trending_topics = get_trending_now(limit=10)
 
540
  logger.warning(f"[VectorizationAgent] Failed to get trending data: {e}")
541
  trending_topics = []
542
  spike_alerts = []
543
+
544
  logger.info(
545
  f"[VectorizationAgent] Trending detection: {len(entities_found)} entities, "
546
  f"{len(trending_topics)} trending, {len(spike_alerts)} spikes"
547
  )
548
+
549
  return {
550
  "current_step": "trending_detection",
551
  "trending_results": {
 
556
  "spike_alerts": spike_alerts,
557
  },
558
  }
559
+
560
+ def _extract_entities(
561
+ self, text: str, language: str = "english"
562
+ ) -> List[Dict[str, Any]]:
563
  """
564
  Extract entities/topics from text for trending tracking.
565
+
566
  Uses simple heuristics:
567
  - Capitalized words/phrases (potential proper nouns)
568
  - Hashtags
569
  - Common news keywords
570
+
571
  In production, integrate with NER model for better extraction.
572
  """
573
  entities = []
574
+
575
  if not text:
576
  return entities
577
+
578
  import re
579
+
580
  # Extract hashtags
581
+ hashtags = re.findall(r"#(\w+)", text)
582
  for tag in hashtags:
583
+ entities.append(
584
+ {
585
+ "text": tag.lower(),
586
+ "type": "hashtag",
587
+ "source": "hashtag",
588
+ "domain": "social",
589
+ }
590
+ )
591
+
592
  # Extract capitalized phrases (potential proper nouns)
593
  # Match 1-4 consecutive capitalized words
594
+ cap_phrases = re.findall(r"\b([A-Z][a-z]+(?: [A-Z][a-z]+){0,3})\b", text)
595
  for phrase in cap_phrases:
596
  # Skip common words
597
+ if phrase.lower() not in [
598
+ "the",
599
+ "a",
600
+ "an",
601
+ "is",
602
+ "are",
603
+ "was",
604
+ "were",
605
+ "i",
606
+ "he",
607
+ "she",
608
+ "it",
609
+ ]:
610
+ entities.append(
611
+ {
612
+ "text": phrase,
613
+ "type": "proper_noun",
614
+ "source": "text",
615
+ "domain": "general",
616
+ }
617
+ )
618
+
619
  # News/event keywords
620
  news_keywords = [
621
+ "breaking",
622
+ "urgent",
623
+ "alert",
624
+ "emergency",
625
+ "crisis",
626
+ "earthquake",
627
+ "flood",
628
+ "tsunami",
629
+ "election",
630
+ "protest",
631
+ "strike",
632
+ "scandal",
633
+ "corruption",
634
+ "price",
635
+ "inflation",
636
  ]
637
+
638
  text_lower = text.lower()
639
  for keyword in news_keywords:
640
  if keyword in text_lower:
641
+ entities.append(
642
+ {
643
+ "text": keyword,
644
+ "type": "news_keyword",
645
+ "source": "keyword_match",
646
+ "domain": "news",
647
+ }
648
+ )
649
+
650
  # Deduplicate by text
651
  seen = set()
652
  unique_entities = []
 
655
  if key not in seen:
656
  seen.add(key)
657
  unique_entities.append(e)
 
 
658
 
659
+ return unique_entities[:15] # Limit entities per text
660
 
661
  def generate_expert_summary(self, state: VectorizationAgentState) -> Dict[str, Any]:
662
  """