samithcs committed on
Commit
1eaee2c
·
1 Parent(s): deadecc

Add Chainlit app files (Dockerfile, app.py, requirements.txt, src) and update README

Browse files
Files changed (38) hide show
  1. Dockerfile +22 -0
  2. README.md +3 -4
  3. app.py +146 -0
  4. requirements.txt +31 -0
  5. src/app/__pycache__/app.cpython-311.pyc +0 -0
  6. src/app/__pycache__/chatbot.cpython-311.pyc +0 -0
  7. src/app/__pycache__/chatbot.cpython-313.pyc +0 -0
  8. src/app/__pycache__/fastapi_server.cpython-311.pyc +0 -0
  9. src/app/app.py +142 -0
  10. src/app/fastapi_server.py +104 -0
  11. src/components/__init__.py +0 -0
  12. src/components/__pycache__/api_gnews_fetcher.cpython-311.pyc +0 -0
  13. src/components/__pycache__/api_weather_fetcher.cpython-311.pyc +0 -0
  14. src/components/__pycache__/data_ingestion.cpython-313.pyc +0 -0
  15. src/components/__pycache__/model_nlp_intent.cpython-311.pyc +0 -0
  16. src/components/__pycache__/model_nlp_ner.cpython-311.pyc +0 -0
  17. src/components/__pycache__/model_risk_predictor.cpython-311.pyc +0 -0
  18. src/components/__pycache__/recommendation_engine.cpython-311.pyc +0 -0
  19. src/components/api_gnews_fetcher.py +40 -0
  20. src/components/api_weather_fetcher.py +65 -0
  21. src/components/data_cleaning.py +96 -0
  22. src/components/data_ingestion.py +60 -0
  23. src/components/feature_engineering.py +96 -0
  24. src/components/model_nlp_intent.py +142 -0
  25. src/components/model_nlp_ner.py +234 -0
  26. src/components/model_risk_predictor.py +273 -0
  27. src/components/model_timeseries_risk.py +100 -0
  28. src/components/recommendation_engine.py +103 -0
  29. src/config/__init__.py +0 -0
  30. src/config/__pycache__/config.cpython-311.pyc +0 -0
  31. src/config/config.py +10 -0
  32. src/pipeline/__init__.py +0 -0
  33. src/pipeline/__pycache__/data_refresh_workflow.cpython-311.pyc +0 -0
  34. src/pipeline/data_refresh_workflow.py +66 -0
  35. src/utils/__init__.py +0 -0
  36. src/utils/__pycache__/logger.cpython-311.pyc +0 -0
  37. src/utils/__pycache__/logger.cpython-313.pyc +0 -0
  38. src/utils/logger.py +13 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Base image ----
FROM python:3.11-slim

# ---- Container-friendly Python defaults ----
# No .pyc files in the image; unbuffered stdout so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# ---- Working directory ----
WORKDIR /app

# ---- Copy requirements first for efficient layer caching ----
COPY requirements.txt .

# ---- Install dependencies (no pip cache keeps the image small) ----
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- Copy the full project ----
COPY . .

# ---- Use Hugging Face's expected port ----
EXPOSE 7860

# ---- Run Chainlit ----
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,10 @@
1
  ---
2
  title: Chainlit Supplychain App
3
- emoji: 😻
4
- colorFrom: gray
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Chainlit Supplychain App
3
+ emoji: 📈
4
+ colorFrom: purple
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+
5
+ sys.path.insert(0, str(Path(__file__).parent))
6
+
7
+
8
+
9
+ import chainlit as cl
10
+
11
+ from components.model_nlp_intent import predict_intent
12
+ from components.model_nlp_ner import extract_entities_pipeline
13
+ from components.model_risk_predictor import predict_risk
14
+ from components.recommendation_engine import generate_recommendation
15
+
16
+
17
@cl.on_message
async def handle_message(msg: cl.Message):
    """Handle one chat message: classify intent, extract entities, score
    supply-chain risk and reply with a recommendation plus an alert level.

    NOTE(review): this handler is duplicated in src/app/app.py — keep the
    two copies in sync (or delete one) to avoid divergent behavior.
    """
    query = msg.content
    # Fix: the original bound `session = cl.user_session` and never used it.

    # --- NLP: intent + entities ---
    intent_result = predict_intent(query)
    intent = intent_result["intent"]
    confidence = intent_result["confidence"]
    entities = extract_entities_pipeline(query)

    # --- Resolve region / route from extracted locations ---
    region = None
    origin = None
    destination = None
    locations = entities.get("location")
    if locations:
        if isinstance(locations, list):
            if locations:
                region = locations[0]
            if len(locations) > 1:
                origin, destination = locations[0], locations[1]
        else:
            region = locations
    if not region:
        region = "Mumbai"  # demo default when no location is detected

    # --- Resolve events / incidents ---
    incidents = []
    event_type = None
    events = entities.get("event")
    if events:
        if isinstance(events, list):
            incidents = events
            event_type = events[0] if events else None
        else:
            incidents = [events]
            event_type = events

    # --- Risk prediction ---
    risk_score = predict_risk(
        region=region,
        days=5,
        origin=origin,
        destination=destination,
        event_type=event_type,
        incidents=incidents
    )

    # Placeholder context when NER found nothing (demo data).
    recent_incidents = incidents if incidents else ["port strike", "supplier outage"]
    weather_alert = "Typhoon warning" if region == "Shanghai" else None

    advice = generate_recommendation(
        risk_score=risk_score,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent
    )

    # --- Risk banding ---
    if risk_score >= 0.7:
        risk_emoji, risk_level = "🔴", "High"
    elif risk_score >= 0.4:
        risk_emoji, risk_level = "🟡", "Medium"
    else:
        risk_emoji, risk_level = "🟢", "Low"

    # --- Compose reply ---
    response = (
        f"### 📊 Supply Chain Risk Analysis\n\n"
        f"**Region:** {region}\n"
        f"**Intent:** {intent} (Confidence: {confidence:.2%})\n"
        f"**Entities:** {entities}\n"
    )

    if origin and destination:
        response += f"**Route:** {origin} → {destination}\n"

    if incidents:
        # str() guards the join against non-string entity values from NER.
        response += f"**⚠️ Detected Events:** {', '.join(str(i) for i in incidents)}\n"

    response += f"**Risk Score:** {risk_emoji} **{risk_level}** ({risk_score:.2f})\n\n"
    response += f"**💡 Recommendation:**\n{advice['message']}\n"

    await cl.Message(
        content=response,
        author="Supply Chain Risk Analysis"
    ).send()

    # Send Alert Level
    alert_emoji = "🚨" if risk_score >= 0.7 else "⚠️" if risk_score >= 0.4 else "✅"
    await cl.Message(
        content=f"{alert_emoji} **Alert Level:** {advice['action'].upper()}",
        author="Alert Level"
    ).send()
125
+
126
+
127
@cl.on_chat_start
async def welcome():
    """Greet a new chat session with the bot's capabilities and examples."""
    greeting = (
        "# 🌐 Welcome to AI-Powered Supply Chain Risk Advisor\n\n"
        "I provide **real-time risk analysis** and **mitigation strategies** "
        "based on:\n"
        "- 🌍 **Regional factors** (port congestion, infrastructure)\n"
        "- ⚠️ **Active events** (strikes, typhoons, disruptions)\n"
        "- 🚢 **Route analysis** (origin to destination)\n"
        "- 🤖 **ML-powered predictions** (trained on historical data)\n\n"
        "### 💬 Example Questions:\n\n"
        "- \"Is there any delay in vessels from USA to UAE?\"\n"
        "- \"What should I do about the port strike in Shanghai?\"\n"
        "- \"Are there weather problems affecting shipments to Germany?\"\n"
        "- \"Risk level for Mumbai to Singapore route?\"\n\n"
        "**Ask me anything about your supply chain risks!** 🚀"
    )
    intro = cl.Message(content=greeting, author="Risk Advisor Bot")
    await intro.send()
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base
2
+ numpy==1.26.4
3
+ pandas==2.2.2
4
+ requests==2.32.3
5
+
6
+ # Machine Learning
7
+ scikit-learn==1.5.2
8
+ joblib==1.4.2
9
+
10
+ # Deep Learning (TensorFlow + Keras)
11
+ tensorflow==2.15.0
12
+ keras==2.15.0
13
+ protobuf==3.20.3
14
+
15
+ # NLP / Transformers
16
+ transformers==4.37.2
17
+ sentencepiece==0.2.0
18
+ torch==2.1.0
19
+
20
+ # Backend / Web
21
+ fastapi==0.110.2
22
+ uvicorn==0.25.0
23
+ python-dotenv==1.0.1
24
+ pydantic<2
25
+
26
+ # Chainlit App
27
+ chainlit==1.1.301
28
+
29
+ # Tools
30
+ pytest==8.3.2
31
+ pytest-asyncio==0.24.0
src/app/__pycache__/app.cpython-311.pyc ADDED
Binary file (5.24 kB). View file
 
src/app/__pycache__/chatbot.cpython-311.pyc ADDED
Binary file (5.25 kB). View file
 
src/app/__pycache__/chatbot.cpython-313.pyc ADDED
Binary file (3.1 kB). View file
 
src/app/__pycache__/fastapi_server.cpython-311.pyc ADDED
Binary file (5.27 kB). View file
 
src/app/app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
4
+
5
+ import chainlit as cl
6
+
7
+ from components.model_nlp_intent import predict_intent
8
+ from components.model_nlp_ner import extract_entities_pipeline
9
+ from components.model_risk_predictor import predict_risk
10
+ from components.recommendation_engine import generate_recommendation
11
+
12
+
13
@cl.on_message
async def handle_message(msg: cl.Message):
    """Handle one chat message: classify intent, extract entities, score
    supply-chain risk and reply with a recommendation plus an alert level.

    NOTE(review): this handler is duplicated in the repo-root app.py — keep
    the two copies in sync (or delete one) to avoid divergent behavior.
    """
    query = msg.content
    # Fix: the original bound `session = cl.user_session` and never used it.

    # --- NLP: intent + entities ---
    intent_result = predict_intent(query)
    intent = intent_result["intent"]
    confidence = intent_result["confidence"]
    entities = extract_entities_pipeline(query)

    # --- Resolve region / route from extracted locations ---
    region = None
    origin = None
    destination = None
    locations = entities.get("location")
    if locations:
        if isinstance(locations, list):
            if locations:
                region = locations[0]
            if len(locations) > 1:
                origin, destination = locations[0], locations[1]
        else:
            region = locations
    if not region:
        region = "Mumbai"  # demo default when no location is detected

    # --- Resolve events / incidents ---
    incidents = []
    event_type = None
    events = entities.get("event")
    if events:
        if isinstance(events, list):
            incidents = events
            event_type = events[0] if events else None
        else:
            incidents = [events]
            event_type = events

    # --- Risk prediction ---
    risk_score = predict_risk(
        region=region,
        days=5,
        origin=origin,
        destination=destination,
        event_type=event_type,
        incidents=incidents
    )

    # Placeholder context when NER found nothing (demo data).
    recent_incidents = incidents if incidents else ["port strike", "supplier outage"]
    weather_alert = "Typhoon warning" if region == "Shanghai" else None

    advice = generate_recommendation(
        risk_score=risk_score,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent
    )

    # --- Risk banding ---
    if risk_score >= 0.7:
        risk_emoji, risk_level = "🔴", "High"
    elif risk_score >= 0.4:
        risk_emoji, risk_level = "🟡", "Medium"
    else:
        risk_emoji, risk_level = "🟢", "Low"

    # --- Compose reply ---
    response = (
        f"### 📊 Supply Chain Risk Analysis\n\n"
        f"**Region:** {region}\n"
        f"**Intent:** {intent} (Confidence: {confidence:.2%})\n"
        f"**Entities:** {entities}\n"
    )

    if origin and destination:
        response += f"**Route:** {origin} → {destination}\n"

    if incidents:
        # str() guards the join against non-string entity values from NER.
        response += f"**⚠️ Detected Events:** {', '.join(str(i) for i in incidents)}\n"

    response += f"**Risk Score:** {risk_emoji} **{risk_level}** ({risk_score:.2f})\n\n"
    response += f"**💡 Recommendation:**\n{advice['message']}\n"

    await cl.Message(
        content=response,
        author="Supply Chain Risk Analysis"
    ).send()

    # Send Alert Level
    alert_emoji = "🚨" if risk_score >= 0.7 else "⚠️" if risk_score >= 0.4 else "✅"
    await cl.Message(
        content=f"{alert_emoji} **Alert Level:** {advice['action'].upper()}",
        author="Alert Level"
    ).send()
121
+
122
+
123
@cl.on_chat_start
async def welcome():
    """Greet a new chat session with the bot's capabilities and examples."""
    greeting = (
        "# 🌐 Welcome to AI-Powered Supply Chain Risk Advisor\n\n"
        "I provide **real-time risk analysis** and **mitigation strategies** "
        "based on:\n"
        "- 🌍 **Regional factors** (port congestion, infrastructure)\n"
        "- ⚠️ **Active events** (strikes, typhoons, disruptions)\n"
        "- 🚢 **Route analysis** (origin to destination)\n"
        "- 🤖 **ML-powered predictions** (trained on historical data)\n\n"
        "### 💬 Example Questions:\n\n"
        "- \"Is there any delay in vessels from USA to UAE?\"\n"
        "- \"What should I do about the port strike in Shanghai?\"\n"
        "- \"Are there weather problems affecting shipments to Germany?\"\n"
        "- \"Risk level for Mumbai to Singapore route?\"\n\n"
        "**Ask me anything about your supply chain risks!** 🚀"
    )
    intro = cl.Message(content=greeting, author="Risk Advisor Bot")
    await intro.send()
src/app/fastapi_server.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Query
2
+ from typing import Optional, List
3
+
4
+ import sys
5
+ from pathlib import Path
6
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
7
+
8
+ from components.model_nlp_intent import predict_intent
9
+ from components.model_nlp_ner import extract_entities
10
+ from components.model_risk_predictor import predict_risk
11
+ from components.recommendation_engine import generate_recommendation
12
+
13
+ app = FastAPI(
14
+ title="Supply Chain Risk Advisor API",
15
+ description="Provides risk prediction, event queries, and mitigation recommendations.",
16
+ version="1.0"
17
+ )
18
+
19
@app.get("/health/")
def health():
    """Liveness probe: always reports the service as up."""
    status = {"status": "ok"}
    return status
22
+
23
@app.get("/nlp/")
def nlp_analysis(query: str):
    """Run both intent and entity extraction on a user query."""
    intent_result = predict_intent(query)
    extracted = extract_entities(query)
    payload = {
        "query": query,
        "intent": intent_result["intent"],
        "confidence": intent_result["confidence"],
        "entities": extracted,
    }
    return payload
34
+
35
@app.get("/predict-risk/")
def predict_risk_api(region: str, days: Optional[int] = 5):
    """Return risk prediction for a region over the next N days."""
    score = predict_risk(region, days)
    return {"region": region, "risk_score": score, "days": days}
40
+
41
@app.get("/events/")
def events_api(region: Optional[str] = None):
    """Query past incidents/events for one region, or all regions."""
    # Replace this with real event loading (e.g., from your snapshot/data files)
    sample_events = [
        {"region": "Germany", "event": "railway strike", "date": "2025-09-23"},
        {"region": "Mumbai", "event": "weather alert", "date": "2025-10-05"},
        {"region": "Shanghai", "event": "typhoon", "date": "2025-09-30"},
    ]
    if region:
        wanted = region.lower()
        matching = [ev for ev in sample_events if ev["region"].lower() == wanted]
        return {"events": matching}
    return {"events": sample_events}
54
+
55
@app.get("/recommendation/")
def recommendation_api(
    region: str,
    risk: float,
    intent: Optional[str] = None,
    recent_incidents: Optional[List[str]] = Query(None),
    weather_alert: Optional[str] = None
):
    """Get a mitigation recommendation for the given region and risk score."""
    # Thin wrapper: all logic lives in the shared recommendation engine.
    return generate_recommendation(
        risk_score=risk,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent
    )
72
+
73
@app.get("/bot/")
def chatbot_api(query: str):
    """Full pipeline: intent, entities, risk prediction and recommendation."""
    intent_result = predict_intent(query)
    entities = extract_entities(query)

    # Use the first location found or default to Mumbai for demo if missing
    region = None
    locations = entities.get("location")
    if locations:
        region = locations[0] if isinstance(locations, list) and locations else locations
    if not region:
        region = "Mumbai"

    risk_score = predict_risk(region, 5)

    # NOTE(review): placeholder context — `region` is always truthy at this
    # point (it defaults to "Mumbai"), so the sample incidents are always
    # attached; replace with real incident data.
    recent_incidents = ["port strike", "supplier outage"] if region else []
    weather_alert = "Typhoon warning" if region == "Shanghai" else None

    advice = generate_recommendation(
        risk_score=risk_score,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent_result.get("intent")
    )
    return {
        "query": query,
        "intent": intent_result["intent"],
        "confidence": intent_result["confidence"],
        "entities": entities,
        "region": region,
        "risk_score": risk_score,
        "advice": advice
    }
103
+
104
+
src/components/__init__.py ADDED
File without changes
src/components/__pycache__/api_gnews_fetcher.cpython-311.pyc ADDED
Binary file (2.76 kB). View file
 
src/components/__pycache__/api_weather_fetcher.cpython-311.pyc ADDED
Binary file (4.59 kB). View file
 
src/components/__pycache__/data_ingestion.cpython-313.pyc ADDED
Binary file (4.15 kB). View file
 
src/components/__pycache__/model_nlp_intent.cpython-311.pyc ADDED
Binary file (7.25 kB). View file
 
src/components/__pycache__/model_nlp_ner.cpython-311.pyc ADDED
Binary file (19.3 kB). View file
 
src/components/__pycache__/model_risk_predictor.cpython-311.pyc ADDED
Binary file (14.3 kB). View file
 
src/components/__pycache__/recommendation_engine.cpython-311.pyc ADDED
Binary file (3.59 kB). View file
 
src/components/api_gnews_fetcher.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
8
+ from utils.logger import *
9
+
10
+ import logging
11
+ logger = logging.getLogger(__name__)
12
+
13
+ load_dotenv()
14
+ GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
15
+ GNEWS_API_ENDPOINT = "https://gnews.io/api/v4/search"
16
+
17
class GNewsFetcher:
    """Thin client for the GNews v4 search endpoint.

    Reads the API key from the module-level GNEWS_API_KEY (loaded from the
    environment) unless one is passed explicitly.
    """

    def __init__(self, api_key=GNEWS_API_KEY, endpoint=GNEWS_API_ENDPOINT):
        self.api_key = api_key
        self.endpoint = endpoint
        if not self.api_key:
            logger.error("GNEWS_API_KEY environment variable not set.")

    def fetch_news(self, keyword, max_results=100, timeout=15):
        """Fetch up to *max_results* English articles matching *keyword*.

        Returns a list of article dicts, or [] on any error.
        *timeout* (seconds) is new: the original request had no timeout and
        could hang indefinitely on a stalled connection.
        """
        params = {
            'q': keyword,
            'token': self.api_key,
            'lang': 'en',
            'max': max_results,
        }
        try:
            logger.info(f"Fetching GNews for keyword: {keyword}")
            response = requests.get(self.endpoint, params=params, timeout=timeout)
            response.raise_for_status()
            articles = response.json().get('articles', [])
            logger.info(f"Fetched {len(articles)} articles for '{keyword}'")
            return articles
        except Exception as e:
            # Best-effort fetch: callers get an empty list instead of a crash.
            logger.error(f"GNews fetch error for '{keyword}': {e}")
            return []
src/components/api_weather_fetcher.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
8
+ from utils.logger import *
9
+
10
+ import logging
11
+ logger = logging.getLogger(__name__)
12
+
13
+ load_dotenv()
14
+ WEATHERBIT_API_KEY = os.getenv("WEATHERBIT_API_KEY")
15
+ ENDPOINT = "https://api.weatherbit.io/v2.0/current"
16
+
17
class WeatherFetcher:
    """Client for the Weatherbit 'current conditions' endpoint.

    Reads the API key from the module-level WEATHERBIT_API_KEY (loaded from
    the environment) unless one is passed explicitly.
    """

    def __init__(self, api_key=WEATHERBIT_API_KEY, endpoint=ENDPOINT):
        self.api_key = api_key
        self.endpoint = endpoint
        if not self.api_key:
            logger.error("WEATHERBIT_API_KEY environment variable not set.")

    def fetch_weather(self, lat, lon, timeout=15):
        """Fetch current weather for a lat/lon pair.

        Returns the parsed JSON payload, or None on any error.
        *timeout* (seconds) is new: the original request had no timeout and
        could hang indefinitely on a stalled connection.
        """
        params = {
            "lat": lat,
            "lon": lon,
            "key": self.api_key
        }
        try:
            logger.info(f"Fetching weather for lat/lon: {lat},{lon}")
            response = requests.get(self.endpoint, params=params, timeout=timeout)
            response.raise_for_status()
            logger.info(f"Weather fetch success for {lat},{lon}")
            return response.json()
        except Exception as e:
            logger.error(f"WeatherBit fetch error for {lat},{lon}: {e}")
            return None

    @staticmethod
    def extract_weather(data, loc):
        """Flatten one Weatherbit response into a single record dict.

        *loc* must provide 'city', 'country', 'lat', 'lon'.
        Returns None when the payload has no usable 'data' entry.
        """
        if data and "data" in data and len(data["data"]) > 0:
            entry = data["data"][0]
            # Fix: the original indexed entry["weather"] directly and raised
            # KeyError when the sub-dict was absent from the payload.
            weather = entry.get("weather", {})
            logger.info(f"Extracting weather for {loc['city']}, {loc['country']}")
            return {
                "city": loc["city"],
                "country": loc["country"],
                "lat": loc["lat"],
                "lon": loc["lon"],
                "timestamp": entry.get("ts"),
                "datetime": entry.get("datetime"),
                "temp": entry.get("temp"),
                "weather_main": weather.get("description"),
                "weather_code": weather.get("code"),
                "precip": entry.get("precip"),
                "wind_spd": entry.get("wind_spd"),
                "wind_dir": entry.get("wind_cdir_full"),
                "clouds": entry.get("clouds"),
                "aqi": entry.get("aqi", None),
                "visibility": entry.get("vis"),
                # NOTE(review): code >= 700 also flags code 800 ("clear" in
                # Weatherbit's table) as an alert — confirm the intended
                # threshold against the Weatherbit weather-code table.
                "alert": "Yes" if weather.get("code", 800) >= 700 else "No"
            }
        else:
            logger.warning("No valid weather data structure to extract.")
            return None
src/components/data_cleaning.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from pathlib import Path
4
+ import sys
5
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
6
+
7
+ from utils.logger import *
8
+
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+
14
+
15
+
16
def clean_news_events(df):
    """Normalize raw news-event records.

    Keeps the expected columns, drops duplicate/invalid rows, lower-cases
    the text fields and parses timestamps. Returns a new DataFrame.
    """
    required_columns = ['title', 'publishedAt', 'description', 'source', 'url']
    # .copy() so the assignments below always operate on an independent
    # frame, never on a view of the caller's DataFrame.
    df = df[[col for col in required_columns if col in df.columns]].copy()
    df = df.drop_duplicates(subset=['title', 'publishedAt'])
    # Fix: the column filter above tolerates missing columns, but the
    # original then accessed df['title'] / df['description'] /
    # df['publishedAt'] unconditionally and raised KeyError on partial input.
    if 'title' in df.columns:
        df['title'] = df['title'].str.strip().str.lower()
    if 'description' in df.columns:
        df['description'] = df['description'].str.strip().str.lower()
    if 'publishedAt' in df.columns:
        df['publishedAt'] = pd.to_datetime(df['publishedAt'], errors='coerce')
    df = df.dropna(subset=[c for c in ('title', 'publishedAt') if c in df.columns])
    logger.info(f"Cleaned news events: {df.shape}")
    return df
26
+
27
+
28
+
29
def clean_weather_alerts(df):
    """Normalize raw weather-alert records: keep the known columns,
    title-case cities, upper-case countries, parse epoch timestamps and
    drop rows missing a city or timestamp. Returns a new DataFrame."""
    keep_cols = ['city', 'country', 'lat', 'lon', 'weather_main', 'timestamp']
    present = [col for col in keep_cols if col in df.columns]
    df = df[present].copy()
    df['city'] = df['city'].str.strip().str.title()
    df['country'] = df['country'].str.strip().str.upper()
    # Timestamps arrive as epoch seconds; invalid ones become NaT.
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
    df = df.dropna(subset=['city', 'timestamp'])
    logger.info(f"Cleaned weather alerts: {df.shape}")
    return df
38
+
39
+
40
+
41
def clean_supply_chain_disruptions(df):
    """Clean the DataCo supply-chain CSV: de-duplicate, parse order/shipping
    dates, normalize the late-delivery flag and order status.
    Returns a new DataFrame."""
    df = df.drop_duplicates()

    # Fix: guard each column like the other cleaners do — the original
    # accessed the two date columns unconditionally and raised KeyError
    # when the raw CSV lacked them.
    for date_col in ['order date (DateOrders)', 'shipping date (DateOrders)']:
        if date_col in df.columns:
            df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

    if 'Late_delivery_risk' in df.columns:
        df['Late_delivery_risk'] = df['Late_delivery_risk'].fillna(0).astype(int)

    if 'Order Status' in df.columns:
        df['Order Status'] = df['Order Status'].str.strip().str.title()
    logger.info(f"Cleaned supply chain CSV: {df.shape}")
    return df
54
+
55
+
56
+
57
if __name__ == "__main__":
    artifacts = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw"
    processed_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"
    processed_dir.mkdir(parents=True, exist_ok=True)

    # Fix: each dataset is now cleaned AND saved inside its own try-block.
    # The original saved all three frames unconditionally at the end, so a
    # failure in any earlier step crashed with NameError on the undefined
    # cleaned_* variable instead of just logging the error.
    try:
        news_df = pd.read_json(artifacts / "news_events.json")
        cleaned_news = clean_news_events(news_df)
        cleaned_news.to_csv(processed_dir / "news_events_clean.csv", index=False)
        logger.info(f"News Alerts cleaned successfully: shape {cleaned_news.shape}")
    except Exception as e:
        logger.error(f"Error cleaning news: {e}")

    try:
        weather_df = pd.read_json(artifacts / "weather_alerts.json")
        cleaned_weather = clean_weather_alerts(weather_df)
        cleaned_weather.to_csv(processed_dir / "weather_alerts_clean.csv", index=False)
        logger.info(f"Weather Alerts cleaned successfully: shape {cleaned_weather.shape}")
    except Exception as e:
        logger.error(f"Error cleaning weather: {e}")

    try:
        try:
            sc_df = pd.read_csv(artifacts / "DataCoSupplyChainDataset.csv", encoding="utf-8")
        except UnicodeDecodeError:
            # The DataCo CSV ships in Latin-1; retry when UTF-8 fails.
            sc_df = pd.read_csv(artifacts / "DataCoSupplyChainDataset.csv", encoding="ISO-8859-1")
        cleaned_sc = clean_supply_chain_disruptions(sc_df)
        cleaned_sc.to_csv(processed_dir / "supply_chain_disruptions_clean.csv", index=False)
        logger.info(f"Supply chain CSV cleaned successfully: shape {cleaned_sc.shape}")
    except Exception as e:
        logger.error(f"Error cleaning supply chain CSV: {e}")
src/components/data_ingestion.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ import sys
5
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
6
+
7
+ from utils.logger import *
8
+
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
def load_news_events(path=None):
    """Load raw news events (JSON) into a DataFrame; re-raises on failure."""
    if path is None:
        path = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw" / "news_events.json"
    try:
        with open(path, "r", encoding="utf-8") as f:
            frame = pd.DataFrame(json.load(f))
        logger.info(f"News events loaded successfully: {frame.shape}")
        return frame
    except Exception as e:
        logger.error(f"Failed to load news events: {e}")
        raise
23
+
24
def load_weather_alerts(path=None):
    """Load raw weather alerts (JSON) into a DataFrame; re-raises on failure."""
    if path is None:
        path = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw" / "weather_alerts.json"
    try:
        with open(path, "r", encoding="utf-8") as f:
            frame = pd.DataFrame(json.load(f))
        logger.info(f"Weather alerts loaded successfully: {frame.shape}")
        return frame
    except Exception as e:
        logger.error(f"Failed to load weather alerts: {e}")
        raise
35
+
36
def load_supply_chain_disruptions(csv_path=None):
    """Load the historic DataCo incidents CSV.

    Retries with ISO-8859-1 when the file is not valid UTF-8; any other
    failure is logged and re-raised."""
    if csv_path is None:
        csv_path = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw" / "DataCoSupplyChainDataset.csv"
    try:
        frame = pd.read_csv(csv_path, encoding="utf-8")
        logger.info(f"Historic incidents loaded successfully: {frame.shape}")
        return frame
    except UnicodeDecodeError:
        frame = pd.read_csv(csv_path, encoding="ISO-8859-1")
        logger.info(f"Historic incidents loaded successfully (ISO-8859-1): {frame.shape}")
        return frame
    except Exception as e:
        logger.error(f"Failed to load historic supply chain CSV: {e}")
        raise
50
+
51
if __name__ == "__main__":
    # Smoke-test the loaders; the historic CSV is treated as optional.
    try:
        news_df = load_news_events()
        weather_df = load_weather_alerts()
    except Exception as e:
        logger.error(f"Major error in data ingestion: {e}")
    else:
        try:
            incidents_df = load_supply_chain_disruptions()
        except Exception as e:
            logger.error(f"No historic CSV loaded: {e}")
src/components/feature_engineering.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from pathlib import Path
4
+ import sys
5
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
6
+
7
+ from utils.logger import *
8
+
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+
14
def engineer_news_features(df):
    """Add boolean keyword flags (strike/disaster/accident) and publish-time
    features to a cleaned news-events frame.

    Assumes 'title' and 'description' columns exist (guaranteed by
    clean_news_events). Mutates and returns *df*.
    """
    KEYWORDS = {
        "strike": ["strike", "walkout", "protest"],
        "disaster": ["earthquake", "flood", "hurricane", "typhoon", "fire", "storm"],
        "accident": ["collision", "accident", "spill", "blockage"],
    }
    for key, words in KEYWORDS.items():
        # Hoisted: the original rebuilt the pattern twice per category.
        pattern = '|'.join(words)
        df[f"is_{key}"] = (
            df['title'].str.contains(pattern, case=False, na=False) |
            df['description'].str.contains(pattern, case=False, na=False)
        )

    if "publishedAt" in df.columns:
        # Parse once — the original re-ran pd.to_datetime for each feature.
        published = pd.to_datetime(df["publishedAt"], errors='coerce')
        df["event_weekday"] = published.dt.weekday
        df["event_hour"] = published.dt.hour
    logger.info(f"Engineered news event features: {df.shape}")
    return df
32
+
33
+
34
+
35
def engineer_weather_features(df):
    """Add a severe-weather flag, one-hot weather columns and month/season
    features to a cleaned weather frame. Mutates and returns the frame."""
    severe_words = ["Storm", "Thunderstorm", "Rain", "Snow", "Hurricane", "Extreme"]

    if "weather_main" in df.columns:
        # Fix: the original computed severe_weather BEFORE this existence
        # check, so a frame without 'weather_main' raised KeyError even
        # though the guard suggests the column is optional.
        df["severe_weather"] = df["weather_main"].str.contains('|'.join(severe_words), case=False, na=False)
        df = pd.get_dummies(df, columns=["weather_main"], prefix="weather")

    if "timestamp" in df.columns:
        # Parse once — the original re-ran pd.to_datetime for each feature.
        ts = pd.to_datetime(df["timestamp"], errors='coerce')
        df["month"] = ts.dt.month
        df["season"] = ts.dt.month % 12 // 3 + 1
    logger.info(f"Engineered weather features: {df.shape}")
    return df
48
+
49
+
50
+
51
def engineer_supply_chain_features(df):
    """Derive lead-time, one-hot categorical columns and a lateness flag
    from the cleaned supply-chain frame. Returns the resulting frame."""
    order_col = "order date (DateOrders)"
    ship_col = "shipping date (DateOrders)"
    if order_col in df.columns and ship_col in df.columns:
        shipped = pd.to_datetime(df[ship_col], errors='coerce')
        ordered = pd.to_datetime(df[order_col], errors='coerce')
        df["lead_time_days"] = (shipped - ordered).dt.days

    categorical_cols = ["Order Status", "Product Status", "Shipping Mode", "Order Region", "Order Country"]
    for col in categorical_cols:
        if col in df.columns:
            df = pd.get_dummies(df, columns=[col], prefix=col.replace(' ', '_'))

    if "Late_delivery_risk" in df.columns:
        df["is_late"] = df["Late_delivery_risk"] > 0
    logger.info(f"Engineered supply chain features: {df.shape}")
    return df
68
+
69
+
70
+
71
if __name__ == "__main__":
    processed_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"

    # (input csv, feature function, output csv, log label) — each dataset is
    # processed independently so one failure does not stop the others.
    jobs = [
        ("news_events_clean.csv", engineer_news_features,
         "news_events_features.csv", "news"),
        ("weather_alerts_clean.csv", engineer_weather_features,
         "weather_alerts_features.csv", "weather"),
        ("supply_chain_disruptions_clean.csv", engineer_supply_chain_features,
         "supply_chain_disruptions_features.csv", "supply chain"),
    ]
    for src_name, build_features, dst_name, label in jobs:
        try:
            frame = pd.read_csv(processed_dir / src_name, encoding="utf-8")
            features = build_features(frame)
            features.to_csv(processed_dir / dst_name, index=False)
            logger.info(f"Saved engineered {label} features.")
        except Exception as e:
            logger.error(f"Error engineering {label} features: {e}")
src/components/model_nlp_intent.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import LabelEncoder
7
+ import joblib
8
+ import sys
9
+ from pathlib import Path
10
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
11
+ from utils.logger import *
12
+
13
+ import logging
14
+ logger = logging.getLogger(__name__)
15
+
16
def generate_synthetic_data():
    """Build a tiny hand-written corpus for training the intent classifier.

    Returns:
        pd.DataFrame with columns 'text' (user query) and 'intent' (one of
        risk_check / weather_alert / mitigation_help / general_query),
        five examples per intent, grouped by intent in insertion order.
    """
    corpus = {
        'risk_check': [
            "What's the risk for Mumbai shipments?",
            "Any delays expected for Shanghai routes?",
            "Is there disruption risk for my order?",
            "Check risk status for Delhi delivery",
            "Are there any supply chain issues?",
        ],
        'weather_alert': [
            "Any weather alerts today?",
            "What's the weather situation in Beijing?",
            "Are there storms affecting deliveries?",
            "Weather conditions for logistics?",
            "Any severe weather warnings?",
        ],
        'mitigation_help': [
            "What should I do about delays?",
            "How to avoid supply chain risks?",
            "Suggest alternative routes",
            "What are my options for rerouting?",
            "Help me mitigate delivery issues",
        ],
        'general_query': [
            "Hello, how can you help?",
            "What can this system do?",
            "I need information about logistics",
            "Tell me about your capabilities",
            "How does this chatbot work?",
        ],
    }

    texts, intents = [], []
    for intent, phrases in corpus.items():
        texts.extend(phrases)
        intents.extend([intent] * len(phrases))
    return pd.DataFrame({'text': texts, 'intent': intents})
56
+
57
def main():
    """Train, save, and smoke-test the DistilBERT intent classifier.

    Fine-tunes distilbert-base-uncased on the synthetic corpus, persists the
    model, tokenizer, and label encoder under artifacts/models/nlp_intent,
    then logs predictions for a few sample queries.

    Fix: the original also tokenized the held-out split into a
    `test_encodings` variable that was never used — dead work removed.
    """
    df = generate_synthetic_data()

    # Map string intents to integer class ids.
    label_encoder = LabelEncoder()
    df['label'] = label_encoder.fit_transform(df['intent'])

    # Stratified 80/20 split keeps the training subset reproducible; the
    # held-out portion is not evaluated in this script.
    X_train, X_test, y_train, y_test = train_test_split(
        df['text'], df['label'], test_size=0.2, random_state=42, stratify=df['label']
    )

    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    model = TFDistilBertForSequenceClassification.from_pretrained(
        'distilbert-base-uncased',
        num_labels=len(label_encoder.classes_)
    )

    train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=128, return_tensors='tf')

    train_dataset = tf.data.Dataset.from_tensor_slices((
        dict(train_encodings),
        y_train.values
    )).batch(8)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

    model.fit(train_dataset, epochs=3)

    # Persist all three artifacts that predict_intent() loads.
    model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_intent"
    model_dir.mkdir(parents=True, exist_ok=True)

    model.save_pretrained(model_dir / "intent_model")
    tokenizer.save_pretrained(model_dir / "intent_tokenizer")
    joblib.dump(label_encoder, model_dir / "label_encoder.joblib")

    logger.info(f"Intent classification model saved to {model_dir}")

    # Quick sanity check on the freshly trained model.
    test_queries = [
        "Is there risk for my Beijing shipment?",
        "Any weather problems today?",
        "What should I do about delays?"
    ]

    for query in test_queries:
        inputs = tokenizer(query, return_tensors='tf', truncation=True, padding=True, max_length=128)
        outputs = model(inputs)
        predicted_class = tf.argmax(outputs.logits, axis=1).numpy()[0]
        intent = label_encoder.inverse_transform([predicted_class])[0]
        confidence = tf.nn.softmax(outputs.logits)[0][predicted_class].numpy()

        logger.info(f"Query: '{query}' -> Intent: {intent} (Confidence: {confidence:.3f})")
122
+
123
+
124
def predict_intent(text: str) -> dict:
    """Classify a user query into one of the trained intents.

    Args:
        text: Raw user query string.

    Returns:
        {"intent": <decoded label>, "confidence": <softmax probability as float>}

    Fix: the original re-read the model, tokenizer, and label encoder from
    disk on every call; the artifacts are now loaded once and cached on the
    function object.
    """
    if not hasattr(predict_intent, "_artifacts"):
        model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_intent"
        predict_intent._artifacts = (
            TFDistilBertForSequenceClassification.from_pretrained(model_dir / "intent_model"),
            DistilBertTokenizer.from_pretrained(model_dir / "intent_tokenizer"),
            joblib.load(model_dir / "label_encoder.joblib"),
        )
    model, tokenizer, label_encoder = predict_intent._artifacts

    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
    outputs = model(inputs)
    predicted_class = tf.argmax(outputs.logits, axis=1).numpy()[0]
    intent = label_encoder.inverse_transform([predicted_class])[0]
    confidence = float(tf.nn.softmax(outputs.logits)[0][predicted_class].numpy())
    return {"intent": intent, "confidence": confidence}
139
+
140
+
141
if __name__ == "__main__":
    main()  # script entry point: train and persist the intent-classification artifacts
src/components/model_nlp_ner.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from transformers import DistilBertTokenizerFast, TFDistilBertForTokenClassification, pipeline
3
+ from sklearn.model_selection import train_test_split
4
+ import numpy as np
5
+ import joblib
6
+ import sys
7
+ from pathlib import Path
8
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
9
+ from utils.logger import *
10
+
11
+ import logging
12
+ logger = logging.getLogger(__name__)
13
+
14
# Training hyperparameters for the NER fine-tune.
EPOCHS = 30              # upper bound; the EarlyStopping callback below usually halts sooner
BATCH_SIZE = 8
LEARNING_RATE = 5e-5
VALIDATION_SPLIT = 0.15  # fraction of examples held out for validation
PATIENCE = 3             # epochs without val_loss improvement before stopping
19
+
20
# Prefer AdamW (decoupled weight decay) when tensorflow_addons is installed,
# otherwise fall back to plain Adam at the same learning rate.
# NOTE(review): tensorflow-addons is in end-of-life/minimal-maintenance mode —
# confirm this optional dependency is still wanted.
try:
    from tensorflow_addons.optimizers import AdamW
    optimizer = AdamW(learning_rate=LEARNING_RATE, weight_decay=1e-2)
except ImportError:
    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
25
+
26
+
27
+
28
# Hand-labelled NER training corpus: (words, BIO tags) pairs.
# Tag scheme: B-LOC/I-LOC for locations, B-EVENT/I-EVENT for disruption
# events, O for everything else. Sentences are pre-split into words.
examples = [
    (["Delay", "in", "Shanghai", "due", "to", "storms"], ["O", "O", "B-LOC", "O", "O", "B-EVENT"]),
    (["Any", "delay", "in", "vessel", "from", "USA", "to", "UAE", "?"], ["O", "O", "O", "O", "O", "B-LOC", "O", "B-LOC", "O"]),
    (["Cargo", "stuck", "at", "UAE", "port"], ["O", "O", "O", "B-LOC", "O"]),
    (["Weather", "alert", "for", "USA"], ["O", "O", "O", "B-LOC"]),
    (["Flood", "risk", "in", "Mumbai"], ["O", "O", "O", "B-LOC"]),
    (["Port", "closure", "Middle", "East"], ["O", "O", "B-LOC", "I-LOC"]),
    (["Is", "cargo", "delayed", "from", "USA", "to", "India", "?"], ["O", "O", "O", "O", "B-LOC", "O", "B-LOC", "O"]),
    (["Weather", "problems", "expected", "in", "USA"], ["O", "O", "O", "O", "B-LOC"]),
    (["Port", "strike", "at", "Singapore"], ["O", "O", "O", "B-LOC"]),
    (["Typhoon", "in", "Japan"], ["B-EVENT", "O", "B-LOC"]),
    (["Reroute", "shipments", "from", "Los", "Angeles"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Supply", "disruption", "Middle", "East"], ["O", "O", "B-LOC", "I-LOC"]),
    (["Severe", "fog", "in", "United", "Arab", "Emirates"], ["O", "O", "O", "B-LOC", "I-LOC", "I-LOC"]),
    (["Are", "shipments", "to", "Brazil", "affected", "by", "strike", "?"], ["O", "O", "O", "B-LOC", "O", "O", "B-EVENT", "O"]),
    (["Is", "Paris", "airport", "open", "after", "floods", "?"], ["O", "B-LOC", "O", "O", "O", "B-EVENT", "O"]),
    (["Delay", "reported", "in", "Berlin"], ["O", "O", "O", "B-LOC"]),
    (["Export", "hold", "at", "Los", "Angeles"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Typhoon", "warning", "for", "Japan"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Reroute", "cargo", "to", "Singapore"], ["O", "O", "O", "B-LOC"]),
    (["Is", "there", "labor", "strike", "in", "Canada", "?"], ["O", "O", "O", "B-EVENT", "O", "B-LOC", "O"]),
    (["Storm", "impact", "on", "United", "Kingdom"], ["B-EVENT", "O", "O", "B-LOC", "I-LOC"]),
    (["Supply", "disruption", "Italy"], ["O", "O", "B-LOC"]),
    (["Any", "hold-up", "in", "Dubai", "?",], ["O", "O", "O", "B-LOC", "O"]),
    (["Cargo", "delay", "at", "Rotterdam", "port"], ["O", "O", "O", "B-LOC", "O"]),
    (["Flood", "disrupts", "service", "in", "Turkey"], ["B-EVENT", "O", "O", "O", "B-LOC"]),
    (["Severe", "thunderstorm", "in", "New", "York", "City"], ["O", "B-EVENT", "O", "B-LOC", "I-LOC", "I-LOC"]),
    (["Is", "Shanghai", "port", "closed", "for", "holiday", "?"], ["O", "B-LOC", "O", "O", "O", "O", "O"]),
    (["France", "logistics", "strike"], ["B-LOC", "O", "B-EVENT"]),
    (["Export", "shipment", "to", "Spain", "delayed"], ["O", "O", "O", "B-LOC", "O"]),
    (["Cargo", "rerouted", "from", "Colombo", "to", "Sydney"], ["O", "O", "O", "B-LOC", "O", "B-LOC"]),
    (["Vessel", "from", "India", "held", "by", "customs"], ["O", "O", "B-LOC", "O", "O", "O"]),
    (["Is", "Singapore", "affected", "by", "monsoon", "season", "?"], ["O", "B-LOC", "O", "O", "B-EVENT", "I-EVENT", "O"]),
    (["Disruption", "in", "United", "Arab", "Emirates", "due", "to", "strike"], ["O", "O", "B-LOC", "I-LOC", "I-LOC", "O", "O", "B-EVENT"]),
    (["How", "long", "is", "the", "delay", "in", "Mexico", "?"], ["O", "O", "O", "O", "O", "O", "B-LOC", "O"]),
    (["Flood", "risk", "in", "Gujarat"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Severe", "weather", "disrupts", "Melbourne", "port"], ["B-EVENT", "O", "O", "B-LOC", "O"]),
    (["Export", "stopped", "from", "Jakarta", "because", "of", "strike"], ["O", "O", "O", "B-LOC", "O", "O", "B-EVENT"]),
    (["Storm", "warning", "for", "Delhi"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Any", "delay", "from", "United", "States", "to", "United", "Kingdom", "?"], ["O", "O", "O", "B-LOC", "I-LOC", "O", "B-LOC", "I-LOC", "O"]),
    (["Cargo", "stuck", "at", "Sao", "Paulo"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Shipping", "interruption", "in", "Cairo"], ["O", "O", "O", "B-LOC"]),
    (["Typhoon", "delays", "cargo", "to", "Hong", "Kong"], ["B-EVENT", "O", "O", "O", "B-LOC", "I-LOC"]),
    (["No", "disruption", "in", "Berlin"], ["O", "O", "O", "B-LOC"]),
    (["Port", "closure", "for", "Christmas", "in", "Canada"], ["O", "O", "O", "O", "O", "B-LOC"]),
    (["Is", "there", "a", "strike", "in", "Melbourne", "?"], ["O", "O", "O", "B-EVENT", "O", "B-LOC", "O"]),
    (["Shipment", "delayed", "in", "Mexico", "City"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Are", "vessels", "from", "Copenhagen", "blocked", "?"], ["O", "O", "O", "B-LOC", "O", "O"]),
    (["Heavy", "rains", "in", "Manila"], ["O", "B-EVENT", "O", "B-LOC"]),
    (["Strike", "at", "Johannesburg", "port"], ["B-EVENT", "O", "B-LOC", "O"]),
    (["Is", "the", "route", "from", "Italy", "to", "Brazil", "safe", "?"], ["O", "O", "O", "O", "B-LOC", "O", "B-LOC", "O", "O"]),
    (["Container", "stuck", "at", "Antwerp"], ["O", "O", "O", "B-LOC"]),
    (["Any", "blockade", "in", "Pakistan", "?"], ["O", "B-EVENT", "O", "B-LOC", "O"]),
    (["Flood", "alerts", "for", "Vietnam"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Are", "planes", "to", "Madrid", "canceled", "?"], ["O", "O", "O", "B-LOC", "O", "O"]),
    (["Shipments", "from", "Morocco", "are", "late"], ["O", "O", "B-LOC", "O", "O"]),
    (["Earthquake", "in", "Indonesia", "affecting", "deliveries"], ["B-EVENT", "O", "B-LOC", "O", "O"]),
    (["Rail", "disruption", "in", "Melbourne"], ["O", "B-EVENT", "O", "B-LOC"]),
    (["Any", "closure", "at", "Rotterdam", "port", "?"], ["O", "B-EVENT", "O", "B-LOC", "O", "O"]),
    (["Landslide", "blocks", "road", "to", "Lima"], ["B-EVENT", "O", "O", "O", "B-LOC"]),
    (["Flights", "to", "Bangkok", "suspended"], ["O", "O", "B-LOC", "O"]),
    (["Typhoon", "threat", "for", "Taipei"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Is", "Melbourne", "port", "operational", "today", "?"], ["O", "B-LOC", "O", "O", "O", "O"]),
    (["Japan", "export", "ban"], ["B-LOC", "O", "B-EVENT"]),
    (["Closure", "in", "Buenos", "Aires"], ["B-EVENT", "O", "B-LOC", "I-LOC"]),
    (["Truck", "strike", "delaying", "goods", "from", "Poland"], ["O", "B-EVENT", "O", "O", "O", "B-LOC"]),
    (["Shanghai", "flood", "disrupts", "cargo"], ["B-LOC", "B-EVENT", "O", "O"]),
    (["Supply", "held", "in", "Turkey", "because", "of", "strike"], ["O", "O", "O", "B-LOC", "O", "O", "B-EVENT"]),
    (["Port", "congestion", "in", "Los", "Angeles"], ["O", "B-EVENT", "O", "B-LOC", "I-LOC"]),
    (["Storm", "approaching", "Cape", "Town"], ["B-EVENT", "O", "B-LOC", "I-LOC"]),
    (["Bad", "weather", "New", "York"], ["O", "B-EVENT", "B-LOC", "I-LOC"]),
    (["Zambia", "roads", "closed", "due", "to", "flood"], ["B-LOC", "O", "O", "O", "O", "B-EVENT"]),
    (["Strike", "in", "Athens", "delays", "supply"], ["B-EVENT", "O", "B-LOC", "O", "O"]),
    (["Transport", "problem", "in", "Perth"], ["O", "B-EVENT", "O", "B-LOC"]),
    (["Typhoon", "interrupts", "shipments", "to", "Hong", "Kong"], ["B-EVENT", "O", "O", "O", "B-LOC", "I-LOC"]),
    (["Avalanche", "blocks", "Italian", "border"], ["B-EVENT", "O", "B-LOC", "O"]),

]
106
+
107
+
108
# Flatten the corpus into parallel word/tag sequences and derive the label
# vocabulary used by the token-classification head.
sentences = [s for s, t in examples]
tags = [t for s, t in examples]
unique_tags = sorted(set(l for ts in tags for l in ts))
label2id = {t: i for i, t in enumerate(unique_tags)}  # tag -> integer class id
id2label = {i: t for t, i in label2id.items()}        # inverse mapping for decoding
max_len = max(len(s) for s in sentences)              # pad/truncate length, in words
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
115
+
116
def encode(sentences, labels, label2id, max_len):
    """Tokenize word-split sentences and attach padded label ids and weights.

    Per-word tags are mapped through label2id and padded with class 0 up to
    max_len; sample_weights carry 1 for real words and 0 for padding so the
    padded positions are masked out of the weighted loss/metrics.

    NOTE(review): labels are aligned positionally to the input *words*, while
    the tokenizer emits subword tokens plus special tokens ([CLS]/[SEP]) —
    when a word splits into several subwords the label/token alignment
    drifts. Confirm with encodings.word_ids() whether realignment is needed.
    """
    encodings = tokenizer(sentences, is_split_into_words=True, padding='max_length', truncation=True, max_length=max_len, return_tensors='tf')
    label_ids = []
    sample_weights = []
    for i, labs in enumerate(labels):
        ids = [label2id[l] for l in labs]
        padding_length = max_len - len(ids)
        ids += [0]*padding_length
        weights = [1]*len(labs) + [0]*padding_length
        label_ids.append(ids)
        sample_weights.append(weights)
    encodings['labels'] = tf.convert_to_tensor(label_ids)
    encodings['sample_weights'] = tf.convert_to_tensor(sample_weights, dtype=tf.float32)
    return encodings
130
+
131
def train_ner_model():
    """Fine-tune DistilBERT for LOC/EVENT token classification and save it.

    Splits the module-level corpus, trains with padding-masked sample
    weights and early stopping, then persists the model, tokenizer, and the
    tag->id map under artifacts/models/nlp_ner.
    """
    X_train, X_val, y_train, y_val = train_test_split(sentences, tags, test_size=VALIDATION_SPLIT, random_state=42)
    train_inputs = encode(X_train, y_train, label2id, max_len)
    val_inputs = encode(X_val, y_val, label2id, max_len)

    model = TFDistilBertForTokenClassification.from_pretrained(
        'distilbert-base-uncased',
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id
    )
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # weighted_metrics applies the per-token sample weights from encode(), so
    # padded positions are excluded from the weighted accuracy.
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'],weighted_metrics=['accuracy'])

    # Stop when validation loss stalls and roll back to the best epoch.
    callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=PATIENCE,
        restore_best_weights=True
    )

    logger.info("Starting NER model training (tuned).")

    # The extra keys added by encode() (labels / sample_weights) are stripped
    # from the model inputs and passed separately as targets and weights.
    history = model.fit(
        {k: v for k, v in train_inputs.items() if k not in ['labels', 'sample_weights']},
        train_inputs['labels'],
        sample_weight=train_inputs['sample_weights'],
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(
            {k: v for k, v in val_inputs.items() if k not in ['labels', 'sample_weights']},
            val_inputs['labels'],
            val_inputs['sample_weights']
        ),
        callbacks=[callback]
    )

    logger.info("Training complete.")
    logger.info(f"Best validation accuracy: {max(history.history['val_accuracy'])}")

    # Persist everything extract_entities_pipeline() needs at inference time.
    out_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_ner"
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save_pretrained(out_dir / "ner_model")
    tokenizer.save_pretrained(out_dir / "ner_tokenizer")
    joblib.dump(label2id, out_dir / "label2id.joblib")
    logger.info(f"NER (TF) model, tokenizer, and label map saved to {out_dir}")
179
+
180
+
181
def extract_entities_pipeline(text: str) -> dict:
    """Extract location and event entities from a free-text query.

    Combines the custom fine-tuned DistilBERT token classifier with a
    pretrained CoNLL-03 HF NER pipeline: locations are the union of both
    sources, events come from the custom model only.

    Args:
        text: Raw query string; split on whitespace for the custom model.

    Returns:
        {"location": [str, ...], "event": [str, ...]}

    Fix: the BIO-decoding loop variable was named `id`, shadowing the
    builtin — renamed to `pred_id`. Logic is otherwise unchanged.

    NOTE(review): predictions are matched to whitespace-split words purely
    positionally (pred_ids[:len(tokens)]), which ignores the leading [CLS]
    token and any subword splits — alignment can drift; confirm against
    tokenizer word_ids(). Also, both models are reloaded on every call,
    which is expensive; consider caching.
    """
    model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_ner"
    custom_model = TFDistilBertForTokenClassification.from_pretrained(model_dir / "ner_model")
    custom_tokenizer = DistilBertTokenizerFast.from_pretrained(model_dir / "ner_tokenizer")
    label2id = joblib.load(model_dir / "label2id.joblib")
    id2label = {i: t for t, i in label2id.items()}
    max_len = 32  # NOTE(review): hard-coded; training used the corpus max sentence length
    tokens = text.split()
    encoding = custom_tokenizer([tokens], is_split_into_words=True, return_tensors='tf', padding='max_length', truncation=True, max_length=max_len)
    outputs = custom_model({k: v for k, v in encoding.items() if k != "labels"})
    logits = outputs.logits.numpy()[0]
    pred_ids = np.argmax(logits, axis=-1)
    custom_entities = {"location": [], "event": []}
    current_loc, current_evt = [], []
    # Greedy BIO decoding over (word, predicted label id) pairs.
    for w, pred_id in zip(tokens, pred_ids[:len(tokens)]):
        label = id2label[pred_id]
        if label == "B-LOC":
            if current_loc:
                custom_entities["location"].append(" ".join(current_loc))
                current_loc = []
            current_loc = [w]
        elif label == "I-LOC" and current_loc:
            current_loc.append(w)
        else:
            if current_loc:
                custom_entities["location"].append(" ".join(current_loc))
                current_loc = []
            if label == "B-EVENT":
                if current_evt:
                    custom_entities["event"].append(" ".join(current_evt))
                    current_evt = []
                current_evt = [w]
            elif label == "I-EVENT" and current_evt:
                current_evt.append(w)
            else:
                if current_evt:
                    custom_entities["event"].append(" ".join(current_evt))
                    current_evt = []
    # Flush any entity still open at the end of the sentence.
    if current_loc:
        custom_entities["location"].append(" ".join(current_loc))
    if current_evt:
        custom_entities["event"].append(" ".join(current_evt))

    # Backstop with a general-purpose pretrained NER for additional locations.
    hf_ner = pipeline("ner", grouped_entities=True, model="dbmdz/bert-large-cased-finetuned-conll03-english")
    hf_results = hf_ner(text)
    hf_locations = [ent['word'] for ent in hf_results if ent['entity_group'] == "LOC"]

    all_locations = set(custom_entities["location"]) | set(hf_locations)
    all_events = custom_entities["event"]
    return {"location": list(all_locations), "event": all_events}
231
+
232
+
233
if __name__ == "__main__":
    train_ner_model()  # script entry point: fine-tune and persist the NER artifacts
src/components/model_risk_predictor.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.ensemble import HistGradientBoostingClassifier
5
+ from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
6
+ from sklearn.inspection import permutation_importance
7
+ import joblib
8
+ import sys
9
+ from pathlib import Path
10
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
11
+ from utils.logger import *
12
+
13
+ import logging
14
+ logger = logging.getLogger(__name__)
15
+
16
def main():
    """Train and persist the late-delivery risk classifier.

    Loads the engineered supply-chain features, trains a
    HistGradientBoostingClassifier on numeric/boolean columns, logs
    evaluation metrics and permutation importances (with a crude leakage
    check), and saves the model under artifacts/models/risk_predictor.
    """
    processed_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"
    model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "risk_predictor"
    model_dir.mkdir(parents=True, exist_ok=True)
    df = pd.read_csv(processed_dir / "supply_chain_disruptions_features.csv")

    target = "is_late"
    if target not in df.columns:
        logger.error(f"Target column {target} not found.")
        return

    # Identifiers plus the label and its direct precursors (delivery status,
    # realized shipping durations) are excluded to avoid target leakage.
    exclude_cols = [
        target, "Customer Id", "Order Id", "Order Item Id", "Order Customer Id",
        "Late_delivery_risk", "Late Delivery Risk", "Delivery Status",
        "lead_time_days", "Days for shipping (real)", "Days for shipment (scheduled)"
    ]
    # Keep only numeric/boolean columns the model can consume directly.
    feature_cols = [
        c for c in df.columns
        if c not in exclude_cols and df[c].dtype in [np.float64, np.int64, np.bool_, np.int32]
    ]

    X = df[feature_cols]
    y = df[target].astype(int)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

    logger.info(f"Training data shape: {X_train.shape}, Test data shape: {X_test.shape}")

    # NOTE(review): learning_rate=1.0 with max_depth=1 (stump-like trees) is
    # an unusual gradient-boosting configuration — confirm it is intentional.
    model = HistGradientBoostingClassifier(
        max_iter=100, learning_rate=1.0, max_depth=1, random_state=42
    )
    model.fit(X_train, y_train)

    # Held-out evaluation.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]
    report = classification_report(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_proba)
    logger.info("Classification Report:\n" + report)
    logger.info(f"Confusion Matrix:\n{cm}")
    logger.info(f"ROC-AUC: {roc_auc}")

    # Permutation importance doubles as a leakage detector: one feature
    # dominating suggests the label leaked into the features.
    result = permutation_importance(model, X_test, y_test, n_repeats=10, random_state=42, n_jobs=-1)
    importances = result.importances_mean
    feature_importance = pd.DataFrame({'feature': X_test.columns, 'importance': importances}).sort_values('importance', ascending=False)
    logger.info("Top 10 Most Important Features (Permutation Importance):")
    logger.info(feature_importance.head(10).to_string())
    max_importance = feature_importance['importance'].max()
    if max_importance > 0.8:
        logger.warning(f"Potential data leakage: One feature has {max_importance:.3f} importance")

    # Persist the model where predict_risk() looks for it.
    model_path = model_dir / "hist_gradient_boosting_risk_predictor.joblib"
    joblib.dump(model, model_path)
    logger.info(f"Model saved to {model_path}")
69
+
70
+
71
def build_feature_row(feature_cols, query_dict, reference_row=None):
    """Construct a single model-ready feature row from a chatbot query.

    Args:
        feature_cols: Column names the trained model expects.
        query_dict: Query context; recognised keys are 'shipping_mode' and
            'region' (matched as a substring of one-hot column names).
        reference_row: Baseline values (e.g. the training-set median);
            defaults to all zeros when not supplied.

    Returns:
        pd.Series over feature_cols with the shipping-mode, region/country,
        and COMPLETE order-status one-hot columns switched on over the
        baseline values.
    """
    if reference_row is None:
        reference_row = pd.Series({col: 0 for col in feature_cols})

    row = reference_row.copy()

    # Activate the one-hot column for the requested shipping mode.
    shipping_mode = query_dict.get('shipping_mode', 'Standard Class')
    for col in feature_cols:
        if 'Shipping_Mode' in col and shipping_mode in col:
            row[col] = 1
            logging.getLogger(__name__).debug(f"Set shipping mode: {col} = 1")

    # Activate region/country dummies mentioning the region. Fix: guard
    # against the empty string — '' is a substring of every column name, so
    # the original switched on ALL region/country dummies when no region was
    # supplied.
    region = query_dict.get('region', '')
    if region:
        for col in feature_cols:
            if 'Order_Country' in col or 'Order_Region' in col:
                if region in col:
                    row[col] = 1

    # Assume a completed order status as the default context.
    for col in feature_cols:
        if 'Order_Status_COMPLETE' in col:
            row[col] = 1

    return row
98
+
99
+
100
+
101
# Baseline disruption risk (0-1) per region/port; regions not listed fall
# back to 0.40 (see calculate_rule_based_risk).
REGION_BASE_RISKS = {
    "Shanghai": 0.55,
    "Singapore": 0.30,
    "Mumbai": 0.45,
    "Dubai": 0.35,
    "UAE": 0.35,
    "USA": 0.30,
    "Germany": 0.25,
    "China": 0.55,
    "India": 0.45,
    "Hong Kong": 0.50,
    "Rotterdam": 0.28,
    "Los Angeles": 0.40,
}


# Additive risk contribution per keyword matched (case-insensitively) in
# free-text incident descriptions; contributions are summed and the final
# score is capped at 1.0 downstream.
EVENT_RISK_MULTIPLIERS = {
    "strike": 0.30,
    "port strike": 0.35,
    "typhoon": 0.35,
    "hurricane": 0.35,
    "earthquake": 0.40,
    "flood": 0.25,
    "port closure": 0.45,
    "supplier outage": 0.25,
    "customs delay": 0.15,
    "congestion": 0.20,
    "pandemic": 0.30,
    "war": 0.50,
    "sanctions": 0.40,
}
132
+
133
+
134
def calculate_rule_based_risk(region, days, incidents):
    """Blend regional base risk, incident keywords, and time horizon.

    The score is 50% regional baseline (0.40 default for unknown regions),
    40% summed incident-keyword weights, and 10% urgency (shorter horizons
    score higher), capped at 1.0.
    """
    regional = REGION_BASE_RISKS.get(region, 0.40)

    # Sum the weight of every known event keyword found in the incident texts.
    incident_risk = 0.0
    for raw in incidents or []:
        lowered = str(raw).lower()
        for keyword, weight in EVENT_RISK_MULTIPLIERS.items():
            if keyword in lowered:
                incident_risk += weight
                logger.debug(f"Event '{keyword}' detected in '{raw}', adding {weight}")

    # Urgency factor: 1.0 at day 0, decaying linearly, floored at 0.1.
    urgency = max(0.1, 1.0 - (days / 30.0))

    combined = regional * 0.5 + incident_risk * 0.4 + urgency * 0.1
    return min(1.0, combined)
155
+
156
+
157
def predict_risk(region: str, days: int = 5, origin=None, destination=None,
                 event_type=None, incidents=None, shipping_mode=None):
    """Hybrid disruption-risk score in [0, 1] for a region/route.

    Combines the trained HistGradientBoosting model (historical shipping
    patterns) with the rule-based score (regional baseline + live incident
    keywords). Weighting shifts toward the rules when incidents are present.

    Args:
        region: Destination region/city used for both the ML dummies and rules.
        days: Planning horizon in days (shorter horizon raises the rule score).
        origin, destination: Optional route endpoints, passed into the ML
            feature-row context.
        event_type: Currently unused; kept for interface compatibility.
        incidents: Iterable of free-text incident descriptions.
        shipping_mode: One-hot shipping mode; defaults to "Standard Class".

    Returns:
        Risk score rounded to 2 decimals; 0.50 on unexpected failure.

    Fix: removed the redundant function-local re-imports of joblib, pandas,
    and Path — all three are already imported at module level.
    """
    try:
        model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "risk_predictor"
        model_path = model_dir / "hist_gradient_boosting_risk_predictor.joblib"

        if shipping_mode is None:
            shipping_mode = "Standard Class"

        # Rule-based component is always available.
        rule_risk = calculate_rule_based_risk(region, days, incidents or [])
        logger.info(f"Rule-based risk for {region}: {rule_risk:.3f}")

        # ML component falls back to a neutral prior when the model or its
        # feature reference data is unavailable.
        ml_risk = 0.40

        if model_path.exists():
            try:
                model = joblib.load(model_path)
                logger.debug(f"Loaded ML model from {model_path}")

                data_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"
                feature_csv_path = data_dir / "supply_chain_disruptions_features.csv"

                if feature_csv_path.exists():
                    feature_csv = pd.read_csv(feature_csv_path)
                    feature_cols = list(model.feature_names_in_) if hasattr(model, "feature_names_in_") else list(feature_csv.columns)

                    # Median row stands in for a "typical shipment" baseline.
                    reference_row = feature_csv[feature_cols].median()

                    query_dict = {
                        "region": region,
                        "days": days,
                        "origin": origin,
                        "destination": destination,
                        "shipping_mode": shipping_mode,
                    }

                    test_features = pd.DataFrame([build_feature_row(feature_cols, query_dict, reference_row)])
                    ml_risk = float(model.predict_proba(test_features)[0, 1])
                    logger.info(f"ML model risk for {region}: {ml_risk:.3f}")
            except Exception as e:
                logger.warning(f"Could not get ML prediction: {e}")

        if incidents and len(incidents) > 0:
            # Live incidents reported: trust the rules more than history.
            final_risk = (ml_risk * 0.40) + (rule_risk * 0.60)
            logger.info(f"Hybrid risk (with incidents): ML={ml_risk:.3f}*0.4 + Rule={rule_risk:.3f}*0.6 = {final_risk:.3f}")
        else:
            final_risk = (ml_risk * 0.70) + (rule_risk * 0.30)
            logger.info(f"Hybrid risk (no incidents): ML={ml_risk:.3f}*0.7 + Rule={rule_risk:.3f}*0.3 = {final_risk:.3f}")

        final_risk = float(np.clip(final_risk, 0.0, 1.0))

        return round(final_risk, 2)

    except Exception as e:
        logger.error(f"Error in predict_risk: {e}", exc_info=True)
        return 0.50
225
+
226
+
227
if __name__ == "__main__":
    # Train the model, then exercise the hybrid scorer on a few scenarios.
    main()


    print("\n" + "="*60)
    print("Testing HYBRID Risk Predictions (ML + Rules)")
    print("="*60)


    # Scenario 1: baseline — known region, no live incidents.
    print("\n1. UAE with no events:")
    risk1 = predict_risk("UAE", days=5, incidents=[])
    print(f" → Risk Score: {risk1:.2f}")


    # Scenario 2: a single incident should raise the score vs. baseline.
    print("\n2. Shanghai with port strike:")
    risk2 = predict_risk("Shanghai", days=5, incidents=["port strike"])
    print(f" → Risk Score: {risk2:.2f}")
    print(f" → Increase: +{(risk2-risk1)*100:.1f}%")


    # Scenario 3: multiple incidents on a shorter horizon.
    print("\n3. Mumbai with typhoon and port congestion:")
    risk3 = predict_risk("Mumbai", days=3, incidents=["typhoon", "port congestion"])
    print(f" → Risk Score: {risk3:.2f}")
    print(f" → Increase: +{(risk3-risk1)*100:.1f}%")


    # Scenario 4: route context without incidents.
    print("\n4. USA to Singapore route (no events):")
    risk4 = predict_risk("Singapore", days=7, origin="USA", destination="Singapore", incidents=[])
    print(f" → Risk Score: {risk4:.2f}")


    # Scenario 5: same route with incidents (one keyword unknown to the rules).
    print("\n5. USA to Singapore with equipment failure:")
    risk5 = predict_risk("Singapore", days=7, origin="USA", destination="Singapore",
                         incidents=["equipment failure", "customs delay"])
    print(f" → Risk Score: {risk5:.2f}")
    print(f" → Increase: +{(risk5-risk4)*100:.1f}%")


    # Scenario 6: stacked critical events on an urgent horizon.
    print("\n6. Shanghai with multiple critical events:")
    risk6 = predict_risk("Shanghai", days=2, incidents=["typhoon", "port strike", "port closure"])
    print(f" → Risk Score: {risk6:.2f} ")

    print("\n" + "="*60)
    print("Hybrid approach combines:")
    print(" - ML Model: Historical shipping patterns")
    print(" - Rules: Real-time events and regional factors")
    print("="*60)
src/components/model_timeseries_risk.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from sklearn.preprocessing import StandardScaler
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.utils import class_weight
7
+ import joblib
8
+ from pathlib import Path
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+
15
# --- Data loading & sequence construction ------------------------------------
# NOTE(review): everything below (including model training further down) runs
# at import time — consider wrapping in a main() guard.
base_dir = Path(__file__).resolve().parents[2]
data_path = base_dir / "artifacts" / "data" / "processed" / "supply_chain_disruptions_features.csv"


df = pd.read_csv(data_path)
region_col = "Order City"
region_name = "Shanghai"  # region whose history the LSTM is trained on


# Fall back to a 200-row sample of the full dataset when the region is sparse.
df_region = df[df[region_col] == region_name].copy()
if len(df_region) < 100:
    logger.warning("Region sample is small, upsampling/cropping to 200 rows from full dataset.")
    df_region = df.sample(200, random_state=42) if len(df) >= 200 else df

feature_cols = [
    "Days for shipping (real)", "Sales per customer", "Order Item Discount",
    "Order Item Product Price", "Order Item Quantity"
]
label_col = "Late_delivery_risk"
seq_length = 7  # window length (rows) fed to the LSTM

X_all = df_region[feature_cols].fillna(0).astype(float).values
y_all = df_region[label_col].fillna(0).astype(int).values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_all)

# Sliding windows: each window of seq_length rows predicts the next row's label.
X_seq, y_seq = [], []
for i in range(len(X_scaled) - seq_length):
    X_seq.append(X_scaled[i:i+seq_length])
    y_seq.append(y_all[i+seq_length])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)
logger.info(f"Sequence shape: {X_seq.shape}; Labels: {y_seq.shape}")

if len(X_seq) < 2:
    logger.error("Not enough sequences. Add more data or lower seq_length.")
    exit()  # NOTE(review): module-level exit() also aborts any importer; prefer raising
53
+
54
+
55
# Chronological split: the last 20% of windows form the test set.
# NOTE(review): with fewer than 5 windows test_size is 0 and the slices
# degenerate (X_seq[:-0] is empty, X_seq[-0:] is everything) — confirm the
# input is always large enough to avoid this.
test_size = int(0.2 * len(X_seq))
X_train, X_test = X_seq[:-test_size], X_seq[-test_size:]
y_train, y_test = y_seq[:-test_size], y_seq[-test_size:]


# Balance the loss for the (typically skewed) late/on-time classes.
weights = class_weight.compute_class_weight(class_weight="balanced",
                                            classes=np.unique(y_train),
                                            y=y_train)
class_weight_dict = dict(zip(np.unique(y_train), weights))


# Two stacked LSTMs with dropout; sigmoid head yields a binary risk probability.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(seq_length, len(feature_cols))),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

logger.info("Training LSTM risk model with weighted loss and dropout.")
model.fit(X_train, y_train, epochs=12, batch_size=8,
          validation_split=0.1, class_weight=class_weight_dict)

test_loss, test_acc = model.evaluate(X_test, y_test)
logger.info(f"Test Accuracy: {test_acc:.4f}")


# Persist the trained model and the fitted scaler side by side.
model_dir = base_dir / "artifacts" / "models" / "timeseries_risk"
model_dir.mkdir(parents=True, exist_ok=True)
model.save(model_dir / "lstm_risk_model.keras")
joblib.dump(scaler, model_dir / "scaler.joblib")
logger.info(f"Saved LSTM model and scaler to {model_dir}")
89
+
90
def predict_risk_for_next_day(sequence, threshold=0.5):
    """Score one window of feature rows and return (probability, label).

    Args:
        sequence: array-like of shape (seq_length, len(feature_cols)); it is
            scaled here with the fitted StandardScaler, so callers should
            pass *unscaled* feature values.
        threshold: probability cut-off for the binary late/on-time label.

    Uses the module-level `scaler`, `model`, and `region_name` defined above.
    """
    seq = scaler.transform(sequence)
    seq_window = np.expand_dims(seq, axis=0)
    pred_prob = model.predict(seq_window)[0][0]
    pred_label = int(pred_prob > threshold)
    logger.info(f"Predicted next-day risk score: {pred_prob:.3f} (region: {region_name}), label: {pred_label}")
    return pred_prob, pred_label

if X_test.shape[0] > 0:
    # NOTE(review): the log message says "last window" but X_test[0] is the
    # FIRST test window — message and code disagree; confirm the intent.
    # Also X_test is built from already-scaled data, yet the function scales
    # again — this demo input is double-scaled; verify.
    logger.info("Demo prediction for next-day risk using last window of test set:")
    predict_risk_for_next_day(X_test[0], threshold=0.5)
src/components/recommendation_engine.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
4
+ from utils.logger import *
5
+
6
+ import logging
7
+ logger = logging.getLogger(__name__)
8
+
9
def generate_recommendation(
    risk_score,
    region,
    recent_incidents=None,
    weather_alert=None,
    intent=None,
    origin=None,
    destination=None
):
    """Translate a risk score into an actionable supply-chain recommendation.

    Args:
        risk_score: Probability-like score; higher means riskier.
        region: Fallback label used when no origin/destination lane is given.
        recent_incidents: Optional list of incident strings (first 3 shown).
        weather_alert: Optional weather warning appended to the message.
        intent: Detected user intent; "mitigation_help" adds a follow-up prompt.
        origin: Optional lane origin city.
        destination: Optional lane destination city.

    Returns:
        Dict with keys "message", "action", "risk_score" and "region".
    """
    # Prefer an explicit "origin to destination" lane label when both ends are known.
    region_str = f"{origin} to {destination}" if origin and destination else region

    # Risk tiers, highest cutoff first: (cutoff, level, headline template, action).
    # A cutoff of None marks the catch-all lowest tier.
    tiers = (
        (0.8, "High risk",
         "{lvl} detected for {loc}! Recent incidents or delays increase disruption probability. "
         "Immediate mitigation advised—consider rerouting, switching suppliers, or delaying shipment.",
         "reroute/switch_supplier/delay"),
        (0.6, "Elevated risk",
         "{lvl} in {loc}. Monitor closely and prioritize more reliable suppliers and routes.",
         "monitor_prioritize"),
        (0.3, "Moderate risk",
         "{lvl} for {loc}. Standard operations are feasible, but stay alert for escalating risks.",
         "continue_monitor"),
        (None, "Low risk",
         "{lvl} for {loc}. Proceed with routine operations.",
         "proceed"),
    )
    for cutoff, level, template, action in tiers:
        if cutoff is None or risk_score >= cutoff:
            message = template.format(lvl=level, loc=region_str)
            break

    # Context lines appended below the headline, in this fixed order.
    extra_lines = []
    if weather_alert:
        extra_lines.append(f"Weather Alert: {weather_alert}")
    if recent_incidents:
        extra_lines.append(f"Recent incidents: {', '.join(recent_incidents[:3])}")
    if recent_incidents and risk_score >= 0.8:
        extra_lines.append(
            "Supply chain disruption likely due to recent incidents. Take immediate action to mitigate risk."
        )
    if intent == "mitigation_help" and risk_score >= 0.5:
        extra_lines.append("Would you like to view alternate routes or suppliers for mitigation?")
    for line in extra_lines:
        message += "\n" + line

    logger.info(f"Recommendation for {region_str} (risk: {risk_score:.2f}): {action}")
    return {
        "message": message,
        "action": action,
        "risk_score": risk_score,
        "region": region_str
    }
69
+
70
if __name__ == "__main__":
    # Smoke-test the engine across high, elevated and low risk scenarios.
    demo_cases = [
        {
            "risk_score": 0.85,
            "region": "Shanghai",
            "recent_incidents": ['port strike', 'supplier outage', 'heavy rain'],
            "weather_alert": 'Typhoon warning',
            "intent": "mitigation_help",
            "origin": "Shanghai",
            "destination": "Los Angeles",
        },
        {
            "risk_score": 0.55,
            "region": "Delhi",
            "recent_incidents": ['route accident', 'moderate rain'],
            "weather_alert": None,
            "intent": "risk_check",
            "origin": "Delhi",
            "destination": "Dubai",
        },
        {
            "risk_score": 0.15,
            "region": "Mumbai",
            "recent_incidents": [],
            "intent": None,
        },
    ]
    for case in demo_cases:
        rec = generate_recommendation(**case)
        print("\n--- Example Recommendation ---")
        print(rec["message"])
src/config/__init__.py ADDED
File without changes
src/config/__pycache__/config.cpython-311.pyc ADDED
Binary file (522 Bytes). View file
 
src/config/config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Central configuration for the external-data refresh pipeline.
API_CONFIG = {
    # Regions used as search queries for the GNews fetcher.
    "regions": ["Mumbai", "Shanghai", "New York"],
    # City coordinates used by the weather fetcher.
    "weather_regions": [
        {"city": "Mumbai", "lat": 19.0760, "lon": 72.8777},
        {"city": "Shanghai", "lat": 31.2304, "lon": 121.4737},
        {"city": "New York", "lat": 40.7128, "lon": -74.0060},
    ],
    # Output directories for raw JSON snapshots.
    "news_output_dir": "data/news",
    "weather_output_dir": "data/weather",
}
src/pipeline/__init__.py ADDED
File without changes
src/pipeline/__pycache__/data_refresh_workflow.cpython-311.pyc ADDED
Binary file (5 kB). View file
 
src/pipeline/data_refresh_workflow.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+
4
+ import sys
5
+ from pathlib import Path
6
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
7
+ from utils.logger import *
8
+
9
+ from components.api_gnews_fetcher import GNewsFetcher
10
+ from components.api_weather_fetcher import WeatherFetcher
11
+ from config.config import API_CONFIG
12
+ from utils.logger import *
13
+
14
+ import logging
15
+ logger = logging.getLogger(__name__)
16
+
17
def ensure_dir(path):
    """Create *path* (including parents) if it does not already exist.

    Logs only when a directory was actually missing, matching the
    original "Directory created" semantics.
    """
    if not os.path.exists(path):
        # exist_ok=True closes the TOCTOU race: another process may create
        # the directory between the exists() check and makedirs().
        os.makedirs(path, exist_ok=True)
        logger.info(f"Directory created: {path}")
21
+
22
def save_snapshot(data, folder, prefix, region):
    """Write *data* as a timestamped JSON snapshot into *folder*.

    Args:
        data: JSON-serializable payload (API response).
        folder: Existing output directory.
        prefix: Snapshot family, e.g. "gnews" or "weather".
        region: Region/city label; spaces are replaced for the filename.
    """
    # Local import hoisted out of the `with` block where it previously lived;
    # keeps the module import-light without re-running it mid-write.
    import json

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fname = f"{prefix}_{region.replace(' ', '_')}_{timestamp}.json"
    fpath = os.path.join(folder, fname)
    with open(fpath, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    logger.info(f"Snapshot saved: {fpath}")
30
+
31
def refresh_gnews(regions, out_dir):
    """Fetch GNews articles for each region and persist JSON snapshots.

    Failures are logged per region and do not abort the remaining regions.
    """
    fetcher = GNewsFetcher()
    ensure_dir(out_dir)
    for region in regions:
        try:
            # Fetch and persist in one guarded step so a failure in either
            # is logged and skipped, as before.
            save_snapshot(fetcher.fetch_news(region), out_dir, "gnews", region)
            logger.info(f"GNews data for {region} saved.")
        except Exception as e:
            logger.error(f"Error fetching GNews for {region}: {e}")
41
+
42
def refresh_weather(weather_regions, out_dir):
    """Fetch current weather for each configured city and save snapshots.

    Each entry in *weather_regions* must provide "city", "lat" and "lon".
    Per-city failures are logged and the loop continues.
    """
    fetcher = WeatherFetcher()
    ensure_dir(out_dir)
    for loc in weather_regions:
        try:
            payload = fetcher.fetch_weather(loc["lat"], loc["lon"])
            save_snapshot(payload, out_dir, "weather", loc["city"])
            logger.info(f"Weather data for {loc['city']} saved.")
        except Exception as e:
            logger.error(f"Error fetching Weather for {loc['city']}: {e}")
52
+
53
def run_all():
    """Run the full data-refresh workflow (news then weather) from API_CONFIG."""
    logger.info("Starting data refresh workflow...")
    cfg = API_CONFIG
    # News first, then weather — mirrors the original execution order.
    refresh_gnews(cfg['regions'], cfg['news_output_dir'])
    refresh_weather(cfg['weather_regions'], cfg['weather_output_dir'])
    logger.info("All data refreshes complete.")
63
+
64
+
65
# Allow running this module directly as a one-shot refresh script.
if __name__ == "__main__":
    run_all()
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/logger.cpython-311.pyc ADDED
Binary file (744 Bytes). View file
 
src/utils/__pycache__/logger.cpython-313.pyc ADDED
Binary file (710 Bytes). View file
 
src/utils/logger.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os

# Log directory shared by the whole project; created up front so the
# FileHandler below can open its file at import time.
_LOG_DIR = os.path.join('artifacts', 'logs')
os.makedirs(_LOG_DIR, exist_ok=True)

# One-time root-logger setup: INFO and above, mirrored to file and console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(_LOG_DIR, 'logfile.txt')),
        logging.StreamHandler(),
    ],
)