samithcs committed on
Commit
1eaee2c
·
1 Parent(s): deadecc

Add Chainlit app files (Dockerfile, app.py, requirements.txt, src) and update README

Browse files
Files changed (38) hide show
  1. Dockerfile +22 -0
  2. README.md +3 -4
  3. app.py +146 -0
  4. requirements.txt +31 -0
  5. src/app/__pycache__/app.cpython-311.pyc +0 -0
  6. src/app/__pycache__/chatbot.cpython-311.pyc +0 -0
  7. src/app/__pycache__/chatbot.cpython-313.pyc +0 -0
  8. src/app/__pycache__/fastapi_server.cpython-311.pyc +0 -0
  9. src/app/app.py +142 -0
  10. src/app/fastapi_server.py +104 -0
  11. src/components/__init__.py +0 -0
  12. src/components/__pycache__/api_gnews_fetcher.cpython-311.pyc +0 -0
  13. src/components/__pycache__/api_weather_fetcher.cpython-311.pyc +0 -0
  14. src/components/__pycache__/data_ingestion.cpython-313.pyc +0 -0
  15. src/components/__pycache__/model_nlp_intent.cpython-311.pyc +0 -0
  16. src/components/__pycache__/model_nlp_ner.cpython-311.pyc +0 -0
  17. src/components/__pycache__/model_risk_predictor.cpython-311.pyc +0 -0
  18. src/components/__pycache__/recommendation_engine.cpython-311.pyc +0 -0
  19. src/components/api_gnews_fetcher.py +40 -0
  20. src/components/api_weather_fetcher.py +65 -0
  21. src/components/data_cleaning.py +96 -0
  22. src/components/data_ingestion.py +60 -0
  23. src/components/feature_engineering.py +96 -0
  24. src/components/model_nlp_intent.py +142 -0
  25. src/components/model_nlp_ner.py +234 -0
  26. src/components/model_risk_predictor.py +273 -0
  27. src/components/model_timeseries_risk.py +100 -0
  28. src/components/recommendation_engine.py +103 -0
  29. src/config/__init__.py +0 -0
  30. src/config/__pycache__/config.cpython-311.pyc +0 -0
  31. src/config/config.py +10 -0
  32. src/pipeline/__init__.py +0 -0
  33. src/pipeline/__pycache__/data_refresh_workflow.cpython-311.pyc +0 -0
  34. src/pipeline/data_refresh_workflow.py +66 -0
  35. src/utils/__init__.py +0 -0
  36. src/utils/__pycache__/logger.cpython-311.pyc +0 -0
  37. src/utils/__pycache__/logger.cpython-313.pyc +0 -0
  38. src/utils/logger.py +13 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Base image ----
FROM python:3.11-slim

# ---- Container-friendly Python defaults ----
# No .pyc files in the image; unbuffered stdout so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# ---- Working directory ----
WORKDIR /app

# ---- Copy requirements first for efficient layer caching ----
COPY requirements.txt .

# ---- Install dependencies (no pip cache keeps the image small) ----
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -r requirements.txt

# ---- Copy the full project ----
COPY . .

# ---- Use Hugging Face's expected port ----
EXPOSE 7860

# ---- Run Chainlit ----
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,10 @@
1
  ---
2
  title: Chainlit Supplychain App
3
- emoji: 😻
4
- colorFrom: gray
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Chainlit Supplychain App
3
+ emoji: 📈
4
+ colorFrom: purple
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+
5
+ sys.path.insert(0, str(Path(__file__).parent))
6
+
7
+
8
+
9
+ import chainlit as cl
10
+
11
+ from components.model_nlp_intent import predict_intent
12
+ from components.model_nlp_ner import extract_entities_pipeline
13
+ from components.model_risk_predictor import predict_risk
14
+ from components.recommendation_engine import generate_recommendation
15
+
16
+
17
@cl.on_message
async def handle_message(msg: cl.Message):
    """Handle one chat message: classify intent, extract entities, score
    supply-chain risk and reply with a recommendation plus an alert level.

    NOTE(review): this handler is duplicated in src/app/app.py — keep the
    two copies in sync (or delete one) to avoid divergent behavior.
    """
    query = msg.content
    # Fix: the original bound `session = cl.user_session` and never used it.

    # --- NLP: intent + entities ---
    intent_result = predict_intent(query)
    intent = intent_result["intent"]
    confidence = intent_result["confidence"]
    entities = extract_entities_pipeline(query)

    # --- Resolve region / route from extracted locations ---
    region = None
    origin = None
    destination = None
    locations = entities.get("location")
    if locations:
        if isinstance(locations, list):
            if locations:
                region = locations[0]
            if len(locations) > 1:
                origin, destination = locations[0], locations[1]
        else:
            region = locations
    if not region:
        region = "Mumbai"  # demo default when no location is detected

    # --- Resolve events / incidents ---
    incidents = []
    event_type = None
    events = entities.get("event")
    if events:
        if isinstance(events, list):
            incidents = events
            event_type = events[0] if events else None
        else:
            incidents = [events]
            event_type = events

    # --- Risk prediction ---
    risk_score = predict_risk(
        region=region,
        days=5,
        origin=origin,
        destination=destination,
        event_type=event_type,
        incidents=incidents
    )

    # Placeholder context when NER found nothing (demo data).
    recent_incidents = incidents if incidents else ["port strike", "supplier outage"]
    weather_alert = "Typhoon warning" if region == "Shanghai" else None

    advice = generate_recommendation(
        risk_score=risk_score,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent
    )

    # --- Risk banding ---
    if risk_score >= 0.7:
        risk_emoji, risk_level = "🔴", "High"
    elif risk_score >= 0.4:
        risk_emoji, risk_level = "🟡", "Medium"
    else:
        risk_emoji, risk_level = "🟢", "Low"

    # --- Compose reply ---
    response = (
        f"### 📊 Supply Chain Risk Analysis\n\n"
        f"**Region:** {region}\n"
        f"**Intent:** {intent} (Confidence: {confidence:.2%})\n"
        f"**Entities:** {entities}\n"
    )

    if origin and destination:
        response += f"**Route:** {origin} → {destination}\n"

    if incidents:
        # str() guards the join against non-string entity values from NER.
        response += f"**⚠️ Detected Events:** {', '.join(str(i) for i in incidents)}\n"

    response += f"**Risk Score:** {risk_emoji} **{risk_level}** ({risk_score:.2f})\n\n"
    response += f"**💡 Recommendation:**\n{advice['message']}\n"

    await cl.Message(
        content=response,
        author="Supply Chain Risk Analysis"
    ).send()

    # Send Alert Level
    alert_emoji = "🚨" if risk_score >= 0.7 else "⚠️" if risk_score >= 0.4 else "✅"
    await cl.Message(
        content=f"{alert_emoji} **Alert Level:** {advice['action'].upper()}",
        author="Alert Level"
    ).send()
125
+
126
+
127
@cl.on_chat_start
async def welcome():
    """Greet a new chat session with the bot's capabilities and examples."""
    greeting = (
        "# 🌐 Welcome to AI-Powered Supply Chain Risk Advisor\n\n"
        "I provide **real-time risk analysis** and **mitigation strategies** "
        "based on:\n"
        "- 🌍 **Regional factors** (port congestion, infrastructure)\n"
        "- ⚠️ **Active events** (strikes, typhoons, disruptions)\n"
        "- 🚢 **Route analysis** (origin to destination)\n"
        "- 🤖 **ML-powered predictions** (trained on historical data)\n\n"
        "### 💬 Example Questions:\n\n"
        "- \"Is there any delay in vessels from USA to UAE?\"\n"
        "- \"What should I do about the port strike in Shanghai?\"\n"
        "- \"Are there weather problems affecting shipments to Germany?\"\n"
        "- \"Risk level for Mumbai to Singapore route?\"\n\n"
        "**Ask me anything about your supply chain risks!** 🚀"
    )
    intro = cl.Message(content=greeting, author="Risk Advisor Bot")
    await intro.send()
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base
2
+ numpy==1.26.4
3
+ pandas==2.2.2
4
+ requests==2.32.3
5
+
6
+ # Machine Learning
7
+ scikit-learn==1.5.2
8
+ joblib==1.4.2
9
+
10
+ # Deep Learning (TensorFlow + Keras)
11
+ tensorflow==2.15.0
12
+ keras==2.15.0
13
+ protobuf==3.20.3
14
+
15
+ # NLP / Transformers
16
+ transformers==4.37.2
17
+ sentencepiece==0.2.0
18
+ torch==2.1.0
19
+
20
+ # Backend / Web
21
+ fastapi==0.110.2
22
+ uvicorn==0.25.0
23
+ python-dotenv==1.0.1
24
+ pydantic<2
25
+
26
+ # Chainlit App
27
+ chainlit==1.1.301
28
+
29
+ # Tools
30
+ pytest==8.3.2
31
+ pytest-asyncio==0.24.0
src/app/__pycache__/app.cpython-311.pyc ADDED
Binary file (5.24 kB). View file
 
src/app/__pycache__/chatbot.cpython-311.pyc ADDED
Binary file (5.25 kB). View file
 
src/app/__pycache__/chatbot.cpython-313.pyc ADDED
Binary file (3.1 kB). View file
 
src/app/__pycache__/fastapi_server.cpython-311.pyc ADDED
Binary file (5.27 kB). View file
 
src/app/app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
4
+
5
+ import chainlit as cl
6
+
7
+ from components.model_nlp_intent import predict_intent
8
+ from components.model_nlp_ner import extract_entities_pipeline
9
+ from components.model_risk_predictor import predict_risk
10
+ from components.recommendation_engine import generate_recommendation
11
+
12
+
13
@cl.on_message
async def handle_message(msg: cl.Message):
    """Handle one chat message: classify intent, extract entities, score
    supply-chain risk and reply with a recommendation plus an alert level.

    NOTE(review): this handler is duplicated in the repo-root app.py — keep
    the two copies in sync (or delete one) to avoid divergent behavior.
    """
    query = msg.content
    # Fix: the original bound `session = cl.user_session` and never used it.

    # --- NLP: intent + entities ---
    intent_result = predict_intent(query)
    intent = intent_result["intent"]
    confidence = intent_result["confidence"]
    entities = extract_entities_pipeline(query)

    # --- Resolve region / route from extracted locations ---
    region = None
    origin = None
    destination = None
    locations = entities.get("location")
    if locations:
        if isinstance(locations, list):
            if locations:
                region = locations[0]
            if len(locations) > 1:
                origin, destination = locations[0], locations[1]
        else:
            region = locations
    if not region:
        region = "Mumbai"  # demo default when no location is detected

    # --- Resolve events / incidents ---
    incidents = []
    event_type = None
    events = entities.get("event")
    if events:
        if isinstance(events, list):
            incidents = events
            event_type = events[0] if events else None
        else:
            incidents = [events]
            event_type = events

    # --- Risk prediction ---
    risk_score = predict_risk(
        region=region,
        days=5,
        origin=origin,
        destination=destination,
        event_type=event_type,
        incidents=incidents
    )

    # Placeholder context when NER found nothing (demo data).
    recent_incidents = incidents if incidents else ["port strike", "supplier outage"]
    weather_alert = "Typhoon warning" if region == "Shanghai" else None

    advice = generate_recommendation(
        risk_score=risk_score,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent
    )

    # --- Risk banding ---
    if risk_score >= 0.7:
        risk_emoji, risk_level = "🔴", "High"
    elif risk_score >= 0.4:
        risk_emoji, risk_level = "🟡", "Medium"
    else:
        risk_emoji, risk_level = "🟢", "Low"

    # --- Compose reply ---
    response = (
        f"### 📊 Supply Chain Risk Analysis\n\n"
        f"**Region:** {region}\n"
        f"**Intent:** {intent} (Confidence: {confidence:.2%})\n"
        f"**Entities:** {entities}\n"
    )

    if origin and destination:
        response += f"**Route:** {origin} → {destination}\n"

    if incidents:
        # str() guards the join against non-string entity values from NER.
        response += f"**⚠️ Detected Events:** {', '.join(str(i) for i in incidents)}\n"

    response += f"**Risk Score:** {risk_emoji} **{risk_level}** ({risk_score:.2f})\n\n"
    response += f"**💡 Recommendation:**\n{advice['message']}\n"

    await cl.Message(
        content=response,
        author="Supply Chain Risk Analysis"
    ).send()

    # Send Alert Level
    alert_emoji = "🚨" if risk_score >= 0.7 else "⚠️" if risk_score >= 0.4 else "✅"
    await cl.Message(
        content=f"{alert_emoji} **Alert Level:** {advice['action'].upper()}",
        author="Alert Level"
    ).send()
121
+
122
+
123
@cl.on_chat_start
async def welcome():
    """Greet a new chat session with the bot's capabilities and examples."""
    greeting = (
        "# 🌐 Welcome to AI-Powered Supply Chain Risk Advisor\n\n"
        "I provide **real-time risk analysis** and **mitigation strategies** "
        "based on:\n"
        "- 🌍 **Regional factors** (port congestion, infrastructure)\n"
        "- ⚠️ **Active events** (strikes, typhoons, disruptions)\n"
        "- 🚢 **Route analysis** (origin to destination)\n"
        "- 🤖 **ML-powered predictions** (trained on historical data)\n\n"
        "### 💬 Example Questions:\n\n"
        "- \"Is there any delay in vessels from USA to UAE?\"\n"
        "- \"What should I do about the port strike in Shanghai?\"\n"
        "- \"Are there weather problems affecting shipments to Germany?\"\n"
        "- \"Risk level for Mumbai to Singapore route?\"\n\n"
        "**Ask me anything about your supply chain risks!** 🚀"
    )
    intro = cl.Message(content=greeting, author="Risk Advisor Bot")
    await intro.send()
src/app/fastapi_server.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Query
2
+ from typing import Optional, List
3
+
4
+ import sys
5
+ from pathlib import Path
6
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
7
+
8
+ from components.model_nlp_intent import predict_intent
9
+ from components.model_nlp_ner import extract_entities
10
+ from components.model_risk_predictor import predict_risk
11
+ from components.recommendation_engine import generate_recommendation
12
+
13
+ app = FastAPI(
14
+ title="Supply Chain Risk Advisor API",
15
+ description="Provides risk prediction, event queries, and mitigation recommendations.",
16
+ version="1.0"
17
+ )
18
+
19
@app.get("/health/")
def health():
    """Liveness probe: always reports the service as up."""
    status = {"status": "ok"}
    return status
22
+
23
@app.get("/nlp/")
def nlp_analysis(query: str):
    """Run both intent and entity extraction on a user query."""
    intent_result = predict_intent(query)
    extracted = extract_entities(query)
    payload = {
        "query": query,
        "intent": intent_result["intent"],
        "confidence": intent_result["confidence"],
        "entities": extracted,
    }
    return payload
34
+
35
@app.get("/predict-risk/")
def predict_risk_api(region: str, days: Optional[int] = 5):
    """Return risk prediction for a region over the next N days."""
    score = predict_risk(region, days)
    return {"region": region, "risk_score": score, "days": days}
40
+
41
@app.get("/events/")
def events_api(region: Optional[str] = None):
    """Query past incidents/events for one region, or all regions."""
    # Replace this with real event loading (e.g., from your snapshot/data files)
    sample_events = [
        {"region": "Germany", "event": "railway strike", "date": "2025-09-23"},
        {"region": "Mumbai", "event": "weather alert", "date": "2025-10-05"},
        {"region": "Shanghai", "event": "typhoon", "date": "2025-09-30"},
    ]
    if region:
        wanted = region.lower()
        matching = [ev for ev in sample_events if ev["region"].lower() == wanted]
        return {"events": matching}
    return {"events": sample_events}
54
+
55
@app.get("/recommendation/")
def recommendation_api(
    region: str,
    risk: float,
    intent: Optional[str] = None,
    recent_incidents: Optional[List[str]] = Query(None),
    weather_alert: Optional[str] = None
):
    """Get a mitigation recommendation for the given region and risk score."""
    # Thin wrapper: all logic lives in the shared recommendation engine.
    return generate_recommendation(
        risk_score=risk,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent
    )
72
+
73
@app.get("/bot/")
def chatbot_api(query: str):
    """Full pipeline: intent, entities, risk prediction and recommendation."""
    intent_result = predict_intent(query)
    entities = extract_entities(query)

    # Use the first location found or default to Mumbai for demo if missing
    region = None
    locations = entities.get("location")
    if locations:
        region = locations[0] if isinstance(locations, list) and locations else locations
    if not region:
        region = "Mumbai"

    risk_score = predict_risk(region, 5)

    # NOTE(review): placeholder context — `region` is always truthy at this
    # point (it defaults to "Mumbai"), so the sample incidents are always
    # attached; replace with real incident data.
    recent_incidents = ["port strike", "supplier outage"] if region else []
    weather_alert = "Typhoon warning" if region == "Shanghai" else None

    advice = generate_recommendation(
        risk_score=risk_score,
        region=region,
        recent_incidents=recent_incidents,
        weather_alert=weather_alert,
        intent=intent_result.get("intent")
    )
    return {
        "query": query,
        "intent": intent_result["intent"],
        "confidence": intent_result["confidence"],
        "entities": entities,
        "region": region,
        "risk_score": risk_score,
        "advice": advice
    }
103
+
104
+
src/components/__init__.py ADDED
File without changes
src/components/__pycache__/api_gnews_fetcher.cpython-311.pyc ADDED
Binary file (2.76 kB). View file
 
src/components/__pycache__/api_weather_fetcher.cpython-311.pyc ADDED
Binary file (4.59 kB). View file
 
src/components/__pycache__/data_ingestion.cpython-313.pyc ADDED
Binary file (4.15 kB). View file
 
src/components/__pycache__/model_nlp_intent.cpython-311.pyc ADDED
Binary file (7.25 kB). View file
 
src/components/__pycache__/model_nlp_ner.cpython-311.pyc ADDED
Binary file (19.3 kB). View file
 
src/components/__pycache__/model_risk_predictor.cpython-311.pyc ADDED
Binary file (14.3 kB). View file
 
src/components/__pycache__/recommendation_engine.cpython-311.pyc ADDED
Binary file (3.59 kB). View file
 
src/components/api_gnews_fetcher.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
8
+ from utils.logger import *
9
+
10
+ import logging
11
+ logger = logging.getLogger(__name__)
12
+
13
+ load_dotenv()
14
+ GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
15
+ GNEWS_API_ENDPOINT = "https://gnews.io/api/v4/search"
16
+
17
class GNewsFetcher:
    """Thin client for the GNews v4 search endpoint.

    Reads the API key from the module-level GNEWS_API_KEY (loaded from the
    environment) unless one is passed explicitly.
    """

    def __init__(self, api_key=GNEWS_API_KEY, endpoint=GNEWS_API_ENDPOINT):
        self.api_key = api_key
        self.endpoint = endpoint
        if not self.api_key:
            logger.error("GNEWS_API_KEY environment variable not set.")

    def fetch_news(self, keyword, max_results=100, timeout=15):
        """Fetch up to *max_results* English articles matching *keyword*.

        Returns a list of article dicts, or [] on any error.
        *timeout* (seconds) is new: the original request had no timeout and
        could hang indefinitely on a stalled connection.
        """
        params = {
            'q': keyword,
            'token': self.api_key,
            'lang': 'en',
            'max': max_results,
        }
        try:
            logger.info(f"Fetching GNews for keyword: {keyword}")
            response = requests.get(self.endpoint, params=params, timeout=timeout)
            response.raise_for_status()
            articles = response.json().get('articles', [])
            logger.info(f"Fetched {len(articles)} articles for '{keyword}'")
            return articles
        except Exception as e:
            # Best-effort fetch: callers get an empty list instead of a crash.
            logger.error(f"GNews fetch error for '{keyword}': {e}")
            return []
src/components/api_weather_fetcher.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
8
+ from utils.logger import *
9
+
10
+ import logging
11
+ logger = logging.getLogger(__name__)
12
+
13
+ load_dotenv()
14
+ WEATHERBIT_API_KEY = os.getenv("WEATHERBIT_API_KEY")
15
+ ENDPOINT = "https://api.weatherbit.io/v2.0/current"
16
+
17
class WeatherFetcher:
    """Client for the Weatherbit 'current conditions' endpoint.

    Reads the API key from the module-level WEATHERBIT_API_KEY (loaded from
    the environment) unless one is passed explicitly.
    """

    def __init__(self, api_key=WEATHERBIT_API_KEY, endpoint=ENDPOINT):
        self.api_key = api_key
        self.endpoint = endpoint
        if not self.api_key:
            logger.error("WEATHERBIT_API_KEY environment variable not set.")

    def fetch_weather(self, lat, lon, timeout=15):
        """Fetch current weather for a lat/lon pair.

        Returns the parsed JSON payload, or None on any error.
        *timeout* (seconds) is new: the original request had no timeout and
        could hang indefinitely on a stalled connection.
        """
        params = {
            "lat": lat,
            "lon": lon,
            "key": self.api_key
        }
        try:
            logger.info(f"Fetching weather for lat/lon: {lat},{lon}")
            response = requests.get(self.endpoint, params=params, timeout=timeout)
            response.raise_for_status()
            logger.info(f"Weather fetch success for {lat},{lon}")
            return response.json()
        except Exception as e:
            logger.error(f"WeatherBit fetch error for {lat},{lon}: {e}")
            return None

    @staticmethod
    def extract_weather(data, loc):
        """Flatten one Weatherbit response into a single record dict.

        *loc* must provide 'city', 'country', 'lat', 'lon'.
        Returns None when the payload has no usable 'data' entry.
        """
        if data and "data" in data and len(data["data"]) > 0:
            entry = data["data"][0]
            # Fix: the original indexed entry["weather"] directly and raised
            # KeyError when the sub-dict was absent from the payload.
            weather = entry.get("weather", {})
            logger.info(f"Extracting weather for {loc['city']}, {loc['country']}")
            return {
                "city": loc["city"],
                "country": loc["country"],
                "lat": loc["lat"],
                "lon": loc["lon"],
                "timestamp": entry.get("ts"),
                "datetime": entry.get("datetime"),
                "temp": entry.get("temp"),
                "weather_main": weather.get("description"),
                "weather_code": weather.get("code"),
                "precip": entry.get("precip"),
                "wind_spd": entry.get("wind_spd"),
                "wind_dir": entry.get("wind_cdir_full"),
                "clouds": entry.get("clouds"),
                "aqi": entry.get("aqi", None),
                "visibility": entry.get("vis"),
                # NOTE(review): code >= 700 also flags code 800 ("clear" in
                # Weatherbit's table) as an alert — confirm the intended
                # threshold against the Weatherbit weather-code table.
                "alert": "Yes" if weather.get("code", 800) >= 700 else "No"
            }
        else:
            logger.warning("No valid weather data structure to extract.")
            return None
src/components/data_cleaning.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from pathlib import Path
4
+ import sys
5
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
6
+
7
+ from utils.logger import *
8
+
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+
14
+
15
+
16
def clean_news_events(df):
    """Normalize raw news-event records.

    Keeps the expected columns, drops duplicate/invalid rows, lower-cases
    the text fields and parses timestamps. Returns a new DataFrame.
    """
    required_columns = ['title', 'publishedAt', 'description', 'source', 'url']
    # .copy() so the assignments below always operate on an independent
    # frame, never on a view of the caller's DataFrame.
    df = df[[col for col in required_columns if col in df.columns]].copy()
    df = df.drop_duplicates(subset=['title', 'publishedAt'])
    # Fix: the column filter above tolerates missing columns, but the
    # original then accessed df['title'] / df['description'] /
    # df['publishedAt'] unconditionally and raised KeyError on partial input.
    if 'title' in df.columns:
        df['title'] = df['title'].str.strip().str.lower()
    if 'description' in df.columns:
        df['description'] = df['description'].str.strip().str.lower()
    if 'publishedAt' in df.columns:
        df['publishedAt'] = pd.to_datetime(df['publishedAt'], errors='coerce')
    df = df.dropna(subset=[c for c in ('title', 'publishedAt') if c in df.columns])
    logger.info(f"Cleaned news events: {df.shape}")
    return df
26
+
27
+
28
+
29
def clean_weather_alerts(df):
    """Normalize raw weather-alert records: keep the known columns,
    title-case cities, upper-case countries, parse epoch timestamps and
    drop rows missing a city or timestamp. Returns a new DataFrame."""
    keep_cols = ['city', 'country', 'lat', 'lon', 'weather_main', 'timestamp']
    present = [col for col in keep_cols if col in df.columns]
    df = df[present].copy()
    df['city'] = df['city'].str.strip().str.title()
    df['country'] = df['country'].str.strip().str.upper()
    # Timestamps arrive as epoch seconds; invalid ones become NaT.
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
    df = df.dropna(subset=['city', 'timestamp'])
    logger.info(f"Cleaned weather alerts: {df.shape}")
    return df
38
+
39
+
40
+
41
def clean_supply_chain_disruptions(df):
    """Clean the DataCo supply-chain CSV: de-duplicate, parse order/shipping
    dates, normalize the late-delivery flag and order status.
    Returns a new DataFrame."""
    df = df.drop_duplicates()

    # Fix: guard each column like the other cleaners do — the original
    # accessed the two date columns unconditionally and raised KeyError
    # when the raw CSV lacked them.
    for date_col in ['order date (DateOrders)', 'shipping date (DateOrders)']:
        if date_col in df.columns:
            df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

    if 'Late_delivery_risk' in df.columns:
        df['Late_delivery_risk'] = df['Late_delivery_risk'].fillna(0).astype(int)

    if 'Order Status' in df.columns:
        df['Order Status'] = df['Order Status'].str.strip().str.title()
    logger.info(f"Cleaned supply chain CSV: {df.shape}")
    return df
54
+
55
+
56
+
57
if __name__ == "__main__":
    artifacts = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw"
    processed_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"
    processed_dir.mkdir(parents=True, exist_ok=True)

    # Fix: each dataset is now cleaned AND saved inside its own try-block.
    # The original saved all three frames unconditionally at the end, so a
    # failure in any earlier step crashed with NameError on the undefined
    # cleaned_* variable instead of just logging the error.
    try:
        news_df = pd.read_json(artifacts / "news_events.json")
        cleaned_news = clean_news_events(news_df)
        cleaned_news.to_csv(processed_dir / "news_events_clean.csv", index=False)
        logger.info(f"News Alerts cleaned successfully: shape {cleaned_news.shape}")
    except Exception as e:
        logger.error(f"Error cleaning news: {e}")

    try:
        weather_df = pd.read_json(artifacts / "weather_alerts.json")
        cleaned_weather = clean_weather_alerts(weather_df)
        cleaned_weather.to_csv(processed_dir / "weather_alerts_clean.csv", index=False)
        logger.info(f"Weather Alerts cleaned successfully: shape {cleaned_weather.shape}")
    except Exception as e:
        logger.error(f"Error cleaning weather: {e}")

    try:
        try:
            sc_df = pd.read_csv(artifacts / "DataCoSupplyChainDataset.csv", encoding="utf-8")
        except UnicodeDecodeError:
            # The DataCo CSV ships in Latin-1; retry when UTF-8 fails.
            sc_df = pd.read_csv(artifacts / "DataCoSupplyChainDataset.csv", encoding="ISO-8859-1")
        cleaned_sc = clean_supply_chain_disruptions(sc_df)
        cleaned_sc.to_csv(processed_dir / "supply_chain_disruptions_clean.csv", index=False)
        logger.info(f"Supply chain CSV cleaned successfully: shape {cleaned_sc.shape}")
    except Exception as e:
        logger.error(f"Error cleaning supply chain CSV: {e}")
src/components/data_ingestion.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ import sys
5
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
6
+
7
+ from utils.logger import *
8
+
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
def load_news_events(path=None):
    """Load raw news events (JSON) into a DataFrame; re-raises on failure."""
    if path is None:
        path = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw" / "news_events.json"
    try:
        with open(path, "r", encoding="utf-8") as f:
            frame = pd.DataFrame(json.load(f))
        logger.info(f"News events loaded successfully: {frame.shape}")
        return frame
    except Exception as e:
        logger.error(f"Failed to load news events: {e}")
        raise
23
+
24
def load_weather_alerts(path=None):
    """Load raw weather alerts (JSON) into a DataFrame; re-raises on failure."""
    if path is None:
        path = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw" / "weather_alerts.json"
    try:
        with open(path, "r", encoding="utf-8") as f:
            frame = pd.DataFrame(json.load(f))
        logger.info(f"Weather alerts loaded successfully: {frame.shape}")
        return frame
    except Exception as e:
        logger.error(f"Failed to load weather alerts: {e}")
        raise
35
+
36
def load_supply_chain_disruptions(csv_path=None):
    """Load the historic DataCo incidents CSV.

    Retries with ISO-8859-1 when the file is not valid UTF-8; any other
    failure is logged and re-raised."""
    if csv_path is None:
        csv_path = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "raw" / "DataCoSupplyChainDataset.csv"
    try:
        frame = pd.read_csv(csv_path, encoding="utf-8")
        logger.info(f"Historic incidents loaded successfully: {frame.shape}")
        return frame
    except UnicodeDecodeError:
        frame = pd.read_csv(csv_path, encoding="ISO-8859-1")
        logger.info(f"Historic incidents loaded successfully (ISO-8859-1): {frame.shape}")
        return frame
    except Exception as e:
        logger.error(f"Failed to load historic supply chain CSV: {e}")
        raise
50
+
51
if __name__ == "__main__":
    # Smoke-test the loaders; the historic CSV is treated as optional.
    try:
        news_df = load_news_events()
        weather_df = load_weather_alerts()
    except Exception as e:
        logger.error(f"Major error in data ingestion: {e}")
    else:
        try:
            incidents_df = load_supply_chain_disruptions()
        except Exception as e:
            logger.error(f"No historic CSV loaded: {e}")
src/components/feature_engineering.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from pathlib import Path
4
+ import sys
5
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
6
+
7
+ from utils.logger import *
8
+
9
+ import logging
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+
14
def engineer_news_features(df):
    """Add boolean keyword flags (strike/disaster/accident) and publish-time
    features to a cleaned news-events frame.

    Assumes 'title' and 'description' columns exist (guaranteed by
    clean_news_events). Mutates and returns *df*.
    """
    KEYWORDS = {
        "strike": ["strike", "walkout", "protest"],
        "disaster": ["earthquake", "flood", "hurricane", "typhoon", "fire", "storm"],
        "accident": ["collision", "accident", "spill", "blockage"],
    }
    for key, words in KEYWORDS.items():
        # Hoisted: the original rebuilt the pattern twice per category.
        pattern = '|'.join(words)
        df[f"is_{key}"] = (
            df['title'].str.contains(pattern, case=False, na=False) |
            df['description'].str.contains(pattern, case=False, na=False)
        )

    if "publishedAt" in df.columns:
        # Parse once — the original re-ran pd.to_datetime for each feature.
        published = pd.to_datetime(df["publishedAt"], errors='coerce')
        df["event_weekday"] = published.dt.weekday
        df["event_hour"] = published.dt.hour
    logger.info(f"Engineered news event features: {df.shape}")
    return df
32
+
33
+
34
+
35
def engineer_weather_features(df):
    """Add a severe-weather flag, one-hot weather columns and month/season
    features to a cleaned weather frame. Mutates and returns the frame."""
    severe_words = ["Storm", "Thunderstorm", "Rain", "Snow", "Hurricane", "Extreme"]

    if "weather_main" in df.columns:
        # Fix: the original computed severe_weather BEFORE this existence
        # check, so a frame without 'weather_main' raised KeyError even
        # though the guard suggests the column is optional.
        df["severe_weather"] = df["weather_main"].str.contains('|'.join(severe_words), case=False, na=False)
        df = pd.get_dummies(df, columns=["weather_main"], prefix="weather")

    if "timestamp" in df.columns:
        # Parse once — the original re-ran pd.to_datetime for each feature.
        ts = pd.to_datetime(df["timestamp"], errors='coerce')
        df["month"] = ts.dt.month
        df["season"] = ts.dt.month % 12 // 3 + 1
    logger.info(f"Engineered weather features: {df.shape}")
    return df
48
+
49
+
50
+
51
def engineer_supply_chain_features(df):
    """Derive lead-time, one-hot categorical columns and a lateness flag
    from the cleaned supply-chain frame. Returns the resulting frame."""
    order_col = "order date (DateOrders)"
    ship_col = "shipping date (DateOrders)"
    if order_col in df.columns and ship_col in df.columns:
        shipped = pd.to_datetime(df[ship_col], errors='coerce')
        ordered = pd.to_datetime(df[order_col], errors='coerce')
        df["lead_time_days"] = (shipped - ordered).dt.days

    categorical_cols = ["Order Status", "Product Status", "Shipping Mode", "Order Region", "Order Country"]
    for col in categorical_cols:
        if col in df.columns:
            df = pd.get_dummies(df, columns=[col], prefix=col.replace(' ', '_'))

    if "Late_delivery_risk" in df.columns:
        df["is_late"] = df["Late_delivery_risk"] > 0
    logger.info(f"Engineered supply chain features: {df.shape}")
    return df
68
+
69
+
70
+
71
if __name__ == "__main__":
    processed_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"

    # (input csv, feature function, output csv, log label) — each dataset is
    # processed independently so one failure does not stop the others.
    jobs = [
        ("news_events_clean.csv", engineer_news_features,
         "news_events_features.csv", "news"),
        ("weather_alerts_clean.csv", engineer_weather_features,
         "weather_alerts_features.csv", "weather"),
        ("supply_chain_disruptions_clean.csv", engineer_supply_chain_features,
         "supply_chain_disruptions_features.csv", "supply chain"),
    ]
    for src_name, build_features, dst_name, label in jobs:
        try:
            frame = pd.read_csv(processed_dir / src_name, encoding="utf-8")
            features = build_features(frame)
            features.to_csv(processed_dir / dst_name, index=False)
            logger.info(f"Saved engineered {label} features.")
        except Exception as e:
            logger.error(f"Error engineering {label} features: {e}")
src/components/model_nlp_intent.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import LabelEncoder
7
+ import joblib
8
+ import sys
9
+ from pathlib import Path
10
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
11
+ from utils.logger import *
12
+
13
+ import logging
14
+ logger = logging.getLogger(__name__)
15
+
16
def generate_synthetic_data():
    """Build a tiny hand-written corpus for training the intent classifier.

    Returns:
        pd.DataFrame with columns 'text' (user query) and 'intent' (one of
        risk_check / weather_alert / mitigation_help / general_query),
        five examples per intent, grouped by intent in insertion order.
    """
    corpus = {
        'risk_check': [
            "What's the risk for Mumbai shipments?",
            "Any delays expected for Shanghai routes?",
            "Is there disruption risk for my order?",
            "Check risk status for Delhi delivery",
            "Are there any supply chain issues?",
        ],
        'weather_alert': [
            "Any weather alerts today?",
            "What's the weather situation in Beijing?",
            "Are there storms affecting deliveries?",
            "Weather conditions for logistics?",
            "Any severe weather warnings?",
        ],
        'mitigation_help': [
            "What should I do about delays?",
            "How to avoid supply chain risks?",
            "Suggest alternative routes",
            "What are my options for rerouting?",
            "Help me mitigate delivery issues",
        ],
        'general_query': [
            "Hello, how can you help?",
            "What can this system do?",
            "I need information about logistics",
            "Tell me about your capabilities",
            "How does this chatbot work?",
        ],
    }

    texts, intents = [], []
    for intent, phrases in corpus.items():
        texts.extend(phrases)
        intents.extend([intent] * len(phrases))
    return pd.DataFrame({'text': texts, 'intent': intents})
56
+
57
def main():
    """Train, save, and smoke-test the DistilBERT intent classifier.

    Fine-tunes distilbert-base-uncased on the synthetic corpus, persists the
    model, tokenizer, and label encoder under artifacts/models/nlp_intent,
    then logs predictions for a few sample queries.

    Fix: the original also tokenized the held-out split into a
    `test_encodings` variable that was never used — dead work removed.
    """
    df = generate_synthetic_data()

    # Map string intents to integer class ids.
    label_encoder = LabelEncoder()
    df['label'] = label_encoder.fit_transform(df['intent'])

    # Stratified 80/20 split keeps the training subset reproducible; the
    # held-out portion is not evaluated in this script.
    X_train, X_test, y_train, y_test = train_test_split(
        df['text'], df['label'], test_size=0.2, random_state=42, stratify=df['label']
    )

    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    model = TFDistilBertForSequenceClassification.from_pretrained(
        'distilbert-base-uncased',
        num_labels=len(label_encoder.classes_)
    )

    train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=128, return_tensors='tf')

    train_dataset = tf.data.Dataset.from_tensor_slices((
        dict(train_encodings),
        y_train.values
    )).batch(8)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

    model.fit(train_dataset, epochs=3)

    # Persist all three artifacts that predict_intent() loads.
    model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_intent"
    model_dir.mkdir(parents=True, exist_ok=True)

    model.save_pretrained(model_dir / "intent_model")
    tokenizer.save_pretrained(model_dir / "intent_tokenizer")
    joblib.dump(label_encoder, model_dir / "label_encoder.joblib")

    logger.info(f"Intent classification model saved to {model_dir}")

    # Quick sanity check on the freshly trained model.
    test_queries = [
        "Is there risk for my Beijing shipment?",
        "Any weather problems today?",
        "What should I do about delays?"
    ]

    for query in test_queries:
        inputs = tokenizer(query, return_tensors='tf', truncation=True, padding=True, max_length=128)
        outputs = model(inputs)
        predicted_class = tf.argmax(outputs.logits, axis=1).numpy()[0]
        intent = label_encoder.inverse_transform([predicted_class])[0]
        confidence = tf.nn.softmax(outputs.logits)[0][predicted_class].numpy()

        logger.info(f"Query: '{query}' -> Intent: {intent} (Confidence: {confidence:.3f})")
122
+
123
+
124
def predict_intent(text: str) -> dict:
    """Classify a user query into one of the trained intents.

    Args:
        text: Raw user query string.

    Returns:
        {"intent": <decoded label>, "confidence": <softmax probability as float>}

    Fix: the original re-read the model, tokenizer, and label encoder from
    disk on every call; the artifacts are now loaded once and cached on the
    function object.
    """
    if not hasattr(predict_intent, "_artifacts"):
        model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_intent"
        predict_intent._artifacts = (
            TFDistilBertForSequenceClassification.from_pretrained(model_dir / "intent_model"),
            DistilBertTokenizer.from_pretrained(model_dir / "intent_tokenizer"),
            joblib.load(model_dir / "label_encoder.joblib"),
        )
    model, tokenizer, label_encoder = predict_intent._artifacts

    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)
    outputs = model(inputs)
    predicted_class = tf.argmax(outputs.logits, axis=1).numpy()[0]
    intent = label_encoder.inverse_transform([predicted_class])[0]
    confidence = float(tf.nn.softmax(outputs.logits)[0][predicted_class].numpy())
    return {"intent": intent, "confidence": confidence}
139
+
140
+
141
if __name__ == "__main__":
    main()  # script entry point: train and persist the intent-classification artifacts
src/components/model_nlp_ner.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from transformers import DistilBertTokenizerFast, TFDistilBertForTokenClassification, pipeline
3
+ from sklearn.model_selection import train_test_split
4
+ import numpy as np
5
+ import joblib
6
+ import sys
7
+ from pathlib import Path
8
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
9
+ from utils.logger import *
10
+
11
+ import logging
12
+ logger = logging.getLogger(__name__)
13
+
14
# Training hyperparameters for the NER fine-tune.
EPOCHS = 30              # upper bound; the EarlyStopping callback below usually halts sooner
BATCH_SIZE = 8
LEARNING_RATE = 5e-5
VALIDATION_SPLIT = 0.15  # fraction of examples held out for validation
PATIENCE = 3             # epochs without val_loss improvement before stopping
19
+
20
# Prefer AdamW (decoupled weight decay) when tensorflow_addons is installed,
# otherwise fall back to plain Adam at the same learning rate.
# NOTE(review): tensorflow-addons is in end-of-life/minimal-maintenance mode —
# confirm this optional dependency is still wanted.
try:
    from tensorflow_addons.optimizers import AdamW
    optimizer = AdamW(learning_rate=LEARNING_RATE, weight_decay=1e-2)
except ImportError:
    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
25
+
26
+
27
+
28
# Hand-labelled NER training corpus: (words, BIO tags) pairs.
# Tag scheme: B-LOC/I-LOC for locations, B-EVENT/I-EVENT for disruption
# events, O for everything else. Sentences are pre-split into words.
examples = [
    (["Delay", "in", "Shanghai", "due", "to", "storms"], ["O", "O", "B-LOC", "O", "O", "B-EVENT"]),
    (["Any", "delay", "in", "vessel", "from", "USA", "to", "UAE", "?"], ["O", "O", "O", "O", "O", "B-LOC", "O", "B-LOC", "O"]),
    (["Cargo", "stuck", "at", "UAE", "port"], ["O", "O", "O", "B-LOC", "O"]),
    (["Weather", "alert", "for", "USA"], ["O", "O", "O", "B-LOC"]),
    (["Flood", "risk", "in", "Mumbai"], ["O", "O", "O", "B-LOC"]),
    (["Port", "closure", "Middle", "East"], ["O", "O", "B-LOC", "I-LOC"]),
    (["Is", "cargo", "delayed", "from", "USA", "to", "India", "?"], ["O", "O", "O", "O", "B-LOC", "O", "B-LOC", "O"]),
    (["Weather", "problems", "expected", "in", "USA"], ["O", "O", "O", "O", "B-LOC"]),
    (["Port", "strike", "at", "Singapore"], ["O", "O", "O", "B-LOC"]),
    (["Typhoon", "in", "Japan"], ["B-EVENT", "O", "B-LOC"]),
    (["Reroute", "shipments", "from", "Los", "Angeles"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Supply", "disruption", "Middle", "East"], ["O", "O", "B-LOC", "I-LOC"]),
    (["Severe", "fog", "in", "United", "Arab", "Emirates"], ["O", "O", "O", "B-LOC", "I-LOC", "I-LOC"]),
    (["Are", "shipments", "to", "Brazil", "affected", "by", "strike", "?"], ["O", "O", "O", "B-LOC", "O", "O", "B-EVENT", "O"]),
    (["Is", "Paris", "airport", "open", "after", "floods", "?"], ["O", "B-LOC", "O", "O", "O", "B-EVENT", "O"]),
    (["Delay", "reported", "in", "Berlin"], ["O", "O", "O", "B-LOC"]),
    (["Export", "hold", "at", "Los", "Angeles"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Typhoon", "warning", "for", "Japan"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Reroute", "cargo", "to", "Singapore"], ["O", "O", "O", "B-LOC"]),
    (["Is", "there", "labor", "strike", "in", "Canada", "?"], ["O", "O", "O", "B-EVENT", "O", "B-LOC", "O"]),
    (["Storm", "impact", "on", "United", "Kingdom"], ["B-EVENT", "O", "O", "B-LOC", "I-LOC"]),
    (["Supply", "disruption", "Italy"], ["O", "O", "B-LOC"]),
    (["Any", "hold-up", "in", "Dubai", "?",], ["O", "O", "O", "B-LOC", "O"]),
    (["Cargo", "delay", "at", "Rotterdam", "port"], ["O", "O", "O", "B-LOC", "O"]),
    (["Flood", "disrupts", "service", "in", "Turkey"], ["B-EVENT", "O", "O", "O", "B-LOC"]),
    (["Severe", "thunderstorm", "in", "New", "York", "City"], ["O", "B-EVENT", "O", "B-LOC", "I-LOC", "I-LOC"]),
    (["Is", "Shanghai", "port", "closed", "for", "holiday", "?"], ["O", "B-LOC", "O", "O", "O", "O", "O"]),
    (["France", "logistics", "strike"], ["B-LOC", "O", "B-EVENT"]),
    (["Export", "shipment", "to", "Spain", "delayed"], ["O", "O", "O", "B-LOC", "O"]),
    (["Cargo", "rerouted", "from", "Colombo", "to", "Sydney"], ["O", "O", "O", "B-LOC", "O", "B-LOC"]),
    (["Vessel", "from", "India", "held", "by", "customs"], ["O", "O", "B-LOC", "O", "O", "O"]),
    (["Is", "Singapore", "affected", "by", "monsoon", "season", "?"], ["O", "B-LOC", "O", "O", "B-EVENT", "I-EVENT", "O"]),
    (["Disruption", "in", "United", "Arab", "Emirates", "due", "to", "strike"], ["O", "O", "B-LOC", "I-LOC", "I-LOC", "O", "O", "B-EVENT"]),
    (["How", "long", "is", "the", "delay", "in", "Mexico", "?"], ["O", "O", "O", "O", "O", "O", "B-LOC", "O"]),
    (["Flood", "risk", "in", "Gujarat"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Severe", "weather", "disrupts", "Melbourne", "port"], ["B-EVENT", "O", "O", "B-LOC", "O"]),
    (["Export", "stopped", "from", "Jakarta", "because", "of", "strike"], ["O", "O", "O", "B-LOC", "O", "O", "B-EVENT"]),
    (["Storm", "warning", "for", "Delhi"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Any", "delay", "from", "United", "States", "to", "United", "Kingdom", "?"], ["O", "O", "O", "B-LOC", "I-LOC", "O", "B-LOC", "I-LOC", "O"]),
    (["Cargo", "stuck", "at", "Sao", "Paulo"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Shipping", "interruption", "in", "Cairo"], ["O", "O", "O", "B-LOC"]),
    (["Typhoon", "delays", "cargo", "to", "Hong", "Kong"], ["B-EVENT", "O", "O", "O", "B-LOC", "I-LOC"]),
    (["No", "disruption", "in", "Berlin"], ["O", "O", "O", "B-LOC"]),
    (["Port", "closure", "for", "Christmas", "in", "Canada"], ["O", "O", "O", "O", "O", "B-LOC"]),
    (["Is", "there", "a", "strike", "in", "Melbourne", "?"], ["O", "O", "O", "B-EVENT", "O", "B-LOC", "O"]),
    (["Shipment", "delayed", "in", "Mexico", "City"], ["O", "O", "O", "B-LOC", "I-LOC"]),
    (["Are", "vessels", "from", "Copenhagen", "blocked", "?"], ["O", "O", "O", "B-LOC", "O", "O"]),
    (["Heavy", "rains", "in", "Manila"], ["O", "B-EVENT", "O", "B-LOC"]),
    (["Strike", "at", "Johannesburg", "port"], ["B-EVENT", "O", "B-LOC", "O"]),
    (["Is", "the", "route", "from", "Italy", "to", "Brazil", "safe", "?"], ["O", "O", "O", "O", "B-LOC", "O", "B-LOC", "O", "O"]),
    (["Container", "stuck", "at", "Antwerp"], ["O", "O", "O", "B-LOC"]),
    (["Any", "blockade", "in", "Pakistan", "?"], ["O", "B-EVENT", "O", "B-LOC", "O"]),
    (["Flood", "alerts", "for", "Vietnam"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Are", "planes", "to", "Madrid", "canceled", "?"], ["O", "O", "O", "B-LOC", "O", "O"]),
    (["Shipments", "from", "Morocco", "are", "late"], ["O", "O", "B-LOC", "O", "O"]),
    (["Earthquake", "in", "Indonesia", "affecting", "deliveries"], ["B-EVENT", "O", "B-LOC", "O", "O"]),
    (["Rail", "disruption", "in", "Melbourne"], ["O", "B-EVENT", "O", "B-LOC"]),
    (["Any", "closure", "at", "Rotterdam", "port", "?"], ["O", "B-EVENT", "O", "B-LOC", "O", "O"]),
    (["Landslide", "blocks", "road", "to", "Lima"], ["B-EVENT", "O", "O", "O", "B-LOC"]),
    (["Flights", "to", "Bangkok", "suspended"], ["O", "O", "B-LOC", "O"]),
    (["Typhoon", "threat", "for", "Taipei"], ["B-EVENT", "O", "O", "B-LOC"]),
    (["Is", "Melbourne", "port", "operational", "today", "?"], ["O", "B-LOC", "O", "O", "O", "O"]),
    (["Japan", "export", "ban"], ["B-LOC", "O", "B-EVENT"]),
    (["Closure", "in", "Buenos", "Aires"], ["B-EVENT", "O", "B-LOC", "I-LOC"]),
    (["Truck", "strike", "delaying", "goods", "from", "Poland"], ["O", "B-EVENT", "O", "O", "O", "B-LOC"]),
    (["Shanghai", "flood", "disrupts", "cargo"], ["B-LOC", "B-EVENT", "O", "O"]),
    (["Supply", "held", "in", "Turkey", "because", "of", "strike"], ["O", "O", "O", "B-LOC", "O", "O", "B-EVENT"]),
    (["Port", "congestion", "in", "Los", "Angeles"], ["O", "B-EVENT", "O", "B-LOC", "I-LOC"]),
    (["Storm", "approaching", "Cape", "Town"], ["B-EVENT", "O", "B-LOC", "I-LOC"]),
    (["Bad", "weather", "New", "York"], ["O", "B-EVENT", "B-LOC", "I-LOC"]),
    (["Zambia", "roads", "closed", "due", "to", "flood"], ["B-LOC", "O", "O", "O", "O", "B-EVENT"]),
    (["Strike", "in", "Athens", "delays", "supply"], ["B-EVENT", "O", "B-LOC", "O", "O"]),
    (["Transport", "problem", "in", "Perth"], ["O", "B-EVENT", "O", "B-LOC"]),
    (["Typhoon", "interrupts", "shipments", "to", "Hong", "Kong"], ["B-EVENT", "O", "O", "O", "B-LOC", "I-LOC"]),
    (["Avalanche", "blocks", "Italian", "border"], ["B-EVENT", "O", "B-LOC", "O"]),

]
106
+
107
+
108
# Flatten the corpus into parallel word/tag sequences and derive the label
# vocabulary used by the token-classification head.
sentences = [s for s, t in examples]
tags = [t for s, t in examples]
unique_tags = sorted(set(l for ts in tags for l in ts))
label2id = {t: i for i, t in enumerate(unique_tags)}  # tag -> integer class id
id2label = {i: t for t, i in label2id.items()}        # inverse mapping for decoding
max_len = max(len(s) for s in sentences)              # pad/truncate length, in words
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
115
+
116
def encode(sentences, labels, label2id, max_len):
    """Tokenize word-split sentences and attach padded label ids and weights.

    Per-word tags are mapped through label2id and padded with class 0 up to
    max_len; sample_weights carry 1 for real words and 0 for padding so the
    padded positions are masked out of the weighted loss/metrics.

    NOTE(review): labels are aligned positionally to the input *words*, while
    the tokenizer emits subword tokens plus special tokens ([CLS]/[SEP]) —
    when a word splits into several subwords the label/token alignment
    drifts. Confirm with encodings.word_ids() whether realignment is needed.
    """
    encodings = tokenizer(sentences, is_split_into_words=True, padding='max_length', truncation=True, max_length=max_len, return_tensors='tf')
    label_ids = []
    sample_weights = []
    for i, labs in enumerate(labels):
        ids = [label2id[l] for l in labs]
        padding_length = max_len - len(ids)
        ids += [0]*padding_length
        weights = [1]*len(labs) + [0]*padding_length
        label_ids.append(ids)
        sample_weights.append(weights)
    encodings['labels'] = tf.convert_to_tensor(label_ids)
    encodings['sample_weights'] = tf.convert_to_tensor(sample_weights, dtype=tf.float32)
    return encodings
130
+
131
def train_ner_model():
    """Fine-tune DistilBERT for LOC/EVENT token classification and save it.

    Splits the module-level corpus, trains with padding-masked sample
    weights and early stopping, then persists the model, tokenizer, and the
    tag->id map under artifacts/models/nlp_ner.
    """
    X_train, X_val, y_train, y_val = train_test_split(sentences, tags, test_size=VALIDATION_SPLIT, random_state=42)
    train_inputs = encode(X_train, y_train, label2id, max_len)
    val_inputs = encode(X_val, y_val, label2id, max_len)

    model = TFDistilBertForTokenClassification.from_pretrained(
        'distilbert-base-uncased',
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id
    )
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # weighted_metrics applies the per-token sample weights from encode(), so
    # padded positions are excluded from the weighted accuracy.
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'],weighted_metrics=['accuracy'])

    # Stop when validation loss stalls and roll back to the best epoch.
    callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=PATIENCE,
        restore_best_weights=True
    )

    logger.info("Starting NER model training (tuned).")

    # The extra keys added by encode() (labels / sample_weights) are stripped
    # from the model inputs and passed separately as targets and weights.
    history = model.fit(
        {k: v for k, v in train_inputs.items() if k not in ['labels', 'sample_weights']},
        train_inputs['labels'],
        sample_weight=train_inputs['sample_weights'],
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(
            {k: v for k, v in val_inputs.items() if k not in ['labels', 'sample_weights']},
            val_inputs['labels'],
            val_inputs['sample_weights']
        ),
        callbacks=[callback]
    )

    logger.info("Training complete.")
    logger.info(f"Best validation accuracy: {max(history.history['val_accuracy'])}")

    # Persist everything extract_entities_pipeline() needs at inference time.
    out_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_ner"
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save_pretrained(out_dir / "ner_model")
    tokenizer.save_pretrained(out_dir / "ner_tokenizer")
    joblib.dump(label2id, out_dir / "label2id.joblib")
    logger.info(f"NER (TF) model, tokenizer, and label map saved to {out_dir}")
179
+
180
+
181
def extract_entities_pipeline(text: str) -> dict:
    """Extract location and event entities from a free-text query.

    Combines the custom fine-tuned DistilBERT token classifier with a
    pretrained CoNLL-03 HF NER pipeline: locations are the union of both
    sources, events come from the custom model only.

    Args:
        text: Raw query string; split on whitespace for the custom model.

    Returns:
        {"location": [str, ...], "event": [str, ...]}

    Fix: the BIO-decoding loop variable was named `id`, shadowing the
    builtin — renamed to `pred_id`. Logic is otherwise unchanged.

    NOTE(review): predictions are matched to whitespace-split words purely
    positionally (pred_ids[:len(tokens)]), which ignores the leading [CLS]
    token and any subword splits — alignment can drift; confirm against
    tokenizer word_ids(). Also, both models are reloaded on every call,
    which is expensive; consider caching.
    """
    model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "nlp_ner"
    custom_model = TFDistilBertForTokenClassification.from_pretrained(model_dir / "ner_model")
    custom_tokenizer = DistilBertTokenizerFast.from_pretrained(model_dir / "ner_tokenizer")
    label2id = joblib.load(model_dir / "label2id.joblib")
    id2label = {i: t for t, i in label2id.items()}
    max_len = 32  # NOTE(review): hard-coded; training used the corpus max sentence length
    tokens = text.split()
    encoding = custom_tokenizer([tokens], is_split_into_words=True, return_tensors='tf', padding='max_length', truncation=True, max_length=max_len)
    outputs = custom_model({k: v for k, v in encoding.items() if k != "labels"})
    logits = outputs.logits.numpy()[0]
    pred_ids = np.argmax(logits, axis=-1)
    custom_entities = {"location": [], "event": []}
    current_loc, current_evt = [], []
    # Greedy BIO decoding over (word, predicted label id) pairs.
    for w, pred_id in zip(tokens, pred_ids[:len(tokens)]):
        label = id2label[pred_id]
        if label == "B-LOC":
            if current_loc:
                custom_entities["location"].append(" ".join(current_loc))
                current_loc = []
            current_loc = [w]
        elif label == "I-LOC" and current_loc:
            current_loc.append(w)
        else:
            if current_loc:
                custom_entities["location"].append(" ".join(current_loc))
                current_loc = []
            if label == "B-EVENT":
                if current_evt:
                    custom_entities["event"].append(" ".join(current_evt))
                    current_evt = []
                current_evt = [w]
            elif label == "I-EVENT" and current_evt:
                current_evt.append(w)
            else:
                if current_evt:
                    custom_entities["event"].append(" ".join(current_evt))
                    current_evt = []
    # Flush any entity still open at the end of the sentence.
    if current_loc:
        custom_entities["location"].append(" ".join(current_loc))
    if current_evt:
        custom_entities["event"].append(" ".join(current_evt))

    # Backstop with a general-purpose pretrained NER for additional locations.
    hf_ner = pipeline("ner", grouped_entities=True, model="dbmdz/bert-large-cased-finetuned-conll03-english")
    hf_results = hf_ner(text)
    hf_locations = [ent['word'] for ent in hf_results if ent['entity_group'] == "LOC"]

    all_locations = set(custom_entities["location"]) | set(hf_locations)
    all_events = custom_entities["event"]
    return {"location": list(all_locations), "event": all_events}
231
+
232
+
233
if __name__ == "__main__":
    train_ner_model()  # script entry point: fine-tune and persist the NER artifacts
src/components/model_risk_predictor.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.ensemble import HistGradientBoostingClassifier
5
+ from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
6
+ from sklearn.inspection import permutation_importance
7
+ import joblib
8
+ import sys
9
+ from pathlib import Path
10
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
11
+ from utils.logger import *
12
+
13
+ import logging
14
+ logger = logging.getLogger(__name__)
15
+
16
def main():
    """Train and persist the late-delivery risk classifier.

    Loads the engineered supply-chain features, trains a
    HistGradientBoostingClassifier on numeric/boolean columns, logs
    evaluation metrics and permutation importances (with a crude leakage
    check), and saves the model under artifacts/models/risk_predictor.
    """
    processed_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"
    model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "risk_predictor"
    model_dir.mkdir(parents=True, exist_ok=True)
    df = pd.read_csv(processed_dir / "supply_chain_disruptions_features.csv")

    target = "is_late"
    if target not in df.columns:
        logger.error(f"Target column {target} not found.")
        return

    # Identifiers plus the label and its direct precursors (delivery status,
    # realized shipping durations) are excluded to avoid target leakage.
    exclude_cols = [
        target, "Customer Id", "Order Id", "Order Item Id", "Order Customer Id",
        "Late_delivery_risk", "Late Delivery Risk", "Delivery Status",
        "lead_time_days", "Days for shipping (real)", "Days for shipment (scheduled)"
    ]
    # Keep only numeric/boolean columns the model can consume directly.
    feature_cols = [
        c for c in df.columns
        if c not in exclude_cols and df[c].dtype in [np.float64, np.int64, np.bool_, np.int32]
    ]

    X = df[feature_cols]
    y = df[target].astype(int)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

    logger.info(f"Training data shape: {X_train.shape}, Test data shape: {X_test.shape}")

    # NOTE(review): learning_rate=1.0 with max_depth=1 (stump-like trees) is
    # an unusual gradient-boosting configuration — confirm it is intentional.
    model = HistGradientBoostingClassifier(
        max_iter=100, learning_rate=1.0, max_depth=1, random_state=42
    )
    model.fit(X_train, y_train)

    # Held-out evaluation.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]
    report = classification_report(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_proba)
    logger.info("Classification Report:\n" + report)
    logger.info(f"Confusion Matrix:\n{cm}")
    logger.info(f"ROC-AUC: {roc_auc}")

    # Permutation importance doubles as a leakage detector: one feature
    # dominating suggests the label leaked into the features.
    result = permutation_importance(model, X_test, y_test, n_repeats=10, random_state=42, n_jobs=-1)
    importances = result.importances_mean
    feature_importance = pd.DataFrame({'feature': X_test.columns, 'importance': importances}).sort_values('importance', ascending=False)
    logger.info("Top 10 Most Important Features (Permutation Importance):")
    logger.info(feature_importance.head(10).to_string())
    max_importance = feature_importance['importance'].max()
    if max_importance > 0.8:
        logger.warning(f"Potential data leakage: One feature has {max_importance:.3f} importance")

    # Persist the model where predict_risk() looks for it.
    model_path = model_dir / "hist_gradient_boosting_risk_predictor.joblib"
    joblib.dump(model, model_path)
    logger.info(f"Model saved to {model_path}")
69
+
70
+
71
def build_feature_row(feature_cols, query_dict, reference_row=None):
    """Construct a single model-ready feature row from a chatbot query.

    Args:
        feature_cols: Column names the trained model expects.
        query_dict: Query context; recognised keys are 'shipping_mode' and
            'region' (matched as a substring of one-hot column names).
        reference_row: Baseline values (e.g. the training-set median);
            defaults to all zeros when not supplied.

    Returns:
        pd.Series over feature_cols with the shipping-mode, region/country,
        and COMPLETE order-status one-hot columns switched on over the
        baseline values.
    """
    if reference_row is None:
        reference_row = pd.Series({col: 0 for col in feature_cols})

    row = reference_row.copy()

    # Activate the one-hot column for the requested shipping mode.
    shipping_mode = query_dict.get('shipping_mode', 'Standard Class')
    for col in feature_cols:
        if 'Shipping_Mode' in col and shipping_mode in col:
            row[col] = 1
            logging.getLogger(__name__).debug(f"Set shipping mode: {col} = 1")

    # Activate region/country dummies mentioning the region. Fix: guard
    # against the empty string — '' is a substring of every column name, so
    # the original switched on ALL region/country dummies when no region was
    # supplied.
    region = query_dict.get('region', '')
    if region:
        for col in feature_cols:
            if 'Order_Country' in col or 'Order_Region' in col:
                if region in col:
                    row[col] = 1

    # Assume a completed order status as the default context.
    for col in feature_cols:
        if 'Order_Status_COMPLETE' in col:
            row[col] = 1

    return row
98
+
99
+
100
+
101
# Baseline disruption risk (0-1) per region/port; regions not listed fall
# back to 0.40 (see calculate_rule_based_risk).
REGION_BASE_RISKS = {
    "Shanghai": 0.55,
    "Singapore": 0.30,
    "Mumbai": 0.45,
    "Dubai": 0.35,
    "UAE": 0.35,
    "USA": 0.30,
    "Germany": 0.25,
    "China": 0.55,
    "India": 0.45,
    "Hong Kong": 0.50,
    "Rotterdam": 0.28,
    "Los Angeles": 0.40,
}


# Additive risk contribution per keyword matched (case-insensitively) in
# free-text incident descriptions; contributions are summed and the final
# score is capped at 1.0 downstream.
EVENT_RISK_MULTIPLIERS = {
    "strike": 0.30,
    "port strike": 0.35,
    "typhoon": 0.35,
    "hurricane": 0.35,
    "earthquake": 0.40,
    "flood": 0.25,
    "port closure": 0.45,
    "supplier outage": 0.25,
    "customs delay": 0.15,
    "congestion": 0.20,
    "pandemic": 0.30,
    "war": 0.50,
    "sanctions": 0.40,
}
132
+
133
+
134
def calculate_rule_based_risk(region, days, incidents):
    """Blend regional base risk, incident keywords, and time horizon.

    The score is 50% regional baseline (0.40 default for unknown regions),
    40% summed incident-keyword weights, and 10% urgency (shorter horizons
    score higher), capped at 1.0.
    """
    regional = REGION_BASE_RISKS.get(region, 0.40)

    # Sum the weight of every known event keyword found in the incident texts.
    incident_risk = 0.0
    for raw in incidents or []:
        lowered = str(raw).lower()
        for keyword, weight in EVENT_RISK_MULTIPLIERS.items():
            if keyword in lowered:
                incident_risk += weight
                logger.debug(f"Event '{keyword}' detected in '{raw}', adding {weight}")

    # Urgency factor: 1.0 at day 0, decaying linearly, floored at 0.1.
    urgency = max(0.1, 1.0 - (days / 30.0))

    combined = regional * 0.5 + incident_risk * 0.4 + urgency * 0.1
    return min(1.0, combined)
155
+
156
+
157
def predict_risk(region: str, days: int = 5, origin=None, destination=None,
                 event_type=None, incidents=None, shipping_mode=None):
    """Hybrid disruption-risk score in [0, 1] for a region/route.

    Combines the trained HistGradientBoosting model (historical shipping
    patterns) with the rule-based score (regional baseline + live incident
    keywords). Weighting shifts toward the rules when incidents are present.

    Args:
        region: Destination region/city used for both the ML dummies and rules.
        days: Planning horizon in days (shorter horizon raises the rule score).
        origin, destination: Optional route endpoints, passed into the ML
            feature-row context.
        event_type: Currently unused; kept for interface compatibility.
        incidents: Iterable of free-text incident descriptions.
        shipping_mode: One-hot shipping mode; defaults to "Standard Class".

    Returns:
        Risk score rounded to 2 decimals; 0.50 on unexpected failure.

    Fix: removed the redundant function-local re-imports of joblib, pandas,
    and Path — all three are already imported at module level.
    """
    try:
        model_dir = Path(__file__).resolve().parents[2] / "artifacts" / "models" / "risk_predictor"
        model_path = model_dir / "hist_gradient_boosting_risk_predictor.joblib"

        if shipping_mode is None:
            shipping_mode = "Standard Class"

        # Rule-based component is always available.
        rule_risk = calculate_rule_based_risk(region, days, incidents or [])
        logger.info(f"Rule-based risk for {region}: {rule_risk:.3f}")

        # ML component falls back to a neutral prior when the model or its
        # feature reference data is unavailable.
        ml_risk = 0.40

        if model_path.exists():
            try:
                model = joblib.load(model_path)
                logger.debug(f"Loaded ML model from {model_path}")

                data_dir = Path(__file__).resolve().parents[2] / "artifacts" / "data" / "processed"
                feature_csv_path = data_dir / "supply_chain_disruptions_features.csv"

                if feature_csv_path.exists():
                    feature_csv = pd.read_csv(feature_csv_path)
                    feature_cols = list(model.feature_names_in_) if hasattr(model, "feature_names_in_") else list(feature_csv.columns)

                    # Median row stands in for a "typical shipment" baseline.
                    reference_row = feature_csv[feature_cols].median()

                    query_dict = {
                        "region": region,
                        "days": days,
                        "origin": origin,
                        "destination": destination,
                        "shipping_mode": shipping_mode,
                    }

                    test_features = pd.DataFrame([build_feature_row(feature_cols, query_dict, reference_row)])
                    ml_risk = float(model.predict_proba(test_features)[0, 1])
                    logger.info(f"ML model risk for {region}: {ml_risk:.3f}")
            except Exception as e:
                logger.warning(f"Could not get ML prediction: {e}")

        if incidents and len(incidents) > 0:
            # Live incidents reported: trust the rules more than history.
            final_risk = (ml_risk * 0.40) + (rule_risk * 0.60)
            logger.info(f"Hybrid risk (with incidents): ML={ml_risk:.3f}*0.4 + Rule={rule_risk:.3f}*0.6 = {final_risk:.3f}")
        else:
            final_risk = (ml_risk * 0.70) + (rule_risk * 0.30)
            logger.info(f"Hybrid risk (no incidents): ML={ml_risk:.3f}*0.7 + Rule={rule_risk:.3f}*0.3 = {final_risk:.3f}")

        final_risk = float(np.clip(final_risk, 0.0, 1.0))

        return round(final_risk, 2)

    except Exception as e:
        logger.error(f"Error in predict_risk: {e}", exc_info=True)
        return 0.50
225
+
226
+
227
if __name__ == "__main__":
    # Train the model, then exercise the hybrid scorer on a few scenarios.
    main()


    print("\n" + "="*60)
    print("Testing HYBRID Risk Predictions (ML + Rules)")
    print("="*60)


    # Scenario 1: baseline — known region, no live incidents.
    print("\n1. UAE with no events:")
    risk1 = predict_risk("UAE", days=5, incidents=[])
    print(f" → Risk Score: {risk1:.2f}")


    # Scenario 2: a single incident should raise the score vs. baseline.
    print("\n2. Shanghai with port strike:")
    risk2 = predict_risk("Shanghai", days=5, incidents=["port strike"])
    print(f" → Risk Score: {risk2:.2f}")
    print(f" → Increase: +{(risk2-risk1)*100:.1f}%")


    # Scenario 3: multiple incidents on a shorter horizon.
    print("\n3. Mumbai with typhoon and port congestion:")
    risk3 = predict_risk("Mumbai", days=3, incidents=["typhoon", "port congestion"])
    print(f" → Risk Score: {risk3:.2f}")
    print(f" → Increase: +{(risk3-risk1)*100:.1f}%")


    # Scenario 4: route context without incidents.
    print("\n4. USA to Singapore route (no events):")
    risk4 = predict_risk("Singapore", days=7, origin="USA", destination="Singapore", incidents=[])
    print(f" → Risk Score: {risk4:.2f}")


    # Scenario 5: same route with incidents (one keyword unknown to the rules).
    print("\n5. USA to Singapore with equipment failure:")
    risk5 = predict_risk("Singapore", days=7, origin="USA", destination="Singapore",
                         incidents=["equipment failure", "customs delay"])
    print(f" → Risk Score: {risk5:.2f}")
    print(f" → Increase: +{(risk5-risk4)*100:.1f}%")


    # Scenario 6: stacked critical events on an urgent horizon.
    print("\n6. Shanghai with multiple critical events:")
    risk6 = predict_risk("Shanghai", days=2, incidents=["typhoon", "port strike", "port closure"])
    print(f" → Risk Score: {risk6:.2f} ")

    print("\n" + "="*60)
    print("Hybrid approach combines:")
    print(" - ML Model: Historical shipping patterns")
    print(" - Rules: Real-time events and regional factors")
    print("="*60)
src/components/model_timeseries_risk.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from sklearn.preprocessing import StandardScaler
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.utils import class_weight
7
+ import joblib
8
+ from pathlib import Path
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+
15
# --- Data loading & sequence construction ------------------------------------
# NOTE(review): everything below (including model training further down) runs
# at import time — consider wrapping in a main() guard.
base_dir = Path(__file__).resolve().parents[2]
data_path = base_dir / "artifacts" / "data" / "processed" / "supply_chain_disruptions_features.csv"


df = pd.read_csv(data_path)
region_col = "Order City"
region_name = "Shanghai"  # region whose history the LSTM is trained on


# Fall back to a 200-row sample of the full dataset when the region is sparse.
df_region = df[df[region_col] == region_name].copy()
if len(df_region) < 100:
    logger.warning("Region sample is small, upsampling/cropping to 200 rows from full dataset.")
    df_region = df.sample(200, random_state=42) if len(df) >= 200 else df

feature_cols = [
    "Days for shipping (real)", "Sales per customer", "Order Item Discount",
    "Order Item Product Price", "Order Item Quantity"
]
label_col = "Late_delivery_risk"
seq_length = 7  # window length (rows) fed to the LSTM

X_all = df_region[feature_cols].fillna(0).astype(float).values
y_all = df_region[label_col].fillna(0).astype(int).values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_all)

# Sliding windows: each window of seq_length rows predicts the next row's label.
X_seq, y_seq = [], []
for i in range(len(X_scaled) - seq_length):
    X_seq.append(X_scaled[i:i+seq_length])
    y_seq.append(y_all[i+seq_length])
X_seq = np.array(X_seq)
y_seq = np.array(y_seq)
logger.info(f"Sequence shape: {X_seq.shape}; Labels: {y_seq.shape}")

if len(X_seq) < 2:
    logger.error("Not enough sequences. Add more data or lower seq_length.")
    exit()  # NOTE(review): module-level exit() also aborts any importer; prefer raising
53
+
54
+
55
# Chronological split: the last 20% of windows form the test set.
# NOTE(review): with fewer than 5 windows test_size is 0 and the slices
# degenerate (X_seq[:-0] is empty, X_seq[-0:] is everything) — confirm the
# input is always large enough to avoid this.
test_size = int(0.2 * len(X_seq))
X_train, X_test = X_seq[:-test_size], X_seq[-test_size:]
y_train, y_test = y_seq[:-test_size], y_seq[-test_size:]


# Balance the loss for the (typically skewed) late/on-time classes.
weights = class_weight.compute_class_weight(class_weight="balanced",
                                            classes=np.unique(y_train),
                                            y=y_train)
class_weight_dict = dict(zip(np.unique(y_train), weights))


# Two stacked LSTMs with dropout; sigmoid head yields a binary risk probability.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(seq_length, len(feature_cols))),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

logger.info("Training LSTM risk model with weighted loss and dropout.")
model.fit(X_train, y_train, epochs=12, batch_size=8,
          validation_split=0.1, class_weight=class_weight_dict)

test_loss, test_acc = model.evaluate(X_test, y_test)
logger.info(f"Test Accuracy: {test_acc:.4f}")


# Persist the trained model and the fitted scaler side by side.
model_dir = base_dir / "artifacts" / "models" / "timeseries_risk"
model_dir.mkdir(parents=True, exist_ok=True)
model.save(model_dir / "lstm_risk_model.keras")
joblib.dump(scaler, model_dir / "scaler.joblib")
logger.info(f"Saved LSTM model and scaler to {model_dir}")
89
+
90
def predict_risk_for_next_day(sequence, threshold=0.5):
    """Score one window of feature rows and return (probability, label).

    Args:
        sequence: array-like of shape (seq_length, len(feature_cols)); it is
            scaled here with the fitted StandardScaler, so callers should
            pass *unscaled* feature values.
        threshold: probability cut-off for the binary late/on-time label.

    Uses the module-level `scaler`, `model`, and `region_name` defined above.
    """
    seq = scaler.transform(sequence)
    seq_window = np.expand_dims(seq, axis=0)
    pred_prob = model.predict(seq_window)[0][0]
    pred_label = int(pred_prob > threshold)
    logger.info(f"Predicted next-day risk score: {pred_prob:.3f} (region: {region_name}), label: {pred_label}")
    return pred_prob, pred_label

if X_test.shape[0] > 0:
    # NOTE(review): the log message says "last window" but X_test[0] is the
    # FIRST test window — message and code disagree; confirm the intent.
    # Also X_test is built from already-scaled data, yet the function scales
    # again — this demo input is double-scaled; verify.
    logger.info("Demo prediction for next-day risk using last window of test set:")
    predict_risk_for_next_day(X_test[0], threshold=0.5)
src/components/recommendation_engine.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
4
+ from utils.logger import *
5
+
6
+ import logging
7
+ logger = logging.getLogger(__name__)
8
+
9
def generate_recommendation(
    risk_score,
    region,
    recent_incidents=None,
    weather_alert=None,
    intent=None,
    origin=None,
    destination=None
):
    """Translate a risk score into an actionable supply-chain recommendation.

    Args:
        risk_score: Probability-like score; higher means riskier.
        region: Fallback label used when no origin/destination lane is given.
        recent_incidents: Optional list of incident strings (first 3 shown).
        weather_alert: Optional weather warning appended to the message.
        intent: Detected user intent; "mitigation_help" adds a follow-up prompt.
        origin: Optional lane origin city.
        destination: Optional lane destination city.

    Returns:
        Dict with keys "message", "action", "risk_score" and "region".
    """
    # Prefer an explicit "origin to destination" lane label when both ends are known.
    region_str = f"{origin} to {destination}" if origin and destination else region

    # Risk tiers, highest cutoff first: (cutoff, level, headline template, action).
    # A cutoff of None marks the catch-all lowest tier.
    tiers = (
        (0.8, "High risk",
         "{lvl} detected for {loc}! Recent incidents or delays increase disruption probability. "
         "Immediate mitigation advised—consider rerouting, switching suppliers, or delaying shipment.",
         "reroute/switch_supplier/delay"),
        (0.6, "Elevated risk",
         "{lvl} in {loc}. Monitor closely and prioritize more reliable suppliers and routes.",
         "monitor_prioritize"),
        (0.3, "Moderate risk",
         "{lvl} for {loc}. Standard operations are feasible, but stay alert for escalating risks.",
         "continue_monitor"),
        (None, "Low risk",
         "{lvl} for {loc}. Proceed with routine operations.",
         "proceed"),
    )
    for cutoff, level, template, action in tiers:
        if cutoff is None or risk_score >= cutoff:
            message = template.format(lvl=level, loc=region_str)
            break

    # Context lines appended below the headline, in this fixed order.
    extra_lines = []
    if weather_alert:
        extra_lines.append(f"Weather Alert: {weather_alert}")
    if recent_incidents:
        extra_lines.append(f"Recent incidents: {', '.join(recent_incidents[:3])}")
    if recent_incidents and risk_score >= 0.8:
        extra_lines.append(
            "Supply chain disruption likely due to recent incidents. Take immediate action to mitigate risk."
        )
    if intent == "mitigation_help" and risk_score >= 0.5:
        extra_lines.append("Would you like to view alternate routes or suppliers for mitigation?")
    for line in extra_lines:
        message += "\n" + line

    logger.info(f"Recommendation for {region_str} (risk: {risk_score:.2f}): {action}")
    return {
        "message": message,
        "action": action,
        "risk_score": risk_score,
        "region": region_str
    }
69
+
70
if __name__ == "__main__":
    # Smoke-test the engine across high, elevated and low risk scenarios.
    demo_cases = [
        {
            "risk_score": 0.85,
            "region": "Shanghai",
            "recent_incidents": ['port strike', 'supplier outage', 'heavy rain'],
            "weather_alert": 'Typhoon warning',
            "intent": "mitigation_help",
            "origin": "Shanghai",
            "destination": "Los Angeles",
        },
        {
            "risk_score": 0.55,
            "region": "Delhi",
            "recent_incidents": ['route accident', 'moderate rain'],
            "weather_alert": None,
            "intent": "risk_check",
            "origin": "Delhi",
            "destination": "Dubai",
        },
        {
            "risk_score": 0.15,
            "region": "Mumbai",
            "recent_incidents": [],
            "intent": None,
        },
    ]
    for case in demo_cases:
        rec = generate_recommendation(**case)
        print("\n--- Example Recommendation ---")
        print(rec["message"])
src/config/__init__.py ADDED
File without changes
src/config/__pycache__/config.cpython-311.pyc ADDED
Binary file (522 Bytes). View file
 
src/config/config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Central configuration for the external-data refresh pipeline.
API_CONFIG = {
    # Regions used as search queries for the GNews fetcher.
    "regions": ["Mumbai", "Shanghai", "New York"],
    # City coordinates used by the weather fetcher.
    "weather_regions": [
        {"city": "Mumbai", "lat": 19.0760, "lon": 72.8777},
        {"city": "Shanghai", "lat": 31.2304, "lon": 121.4737},
        {"city": "New York", "lat": 40.7128, "lon": -74.0060},
    ],
    # Output directories for raw JSON snapshots.
    "news_output_dir": "data/news",
    "weather_output_dir": "data/weather",
}
src/pipeline/__init__.py ADDED
File without changes
src/pipeline/__pycache__/data_refresh_workflow.cpython-311.pyc ADDED
Binary file (5 kB). View file
 
src/pipeline/data_refresh_workflow.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+
4
+ import sys
5
+ from pathlib import Path
6
+ sys.path.append(str(Path(__file__).resolve().parents[1]))
7
+ from utils.logger import *
8
+
9
+ from components.api_gnews_fetcher import GNewsFetcher
10
+ from components.api_weather_fetcher import WeatherFetcher
11
+ from config.config import API_CONFIG
12
+ from utils.logger import *
13
+
14
+ import logging
15
+ logger = logging.getLogger(__name__)
16
+
17
def ensure_dir(path):
    """Create *path* (including parents) if it does not already exist.

    Logs only when a directory was actually missing, matching the
    original "Directory created" semantics.
    """
    if not os.path.exists(path):
        # exist_ok=True closes the TOCTOU race: another process may create
        # the directory between the exists() check and makedirs().
        os.makedirs(path, exist_ok=True)
        logger.info(f"Directory created: {path}")
21
+
22
def save_snapshot(data, folder, prefix, region):
    """Write *data* as a timestamped JSON snapshot into *folder*.

    Args:
        data: JSON-serializable payload (API response).
        folder: Existing output directory.
        prefix: Snapshot family, e.g. "gnews" or "weather".
        region: Region/city label; spaces are replaced for the filename.
    """
    # Local import hoisted out of the `with` block where it previously lived;
    # keeps the module import-light without re-running it mid-write.
    import json

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fname = f"{prefix}_{region.replace(' ', '_')}_{timestamp}.json"
    fpath = os.path.join(folder, fname)
    with open(fpath, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    logger.info(f"Snapshot saved: {fpath}")
30
+
31
def refresh_gnews(regions, out_dir):
    """Fetch GNews articles for each region and persist JSON snapshots.

    Failures are logged per region and do not abort the remaining regions.
    """
    fetcher = GNewsFetcher()
    ensure_dir(out_dir)
    for region in regions:
        try:
            # Fetch and persist in one guarded step so a failure in either
            # is logged and skipped, as before.
            save_snapshot(fetcher.fetch_news(region), out_dir, "gnews", region)
            logger.info(f"GNews data for {region} saved.")
        except Exception as e:
            logger.error(f"Error fetching GNews for {region}: {e}")
41
+
42
def refresh_weather(weather_regions, out_dir):
    """Fetch current weather for each configured city and save snapshots.

    Each entry in *weather_regions* must provide "city", "lat" and "lon".
    Per-city failures are logged and the loop continues.
    """
    fetcher = WeatherFetcher()
    ensure_dir(out_dir)
    for loc in weather_regions:
        try:
            payload = fetcher.fetch_weather(loc["lat"], loc["lon"])
            save_snapshot(payload, out_dir, "weather", loc["city"])
            logger.info(f"Weather data for {loc['city']} saved.")
        except Exception as e:
            logger.error(f"Error fetching Weather for {loc['city']}: {e}")
52
+
53
def run_all():
    """Run the full data-refresh workflow (news then weather) from API_CONFIG."""
    logger.info("Starting data refresh workflow...")
    cfg = API_CONFIG
    # News first, then weather — mirrors the original execution order.
    refresh_gnews(cfg['regions'], cfg['news_output_dir'])
    refresh_weather(cfg['weather_regions'], cfg['weather_output_dir'])
    logger.info("All data refreshes complete.")
63
+
64
+
65
# Allow running this module directly as a one-shot refresh script.
if __name__ == "__main__":
    run_all()
src/utils/__init__.py ADDED
File without changes
src/utils/__pycache__/logger.cpython-311.pyc ADDED
Binary file (744 Bytes). View file
 
src/utils/__pycache__/logger.cpython-313.pyc ADDED
Binary file (710 Bytes). View file
 
src/utils/logger.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os

# Log directory shared by the whole project; created up front so the
# FileHandler below can open its file at import time.
_LOG_DIR = os.path.join('artifacts', 'logs')
os.makedirs(_LOG_DIR, exist_ok=True)

# One-time root-logger setup: INFO and above, mirrored to file and console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(_LOG_DIR, 'logfile.txt')),
        logging.StreamHandler(),
    ],
)