Subi003 commited on
Commit
4c01182
·
verified ·
1 Parent(s): 26c91c4

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full list.
Files changed (50) hide show
  1. .dockerignore +43 -0
  2. Dockerfile +30 -0
  3. README.md +15 -10
  4. Toxic_TweetTagger.egg-info/PKG-INFO +46 -0
  5. Toxic_TweetTagger.egg-info/SOURCES.txt +27 -0
  6. Toxic_TweetTagger.egg-info/dependency_links.txt +1 -0
  7. Toxic_TweetTagger.egg-info/requires.txt +25 -0
  8. Toxic_TweetTagger.egg-info/top_level.txt +4 -0
  9. __init__.py +0 -0
  10. __pycache__/__init__.cpython-311.pyc +0 -0
  11. app/__pycache__/main.cpython-311.pyc +0 -0
  12. app/api/__pycache__/api_routes.cpython-311.pyc +0 -0
  13. app/api/__pycache__/dependencies.cpython-311.pyc +0 -0
  14. app/api/__pycache__/schemas.cpython-311.pyc +0 -0
  15. app/api/api_routes.py +39 -0
  16. app/api/dependencies.py +17 -0
  17. app/api/schemas.py +44 -0
  18. app/main.py +86 -0
  19. app/middleware/__init__.py +66 -0
  20. app/middleware/__pycache__/__init__.cpython-311.pyc +0 -0
  21. app/model/MLmodel +31 -0
  22. app/model/artifacts/booster.json +0 -0
  23. app/model/artifacts/metrics.json +1 -0
  24. app/model/artifacts/model.joblib +3 -0
  25. app/model/artifacts/vectorizer.joblib +3 -0
  26. app/model/conda.yaml +15 -0
  27. app/model/python_env.yaml +7 -0
  28. app/model/python_model.pkl +3 -0
  29. app/model/registered_model_meta +2 -0
  30. app/model/requirements.txt +8 -0
  31. app/monitoring/__init__.py +0 -0
  32. app/monitoring/__pycache__/__init__.cpython-311.pyc +0 -0
  33. app/monitoring/__pycache__/http_metrics.cpython-311.pyc +0 -0
  34. app/monitoring/__pycache__/service_metrics.cpython-311.pyc +0 -0
  35. app/monitoring/http_metrics.py +20 -0
  36. app/monitoring/service_metrics.py +62 -0
  37. app/requirements.txt +7 -0
  38. app/services/__pycache__/explainer.cpython-311.pyc +0 -0
  39. app/services/__pycache__/feedback.cpython-311.pyc +0 -0
  40. app/services/__pycache__/inference.cpython-311.pyc +0 -0
  41. app/services/explainer.py +55 -0
  42. app/services/feedback.py +38 -0
  43. app/services/inference.py +113 -0
  44. app/workers/__init__.py +144 -0
  45. app/workers/__pycache__/__init__.cpython-311.pyc +0 -0
  46. components/__init__.py +0 -0
  47. components/__pycache__/__init__.cpython-311.pyc +0 -0
  48. components/__pycache__/data_ingestion.cpython-311.pyc +0 -0
  49. components/__pycache__/data_preprocessing.cpython-311.pyc +0 -0
  50. components/__pycache__/data_validation.cpython-311.pyc +0 -0
.dockerignore ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.so
5
+
6
+ # Ignore Jupyter notebooks (if not used)
7
+ *.ipynb
8
+ .ipynb_checkpoints/
9
+
10
+ # Ignore logs and temp files
11
+ *.log
12
+ logs/
13
+ *.tmp
14
+ *.DS_Store
15
+
16
+ # Ignore version control and dev files
17
+ .git/
18
+ .github/
19
+ .vscode/
20
+ *.env
21
+ .env*
22
+ .gitignore
23
+
24
+ # MLflow & DVC metadata (keep only if you need them at runtime)
25
+ .mlflow/
26
+ .dvc/
27
+ .dvcignore
28
+
29
+ # CI/CD config files
30
+ tox.ini
31
+ pytest.ini
32
+ setup.cfg
33
+ setup.py
34
+ requirements-dev.txt
35
+
36
+ # Ignore Docker build context bloat
37
+ *.tar
38
+ *.zip
39
+ *.gz
40
+ *.egg-info/
41
+
42
+ # Ignore Hugging Face cache
43
+ ~/.cache/huggingface/
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11.11-slim-bookworm
2
+
3
+ RUN apt-get update && \
4
+ apt-get install --no-install-recommends -y build-essential && \
5
+ rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ COPY . /app
10
+
11
+ RUN pip install --no-cache-dir --upgrade pip && \
12
+ pip install --no-cache-dir -r app/requirements.txt -r app/model/requirements.txt
13
+
14
+ RUN mkdir -p /tmp/prometheus_metrics && \
15
+ chmod 777 /tmp/prometheus_metrics
16
+
17
+ COPY start.sh /start.sh
18
+ RUN chmod +x /start.sh
19
+
20
+ RUN useradd -m appuser
21
+ USER appuser
22
+
23
+ EXPOSE 7860
24
+ ENV HOST=0.0.0.0 \
25
+ PORT=7860 \
26
+ PYTHONUNBUFFERED=1 \
27
+ PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus_metrics
28
+
29
+ ENTRYPOINT ["/start.sh"]
30
+ CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "app.main:app", "--workers", "2", "--bind", "0.0.0.0:7860"]
README.md CHANGED
@@ -1,10 +1,15 @@
1
- ---
2
- title: ToxicTweet Tagger
3
- emoji: 😻
4
- colorFrom: green
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
1
+ ---
2
+ title: Toxic Tweet Tagger
3
+ emoji: 🤖
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
+ python_version: "3.11"
9
+ app_file: app.py
10
+ pinned: false
11
+ ---
12
+
13
+ # Toxic Tweet Tagger
14
+
15
+ A machine learning app that detects toxic tweets and explains predictions using LIME.
Toxic_TweetTagger.egg-info/PKG-INFO ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: Toxic-TweetTagger
3
+ Version: 0.1.0
4
+ Summary: End to end Hate-Tweet detection automation with MLOps implementation
5
+ Home-page: https://github.com/SubinoyBera/Toxic-TweetTagger
6
+ Author: Subinoy Bera
7
+ Author-email: subinoyberadgp@gmail.com
8
+ License: Apache-2.0
9
+ Classifier: Topic :: Engineering/Automation :: ML/MLOps
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Operating System :: OS Independent
16
+ Requires-Python: >=3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy==2.2.6
20
+ Requires-Dist: pandas==2.3.1
21
+ Requires-Dist: scipy==1.13.1
22
+ Requires-Dist: scikit-learn==1.7.0
23
+ Requires-Dist: xgboost==3.0.2
24
+ Requires-Dist: nltk==3.9.1
25
+ Requires-Dist: python-box==7.3.2
26
+ Requires-Dist: ensure==1.0.4
27
+ Requires-Dist: PyYAML==6.0.2
28
+ Requires-Dist: dvc==3.61.0
29
+ Requires-Dist: mlflow==2.22.1
30
+ Requires-Dist: dagshub==0.5.10
31
+ Requires-Dist: fastapi==0.116.1
32
+ Requires-Dist: pydantic==2.11.7
33
+ Requires-Dist: tqdm==4.67.1
34
+ Requires-Dist: requests==2.32.4
35
+ Requires-Dist: pytest==8.4.1
36
+ Requires-Dist: tox==4.11.3
37
+ Provides-Extra: testing
38
+ Requires-Dist: pytest>=8.0.0; extra == "testing"
39
+ Requires-Dist: black>=25.0.0; extra == "testing"
40
+ Requires-Dist: flake8>=6.0.0; extra == "testing"
41
+ Requires-Dist: mypy>=1.5.0; extra == "testing"
42
+ Requires-Dist: tox>=4.0.0; extra == "testing"
43
+ Dynamic: license-file
44
+
45
+ # Toxic-TweetTagger
46
+ Hate speech detection
Toxic_TweetTagger.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE
2
+ README.md
3
+ setup.cfg
4
+ setup.py
5
+ src/Toxic_TweetTagger.egg-info/PKG-INFO
6
+ src/Toxic_TweetTagger.egg-info/SOURCES.txt
7
+ src/Toxic_TweetTagger.egg-info/dependency_links.txt
8
+ src/Toxic_TweetTagger.egg-info/requires.txt
9
+ src/Toxic_TweetTagger.egg-info/top_level.txt
10
+ src/components/__init__.py
11
+ src/components/data_ingestion.py
12
+ src/components/data_preprocessing.py
13
+ src/components/feature_engineering.py
14
+ src/components/model_evaluation.py
15
+ src/components/model_training.py
16
+ src/components/register_model.py
17
+ src/constant/__init__.py
18
+ src/constant/constants.py
19
+ src/core/__init__.py
20
+ src/core/config_entity.py
21
+ src/core/configuration.py
22
+ src/core/exception.py
23
+ src/core/logger.py
24
+ src/pipeline/__init__.py
25
+ src/pipeline/ml_pipeline.py
26
+ tests/test_app.py
27
+ tests/test_model.py
Toxic_TweetTagger.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
Toxic_TweetTagger.egg-info/requires.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==2.2.6
2
+ pandas==2.3.1
3
+ scipy==1.13.1
4
+ scikit-learn==1.7.0
5
+ xgboost==3.0.2
6
+ nltk==3.9.1
7
+ python-box==7.3.2
8
+ ensure==1.0.4
9
+ PyYAML==6.0.2
10
+ dvc==3.61.0
11
+ mlflow==2.22.1
12
+ dagshub==0.5.10
13
+ fastapi==0.116.1
14
+ pydantic==2.11.7
15
+ tqdm==4.67.1
16
+ requests==2.32.4
17
+ pytest==8.4.1
18
+ tox==4.11.3
19
+
20
+ [testing]
21
+ pytest>=8.0.0
22
+ black>=25.0.0
23
+ flake8>=6.0.0
24
+ mypy>=1.5.0
25
+ tox>=4.0.0
Toxic_TweetTagger.egg-info/top_level.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ components
2
+ constant
3
+ core
4
+ pipeline
__init__.py ADDED
File without changes
__pycache__/__init__.cpython-311.pyc ADDED
Binary file (152 Bytes). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (5.04 kB). View file
 
app/api/__pycache__/api_routes.cpython-311.pyc ADDED
Binary file (3.36 kB). View file
 
app/api/__pycache__/dependencies.cpython-311.pyc ADDED
Binary file (1.18 kB). View file
 
app/api/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (3.97 kB). View file
 
app/api/api_routes.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""API route definitions for the toxic-tweet inference service."""

from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse, Response
from prometheus_client import CONTENT_TYPE_LATEST, CollectorRegistry, generate_latest, multiprocess

from src.app.api.dependencies import get_inference_service, get_explainer_service, get_feedback_service
from src.app.api.schemas import InferenceRequest, InferenceResponse, FeedbackRequest, ExplanationRequest
from src.app.services.explainer import ExplainerService
from src.app.services.feedback import FeedbackService
from src.app.services.inference import InferenceService

router = APIRouter()


@router.get("/health")
async def health_check():
    """Liveness probe: report that the service is up."""
    return {"status": "ok"}


@router.post("/predict", response_model=InferenceResponse)
async def predict(request: Request, payload: InferenceRequest,
                  service: InferenceService = Depends(get_inference_service)):
    """Classify a tweet, forwarding the middleware-assigned request id to the service."""
    return service.predict(request.state.request_id, payload.input_tweet, payload.text)


@router.post("/explain")
async def explain(payload: ExplanationRequest,
                  service: ExplainerService = Depends(get_explainer_service)):
    """Return a LIME explanation of the prediction, rendered as HTML."""
    return HTMLResponse(service.explain(payload.input_tweet))


@router.post("/submit_feedback")
async def submit_feedback(request: Request, payload: FeedbackRequest,
                          service: FeedbackService = Depends(get_feedback_service)):
    """Record user feedback about a previous prediction."""
    return service.submit_feedback(request.state.request_id,
                                   payload.predicted_label, payload.feedback_label)


@router.get("/metrics")
def metrics():
    """Expose Prometheus metrics aggregated across gunicorn worker processes."""
    # Multiprocess mode requires a fresh registry per scrape so the
    # MultiProcessCollector can merge per-worker metric files.
    registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(registry)
    return Response(
        generate_latest(registry),
        media_type=CONTENT_TYPE_LATEST,
        headers={"Cache-Control": "no-cache"},
    )
app/api/dependencies.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI dependency providers that resolve service singletons off app state."""

from fastapi import Request

from src.app.services.inference import InferenceService
from src.app.services.explainer import ExplainerService
from src.app.services.feedback import FeedbackService


def get_inference_service(request: Request) -> InferenceService:
    """Return the prediction service stored on ``app.state`` during startup."""
    return request.app.state.prediction_service


def get_explainer_service(request: Request) -> ExplainerService:
    """Return the LIME explainer service stored on ``app.state`` during startup."""
    return request.app.state.explainer_service


def get_feedback_service(request: Request) -> FeedbackService:
    """Return the feedback service stored on ``app.state`` during startup."""
    return request.app.state.feedback_service
app/api/schemas.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Schema validation for the API requests and responses

from datetime import datetime
from typing import Annotated, Dict, Literal, Optional

from pydantic import BaseModel, Field


class InferenceRequest(BaseModel):
    """Payload accepted by the /predict endpoint."""
    input_tweet: Annotated[str, Field(..., description="Input tweet or comment text for classification")]
    text: Annotated[str, Field(..., description="Preprocessed text to be fed to the model for prediction")]


class ExplanationRequest(BaseModel):
    """Payload accepted by the /explain endpoint."""
    input_tweet: Annotated[str, Field(..., description="Input tweet or comment text for generating explanation")]


class PredictionResult(BaseModel):
    """Model output: predicted label, probability, and qualitative toxicity band."""
    label: int
    confidence: float = Field(..., ge=0.0, le=1.0, description="Prediction probability")
    toxicity: Literal["strong", "high", "uncertain", "none"]


class ModelInfoSchema(BaseModel):
    """Identifies the model artifacts that served the request."""
    name: str = Field(..., description="Model name")
    version: int = Field(..., description="Model version")
    vectorizer: str = Field(..., description="Vectorizer class name")


class MetadataSchema(BaseModel):
    """Operational metadata attached to every inference response."""
    latency: float = Field(..., ge=0, description="Response time in seconds")
    usage: Dict[str, float]
    model: ModelInfoSchema
    streamable: bool = Field(default=False)
    environment: Literal["Standard", "Beta", "Production"]
    api_version: str


class InferenceResponse(BaseModel):
    """Full response envelope returned by /predict."""
    id: str
    timestamp: datetime
    object: Literal["text-classification"]
    prediction: PredictionResult
    warnings: Optional[dict] = None
    metadata: MetadataSchema


class FeedbackRequest(BaseModel):
    """Payload for /submit_feedback: the model's label and the user's verdict."""
    predicted_label: Literal[0, 1]
    feedback_label: Literal[0, 1]
app/main.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
import json
from pathlib import Path
from contextlib import asynccontextmanager

import yaml
import xgboost as xgb
from fastapi import FastAPI
from lime.lime_text import LimeTextExplainer

# Merged the two separate imports from src.core.constants into one.
from src.core.constants import (DATABASE_NAME, FEEDBACK_COLLECTION_NAME,
                                PRODUCTION_COLLECTION_NAME, REGISTERED_MODELS_DIR)
from src.core.exception import AppException
from src.core.logger import logging
from src.core.mongo_client import MongoDBClient
from src.utils import load_obj
from src.app.api.api_routes import router
from src.app.middleware import http_observability_middleware
from src.app.services.explainer import ExplainerService
from src.app.services.feedback import FeedbackService
from src.app.services.inference import InferenceService
from src.app.workers import BufferedEventConsumerWorker


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Build all application state on startup and tear it down on shutdown.

    Loads the XGBoost booster, vectorizer, evaluation threshold and model
    metadata, then wires the inference / feedback / explainer services and
    the MongoDB event-consumer workers onto ``app.state``.

    Raises:
        AppException: If any part of startup fails (logged as critical first).
    """
    try:
        mongo_client = MongoDBClient()

        # Load the trained booster; constrain inference to one thread per call.
        xgb_booster = xgb.Booster()
        xgb_booster.load_model(Path(REGISTERED_MODELS_DIR, "artifacts", "booster.json"))
        xgb_booster.set_param({"nthread": 1})

        # Load the fitted text vectorizer.
        vectorizer = load_obj(Path(REGISTERED_MODELS_DIR, "artifacts"), "vectorizer.joblib")

        # Decision threshold chosen during evaluation (fallback: 0.5).
        # Path components passed uniformly (was "artifacts/metrics.json" mixed style).
        with open(Path(REGISTERED_MODELS_DIR, "artifacts", "metrics.json"), 'r') as f:
            metrics = json.load(f)
        eval_threshold = metrics.get("threshold", 0.5)

        # Model version from the registry metadata file.
        with open(Path("src/app/model/registered_model_meta"), 'r') as f:
            model_metadata = yaml.safe_load(f)
        if not model_metadata:
            raise FileNotFoundError("Failed to load file having model metadata")
        model_version = int(model_metadata.get("model_version", 0))

        # Background workers that batch-write events to MongoDB.
        prediction_event_consumer = BufferedEventConsumerWorker(mongo_client, DATABASE_NAME, PRODUCTION_COLLECTION_NAME)
        feedback_event_consumer = BufferedEventConsumerWorker(mongo_client, DATABASE_NAME, FEEDBACK_COLLECTION_NAME)

        # Services exposed to route handlers via app.state (see dependencies.py).
        app.state.prediction_service = InferenceService(xgb_booster, vectorizer, eval_threshold,
                                                        prediction_event_consumer, model_version)
        app.state.feedback_service = FeedbackService(feedback_event_consumer)

        lime_explainer = LimeTextExplainer(class_names=["hate", "non-hate"], bow=False)
        app.state.explainer_service = ExplainerService(lime_explainer, xgb_booster, vectorizer)

        # Typo fixed: was "Infernce".
        logging.info("Inference API app server started successfully")

    except Exception as e:
        logging.critical(f"Startup Failed: {e}", exc_info=True)
        raise AppException(e, sys)

    # Application serves requests while suspended here.
    yield

    # Application shutdown: flush workers, then close the DB connection.
    prediction_event_consumer.shutdown()
    feedback_event_consumer.shutdown()
    mongo_client.close_connection()


# Create FastAPI app
app = FastAPI(
    title="Hate Speech Detection API",
    version="2.0.0",
    description="Production-grade ML inference API with monitoring and feedback system.",
    lifespan=lifespan,
)

# Register API routes
app.include_router(router, prefix="/api")

# Register HTTP observability middleware
app.middleware("http")(http_observability_middleware)
app/middleware/__init__.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
import uuid
from fastapi import Request
from src.core.logger import logging
from src.app.monitoring.http_metrics import (HTTP_REQUESTS_TOTAL,
                                             HTTP_REQUEST_DURATION_SECONDS,
                                             HTTP_REQUESTS_IN_PROGRESS)


async def http_observability_middleware(request: Request, call_next):
    """Attach a correlation id and record Prometheus metrics for each request.

    The caller's ``X-Request-ID`` header is propagated (a UUID4 is minted when
    absent) and echoed back on the response. The metrics endpoint itself is
    skipped so scrapes do not inflate the counters. On an unhandled exception
    the request is counted with status "500" and the exception is re-raised.

    Args:
        request: The incoming Starlette/FastAPI request.
        call_next: The next handler in the middleware chain.

    Returns:
        The downstream response with ``X-Request-ID`` set.
    """
    # Skip Prometheus metrics endpoint
    if request.url.path == "/api/metrics":
        return await call_next(request)

    request_id = request.headers.get("X-Request-ID") or str(uuid.uuid4())
    request.state.request_id = request_id

    method = request.method
    # Prefer the matched route template (low-cardinality label); fall back to
    # the raw URL path when no route matched.
    route = request.scope.get("route")
    path = route.path if route else request.url.path

    start_time = time.perf_counter()

    logging.info(f"[{request_id}] Incoming request {method} {path}")

    HTTP_REQUESTS_IN_PROGRESS.inc()

    # Single recording path for success and failure (the original duplicated
    # the three metric calls in both the except block and the success path).
    status = "500"  # assume failure unless call_next returns normally
    try:
        response = await call_next(request)
        status = str(response.status_code)
    finally:
        duration = time.perf_counter() - start_time
        HTTP_REQUESTS_TOTAL.labels(method=method, path=path, status=status).inc()
        HTTP_REQUEST_DURATION_SECONDS.labels(method=method, path=path).observe(duration)
        HTTP_REQUESTS_IN_PROGRESS.dec()

    response.headers["X-Request-ID"] = request_id

    return response
app/middleware/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.7 kB). View file
 
app/model/MLmodel ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifact_path: XGB-v4
2
+ flavors:
3
+ python_function:
4
+ artifacts:
5
+ booster:
6
+ path: artifacts\booster.json
7
+ uri: D:\My Projects\Toxic Tweet-Tagger\test_models\booster.json
8
+ metrics:
9
+ path: artifacts\metrics.json
10
+ uri: D:\My Projects\Toxic Tweet-Tagger\test_models\metrics.json
11
+ model:
12
+ path: artifacts\model.joblib
13
+ uri: D:\My Projects\Toxic Tweet-Tagger\test_models\model.joblib
14
+ vectorizer:
15
+ path: artifacts\vectorizer.joblib
16
+ uri: D:\My Projects\Toxic Tweet-Tagger\test_models\vectorizer.joblib
17
+ cloudpickle_version: 3.1.1
18
+ code: null
19
+ env:
20
+ conda: conda.yaml
21
+ virtualenv: python_env.yaml
22
+ loader_module: mlflow.pyfunc.model
23
+ python_model: python_model.pkl
24
+ python_version: 3.11.5
25
+ streamable: false
26
+ mlflow_version: 2.22.1
27
+ model_size_bytes: 4509986
28
+ model_uuid: a2c7dc9359894fab899c13a88f9f9a4c
29
+ prompts: null
30
+ run_id: 9b6349ac22e04cf0a22257952299e056
31
+ utc_time_created: '2026-02-17 03:36:56.916972'
app/model/artifacts/booster.json ADDED
The diff for this file is too large to render. See raw diff
 
app/model/artifacts/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"threshold": 0.48, "accuracy": 0.80601184024014, "precision": 0.7922348788705278, "recall": 0.8272049585392411, "f1 score": 0.8093423478795329, "roc_auc": 0.8852078424195188}
app/model/artifacts/model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:944104aefb800d70fccf6e65e545f3b3fbbbd79382e461ba06f98992f2e686fe
3
+ size 1240274
app/model/artifacts/vectorizer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fea91a7b6fb18543f7a7e2717e44135985df11a339efbc30b410dbad8d962fe
3
+ size 116783
app/model/conda.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ channels:
2
+ - conda-forge
3
+ dependencies:
4
+ - python=3.11.5
5
+ - pip<=25.1
6
+ - pip:
7
+ - mlflow==2.22.1
8
+ - cloudpickle==3.1.1
9
+ - numpy==2.2.6
10
+ - pandas==2.3.1
11
+ - psutil==7.0.0
12
+ - scikit-learn==1.6.1
13
+ - scipy==1.13.1
14
+ - xgboost==3.0.2
15
+ name: mlflow-env
app/model/python_env.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python: 3.11.5
2
+ build_dependencies:
3
+ - pip==25.1
4
+ - setuptools==80.9.0
5
+ - wheel==0.45.1
6
+ dependencies:
7
+ - -r requirements.txt
app/model/python_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6472cb967ef181289c33621e9dbfd2437a53589d1ca75de4307ba20a0bae7ef
3
+ size 1378921
app/model/registered_model_meta ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_name: ToxicTagger-Models
2
+ model_version: '26'
app/model/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ mlflow==2.22.1
2
+ cloudpickle==3.1.1
3
+ numpy==2.2.6
4
+ pandas==2.3.1
5
+ psutil==7.0.0
6
+ scikit-learn==1.6.1
7
+ scipy==1.13.1
8
+ xgboost==3.0.2
app/monitoring/__init__.py ADDED
File without changes
app/monitoring/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
app/monitoring/__pycache__/http_metrics.cpython-311.pyc ADDED
Binary file (931 Bytes). View file
 
app/monitoring/__pycache__/service_metrics.cpython-311.pyc ADDED
Binary file (1.96 kB). View file
 
app/monitoring/http_metrics.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Prometheus metric objects for HTTP-level observability."""

from prometheus_client import Counter, Gauge, Histogram

# Total requests, partitioned by method, route path and response status.
HTTP_REQUESTS_TOTAL = Counter(
    "http_requests_total",
    "Total number of HTTP requests handled by the inference service",
    ["method", "path", "status"],
)

# Request latency, partitioned by method and route path.
HTTP_REQUEST_DURATION_SECONDS = Histogram(
    "http_request_duration_seconds",
    "HTTP request latency in seconds",
    ["method", "path"],
    buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
)

# Requests currently in flight (inc on entry, dec on completion).
HTTP_REQUESTS_IN_PROGRESS = Gauge(
    "http_requests_in_progress",
    "Number of HTTP requests currently being processed",
)
app/monitoring/service_metrics.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Prometheus metric objects for the prediction, explainer and feedback services."""

from prometheus_client import Counter, Histogram

# Requests successfully served
PREDICTION_REQUEST_SUCCESS = Counter(
    "predict_requests_success_total",
    "Total successful prediction responses"
)

# Requests failed
PREDICTION_REQUEST_FAILED = Counter(
    "predict_requests_failed_total",
    "Total failed prediction requests"
)

# Prediction class distribution (hate / non-hate)
PREDICTION_CLASS = Counter(
    "prediction_class_total",
    "Count of predicted classes",
    ["class_label"]  # label dimension
)

# Prediction label confidence
PREDICTION_CONFIDENCE = Histogram(
    "prediction_confidence",
    "Confidence distribution by predicted class",
    ["class_label"],
    buckets=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
)

# Inference response time (typo fixed: "responce")
INFERENCE_LATENCY = Histogram(
    "model_inference_seconds",
    "Model inference time in seconds",
    buckets=[0.005, 0.01, 0.02, 0.03, 0.05, 0.1]
)

EXPLAINER_REQUEST_SUCCESS = Counter(
    "explainer_requests_success_total",
    "Total successful explain requests"
)

EXPLAINER_REQUEST_FAILED = Counter(
    "explainer_requests_failed_total",
    "Total failed explain requests"
)

# Feedback counter
# NOTE(review): metric name typo fixed ("subissions" -> "submissions") for
# consistency with feedback_submissions_failed_total below. Update any
# dashboards/alerts that referenced the misspelled series name.
FEEDBACK_REQUEST_SUCCESS = Counter(
    "feedback_submissions_success_total",
    "Total successful feedback submissions"
)

USER_PREDICTION_FEEDBACK = Counter(
    "user_prediction_feedback_total",
    "User feedback indicating whether the model prediction was correct",
    ["feedback"]
)

FEEDBACK_REQUEST_FAILED = Counter(
    "feedback_submissions_failed_total",
    "Total failed feedback submissions"
)
app/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.116.1
2
+ uvicorn==0.35.0
3
+ joblib==1.5.1
4
+ PyYAML==6.0.2
5
+ lime==0.2.0.1
6
+ gunicorn==23.0.0
7
+ prometheus-client==0.23.1
app/services/__pycache__/explainer.cpython-311.pyc ADDED
Binary file (3.12 kB). View file
 
app/services/__pycache__/feedback.cpython-311.pyc ADDED
Binary file (2.4 kB). View file
 
app/services/__pycache__/inference.cpython-311.pyc ADDED
Binary file (5.56 kB). View file
 
app/services/explainer.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
import numpy as np
from src.core.logger import logging
from src.core.exception import AppException
from src.app.monitoring.service_metrics import EXPLAINER_REQUEST_SUCCESS, EXPLAINER_REQUEST_FAILED


class ExplainerService:
    """Generates LIME explanations for model predictions, rendered as HTML."""

    def __init__(self, explainer, model_booster, vectorizer):
        """
        Initializes an instance of the LimeExplainer explanation service.

        Args:
            explainer (LimeTextExplainer): An instance of LimeTextExplainer.
            model_booster (xgb.Booster): The trained XGBoost booster
                (docstring fixed: main.py constructs ``xgb.Booster()``, not
                an ``XGBClassifier``).
            vectorizer: An instance of the vectorizer to transform input text
                into numerical features.
        """
        self.explainer = explainer
        self.booster = model_booster
        self.vectorizer = vectorizer

    def _get_prediction(self, text) -> np.ndarray:
        """
        Internal function to get class probability scores for the LIME explainer.

        ``text`` is the batch of perturbed strings LIME generates.
        """
        X = self.vectorizer.transform(text)
        prob = self.booster.inplace_predict(X)

        # A binary booster returns a 1-D array of P(class=1); stack it into
        # the [P(class=0), P(class=1)] column layout LIME expects.
        if len(prob.shape) == 1:
            prob = np.vstack([1 - prob, prob]).T
        return prob

    def explain(self, text: str):
        """
        Generate an explanation of the prediction made by the model for a given text.

        Returns:
            HTML content of the explanation which is rendered in the UI.

        Raises:
            AppException: If explanation generation fails (failure counter is
                incremented first).
        """
        try:
            # NOTE(review): num_samples=20 is far below LIME's default of 5000;
            # presumably chosen for latency, but explanation quality may be
            # unstable — confirm this trade-off is intended.
            explanation = self.explainer.explain_instance(
                text,
                self._get_prediction,
                num_features=10,
                num_samples=20
            )
            html_content = explanation.as_html()
            EXPLAINER_REQUEST_SUCCESS.inc()

            return html_content

        except Exception as e:
            EXPLAINER_REQUEST_FAILED.inc()
            logging.error(f"Explainer service failed: {e}", exc_info=True)
            raise AppException(e, sys)
app/services/feedback.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
from datetime import datetime, timezone
from src.core.logger import logging
from src.core.exception import AppException
from src.app.monitoring.service_metrics import (FEEDBACK_REQUEST_SUCCESS,
                                                USER_PREDICTION_FEEDBACK,
                                                FEEDBACK_REQUEST_FAILED)


class FeedbackService:
    """Records user feedback on predictions and updates the feedback metrics."""

    def __init__(self, feedback_event_consumer):
        # Worker that batches feedback records into MongoDB asynchronously.
        self.event_consumer_worker = feedback_event_consumer

    def submit_feedback(self, request_id, pred_label, feedback_label):
        """Queue a feedback record for storage and bump the Prometheus counters.

        Args:
            request_id: Correlation id of the original prediction request.
            pred_label: Label the model predicted.
            feedback_label: User's verdict (1 = prediction was correct).

        Returns:
            A small status dict confirming the submission.

        Raises:
            AppException: If the record cannot be queued.
        """
        try:
            record = {
                "request_id": request_id,
                "time_stamp": datetime.now(timezone.utc).isoformat(),
                "predicted_label": pred_label,
                "feedback_label": feedback_label,
            }

            self.event_consumer_worker.add_event(record)
            FEEDBACK_REQUEST_SUCCESS.inc()

            outcome = "correct" if feedback_label == 1 else "incorrect"
            USER_PREDICTION_FEEDBACK.labels(feedback=outcome).inc()

            return {
                "status": "success",
                "message": "Feedback recorded successfully",
            }

        except Exception as e:
            logging.exception(f"Failed to submit feedback for request_id: {request_id} : {e}")
            FEEDBACK_REQUEST_FAILED.inc()
            raise AppException(e, sys)
app/services/inference.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
import time
from datetime import datetime, timezone
from src.core.logger import logging
from src.core.exception import AppException
from src.app.monitoring.service_metrics import (PREDICTION_REQUEST_SUCCESS, PREDICTION_REQUEST_FAILED,
                                                PREDICTION_CLASS, INFERENCE_LATENCY, PREDICTION_CONFIDENCE)


class InferenceService:
    """Runs model inference, records monitoring metrics, and queues
    prediction events for asynchronous persistence."""

    def __init__(self, model_booster, vectorizer, eval_threshold,
                 prediction_event_consumer, model_version):
        """
        Args:
            model_booster: Trained XGBoost booster used for scoring.
            vectorizer: Fitted vectorizer mapping text to model features.
            eval_threshold: Decision threshold chosen during evaluation.
            prediction_event_consumer: Worker that batch-writes prediction records.
            model_version: Registered model version reported in responses.
        """
        self.booster = model_booster
        self.vectorizer = vectorizer
        self.threshold = eval_threshold
        self.event_consumer_worker = prediction_event_consumer
        self.model_version = model_version

    def _toxicity_band(self, p: float) -> str:
        """Map P(class=1) to a qualitative toxicity band around the threshold."""
        if p > 0.70:
            return "strong"
        if p > self.threshold + 0.05:
            return "high"
        if p > self.threshold - 0.03:
            return "uncertain"
        return "none"

    def predict(self, request_id: str, input_tweet: str, text: str):
        """
        Run inference on preprocessed ``text`` and build the response payload.

        (Docstring fixed: this is the inference *service* method invoked by the
        /predict route handler, not the endpoint itself.)

        Args:
            request_id: Correlation id assigned by the HTTP middleware.
            input_tweet: Raw tweet as submitted by the user (stored for auditing).
            text: Preprocessed text fed to the vectorizer and model.

        Returns:
            dict matching the ``InferenceResponse`` schema: prediction,
            optional low-confidence warning, and operational metadata.

        Raises:
            AppException: If vectorization or model scoring fails.
            RuntimeError: If the model produces no prediction.
        """
        try:
            timestamp = datetime.now(timezone.utc).isoformat()
            start_time = time.perf_counter()

            x = self.vectorizer.transform([text])
            prob = self.booster.inplace_predict(x)  # P(class=1)
            pred = (prob > self.threshold).astype(int)

        except Exception as e:
            logging.exception(e)
            PREDICTION_REQUEST_FAILED.inc()
            raise AppException(e, sys)

        if prob is None or len(pred) == 0:
            logging.error("No prediction made by the model")
            PREDICTION_REQUEST_FAILED.inc()
            raise RuntimeError("No prediction made by the model")

        toxicity = self._toxicity_band(float(prob[0]))

        # Confidence in the *predicted* class; margin measures distance from
        # a 50/50 decision, independent of which class was chosen.
        confidence = float(prob[0] if pred[0] == 1 else 1 - prob[0])
        confidence_margin = abs(2 * float(prob[0]) - 1)

        warnings = None
        if confidence_margin < 0.10:
            warnings = {
                "code": "LOW_CONFIDENCE_MARGIN",
                "message": (f"Prediction is close to model decision boundary. "
                            f"Confidence Margin: {round(confidence_margin, 4)}. "
                            f"Manual review is recommended!"),
            }

        # Prepare the record to insert into the database.
        prediction_record = {
            "request_id": request_id,
            "timestamp": timestamp,
            "comment": input_tweet,
            "prediction": int(pred[0]),
            "confidence": round(confidence, 4),
        }

        # Hand off to the batch writer for asynchronous insertion into MongoDB.
        self.event_consumer_worker.add_event(prediction_record)

        response_time = round(time.perf_counter() - start_time, 4)

        PREDICTION_REQUEST_SUCCESS.inc()
        # Track latency
        INFERENCE_LATENCY.observe(response_time)
        # Track class distribution
        PREDICTION_CLASS.labels(class_label=str(pred[0])).inc()
        # Track class confidence
        PREDICTION_CONFIDENCE.labels(class_label=str(pred[0])).observe(confidence)

        return {
            "id": request_id,
            "timestamp": timestamp,
            "object": "text-classification",
            "prediction": {
                "label": int(pred[0]),
                "confidence": round(confidence, 4),
                "toxicity": toxicity,
            },
            # Simplified from `warnings if warnings else None` — warnings is
            # already None when no warning was raised.
            "warnings": warnings,
            "metadata": {
                "latency": response_time,
                "usage": {
                    "word_count": len(text.split()),
                    "total_characters": len(text),
                },
                "model": {
                    "name": "XGB-Classifier-Booster",
                    "version": self.model_version,
                    "vectorizer": str(type(self.vectorizer).__name__),
                },
                "streamable": False,
                "environment": "Production",
                "api_version": "v-2.0",
            },
        }
app/workers/__init__.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This module contains the implementation of the BufferedEventConsumerWorker class, which is responsible for consuming events from a buffer queue and writing them to a MongoDB collection in batches.
2
+
3
+ import time
4
+ import threading
5
+ from typing import Any
6
+ from queue import Queue, Empty, Full
7
+ from src.core.logger import logging
8
+
9
class BufferedEventConsumerWorker:
    """Consume records from an in-memory buffer queue and write them to a
    MongoDB collection in batches.

    A single daemon worker thread drains the queue and flushes a batch when
    either ``max_batch_size`` records have accumulated or ``flush_interval``
    seconds have passed since the first record of the current batch arrived.
    ``shutdown()`` wakes the worker with a ``None`` sentinel and performs a
    final flush before the thread exits.
    """

    def __init__(self, mongo_client, database_name, collection_name, queue_maxsize: int = 1200,
                 max_batch_size: int = 1000, flush_interval: int = 30, mongo_timeout: int = 5):
        """
        Initializes the buffered event consumer worker.

        Args:
            mongo_client: Client exposing ``insert_docs(collection_name, database_name, docs)``.
            database_name (str): The name of the MongoDB database.
            collection_name (str): The name of the MongoDB collection.
            queue_maxsize (int, optional): The maximum size of the queue. Defaults to 1200.
            max_batch_size (int, optional): The maximum batch size for write operations. Defaults to 1000.
            flush_interval (int, optional): The interval (in seconds) at which to flush the queue. Defaults to 30.
            mongo_timeout (int, optional): The timeout (in seconds) for write operations. Defaults to 5.
        """
        # None is reserved as a sentinel that wakes/stops the worker thread.
        self.queue: Queue[dict[str, Any] | None] = Queue(queue_maxsize)
        self.max_batch_size = max_batch_size
        self.flush_interval = flush_interval
        # NOTE(review): mongo_timeout is stored but never applied to the
        # insert call below — confirm whether the client is meant to honour it.
        self.mongo_timeout = mongo_timeout

        self.shutdown_event: threading.Event = threading.Event()

        self.client = mongo_client
        self.database_name = database_name
        self.collection_name = collection_name

        # Daemon thread so a worker that was never shut down cannot block
        # interpreter exit.
        self.worker: threading.Thread = threading.Thread(
            target=self._writer_worker,
            daemon=True
        )
        self.worker.start()

    def add_event(self, record: dict) -> None:
        """
        Adds a record to the buffer queue.

        Blocks the caller for at most 0.3s; if the queue is still full the
        record is dropped with a warning (best-effort delivery — losing an
        event is preferred over stalling the request path).

        Args:
            record (dict): The record to add to the buffer queue.
        """
        try:
            self.queue.put(record, timeout=0.3)
        except Full:
            # F541 fix: no interpolation needed, plain string literal.
            logging.warning("Failed to add record in buffer queue: Queue is full")

    def shutdown(self) -> None:
        """
        Gracefully shut down the worker thread.
        Ensures final flush before exit.
        """
        self.shutdown_event.set()

        # Wake up worker if it's blocked on queue.get()
        self.queue.put(None)
        # Wait the main thread until worker fully exits.
        self.worker.join()


    # INTERNAL WORKER
    def _writer_worker(self) -> None:
        """
        The worker thread responsible for consuming records from the buffer queue and writing them to MongoDB.

        It runs until the shutdown event is set (signalled by a ``None``
        sentinel), at which point it performs a final flush and exits.
        Batches are flushed when they reach ``max_batch_size`` or when
        ``flush_interval`` seconds have elapsed since the first record of the
        batch arrived; while the batch is empty the thread waits indefinitely
        for the next record.
        """
        batch = []
        # Timestamp of the first record in the current batch; None while the
        # batch is empty. Drives the flush-interval timeout below.
        first_record_time = None

        while not self.shutdown_event.is_set():
            try:
                if first_record_time is None:
                    # No records yet → wait indefinitely
                    record = self.queue.get()
                else:
                    # Wait only for the remainder of the flush window.
                    elapsed = time.time() - first_record_time
                    remaining = max(self.flush_interval - elapsed, 0)
                    record = self.queue.get(timeout=remaining)

                if record is None:
                    # Shutdown sentinel — exit loop; final flush happens below.
                    break

                batch.append(record)

                if first_record_time is None:
                    first_record_time = time.time()

                # Drain quickly if batch growing
                while len(batch) < self.max_batch_size:
                    try:
                        record = self.queue.get_nowait()
                        if record is None:
                            # Sentinel seen mid-drain; outer while exits on the
                            # already-set shutdown_event.
                            break
                        batch.append(record)
                    except Empty:
                        break

            except Empty:
                # Timeout reached — fall through to the flush check.
                pass

            # Flush conditions
            if batch and (
                len(batch) >= self.max_batch_size or
                (first_record_time and
                 time.time() - first_record_time >= self.flush_interval)
            ):
                self._flush(batch)
                batch.clear()
                first_record_time = None

        # Final flush on shutdown
        if batch:
            self._flush(batch)
        logging.info("BufferedEventConsumer worker stopped cleanly.")

    # Database flush
    def _flush(self, batch_records: list):
        """
        Writes batch records to MongoDB with basic failure handling.

        Failures are logged (with traceback) and swallowed; the failed batch
        is not retried.

        Args:
            batch_records (list): The list of records to flush to the database.
        """
        try:
            self.client.insert_docs(self.collection_name,
                                    self.database_name,
                                    batch_records
                                    )
            # Lazy %-formatting: args are only rendered if the level is enabled.
            logging.info("Flushed %d records to MongoDB", len(batch_records))

        except Exception as e:
            logging.error("BufferedBatchWriter failed to flush: %s", e, exc_info=True)
app/workers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (7.08 kB). View file
 
components/__init__.py ADDED
File without changes
components/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (163 Bytes). View file
 
components/__pycache__/data_ingestion.cpython-311.pyc ADDED
Binary file (5 kB). View file
 
components/__pycache__/data_preprocessing.cpython-311.pyc ADDED
Binary file (9.21 kB). View file
 
components/__pycache__/data_validation.cpython-311.pyc ADDED
Binary file (13.9 kB). View file