Upload folder using huggingface_hub
Browse files
src/nodes/vectorizationAgentNode.py
CHANGED
|
@@ -6,7 +6,7 @@ Uses language-specific BERT models for Sinhala, Tamil, and English
|
|
| 6 |
|
| 7 |
import sys
|
| 8 |
import logging
|
| 9 |
-
from datetime import datetime
|
| 10 |
from typing import Dict, Any, List
|
| 11 |
from pathlib import Path
|
| 12 |
import numpy as np
|
|
@@ -794,7 +794,7 @@ Format your response in a clear, structured manner."""
|
|
| 794 |
"domain": "vectorization",
|
| 795 |
"category": "text_analysis",
|
| 796 |
"summary": f"Processed {len(embeddings)} texts with multilingual BERT models",
|
| 797 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 798 |
"severity": "low",
|
| 799 |
"impact_type": "analysis",
|
| 800 |
"confidence": 0.9,
|
|
@@ -820,7 +820,7 @@ Format your response in a clear, structured manner."""
|
|
| 820 |
"domain": "anomaly_detection",
|
| 821 |
"category": "ml_analysis",
|
| 822 |
"summary": f"ML Anomaly Detection: {len(anomalies)} anomalies found in {anomaly_results.get('total_analyzed', 0)} texts",
|
| 823 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 824 |
"severity": "high" if len(anomalies) > 5 else "medium",
|
| 825 |
"impact_type": "risk",
|
| 826 |
"confidence": 0.85,
|
|
@@ -840,7 +840,7 @@ Format your response in a clear, structured manner."""
|
|
| 840 |
"domain": "anomaly_detection",
|
| 841 |
"category": "anomaly",
|
| 842 |
"summary": f"Anomaly detected (score: {anomaly.get('anomaly_score', 0):.2f})",
|
| 843 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 844 |
"severity": (
|
| 845 |
"high"
|
| 846 |
if anomaly.get("anomaly_score", 0) > 0.7
|
|
@@ -863,7 +863,7 @@ Format your response in a clear, structured manner."""
|
|
| 863 |
"domain": "anomaly_detection",
|
| 864 |
"category": "system_info",
|
| 865 |
"summary": "ML model not trained yet - using severity-based fallback",
|
| 866 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 867 |
"severity": "low",
|
| 868 |
"impact_type": "info",
|
| 869 |
"confidence": 1.0,
|
|
@@ -878,7 +878,7 @@ Format your response in a clear, structured manner."""
|
|
| 878 |
"domain": "vectorization",
|
| 879 |
"category": "opportunity",
|
| 880 |
"summary": opp.get("description", "Opportunity detected"),
|
| 881 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 882 |
"severity": "medium",
|
| 883 |
"impact_type": "opportunity",
|
| 884 |
"confidence": opp.get("confidence", 0.7),
|
|
@@ -893,7 +893,7 @@ Format your response in a clear, structured manner."""
|
|
| 893 |
"domain": "vectorization",
|
| 894 |
"category": "threat",
|
| 895 |
"summary": threat.get("description", "Threat detected"),
|
| 896 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 897 |
"severity": "high",
|
| 898 |
"impact_type": "risk",
|
| 899 |
"confidence": threat.get("confidence", 0.7),
|
|
@@ -903,7 +903,7 @@ Format your response in a clear, structured manner."""
|
|
| 903 |
# Final output
|
| 904 |
final_output = {
|
| 905 |
"batch_id": batch_id,
|
| 906 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 907 |
"total_texts": len(embeddings),
|
| 908 |
"processing_stats": processing_stats,
|
| 909 |
"expert_summary": expert_summary,
|
|
|
|
| 6 |
|
| 7 |
import sys
|
| 8 |
import logging
|
| 9 |
+
from datetime import datetime, timezone
|
| 10 |
from typing import Dict, Any, List
|
| 11 |
from pathlib import Path
|
| 12 |
import numpy as np
|
|
|
|
| 794 |
"domain": "vectorization",
|
| 795 |
"category": "text_analysis",
|
| 796 |
"summary": f"Processed {len(embeddings)} texts with multilingual BERT models",
|
| 797 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 798 |
"severity": "low",
|
| 799 |
"impact_type": "analysis",
|
| 800 |
"confidence": 0.9,
|
|
|
|
| 820 |
"domain": "anomaly_detection",
|
| 821 |
"category": "ml_analysis",
|
| 822 |
"summary": f"ML Anomaly Detection: {len(anomalies)} anomalies found in {anomaly_results.get('total_analyzed', 0)} texts",
|
| 823 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 824 |
"severity": "high" if len(anomalies) > 5 else "medium",
|
| 825 |
"impact_type": "risk",
|
| 826 |
"confidence": 0.85,
|
|
|
|
| 840 |
"domain": "anomaly_detection",
|
| 841 |
"category": "anomaly",
|
| 842 |
"summary": f"Anomaly detected (score: {anomaly.get('anomaly_score', 0):.2f})",
|
| 843 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 844 |
"severity": (
|
| 845 |
"high"
|
| 846 |
if anomaly.get("anomaly_score", 0) > 0.7
|
|
|
|
| 863 |
"domain": "anomaly_detection",
|
| 864 |
"category": "system_info",
|
| 865 |
"summary": "ML model not trained yet - using severity-based fallback",
|
| 866 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 867 |
"severity": "low",
|
| 868 |
"impact_type": "info",
|
| 869 |
"confidence": 1.0,
|
|
|
|
| 878 |
"domain": "vectorization",
|
| 879 |
"category": "opportunity",
|
| 880 |
"summary": opp.get("description", "Opportunity detected"),
|
| 881 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 882 |
"severity": "medium",
|
| 883 |
"impact_type": "opportunity",
|
| 884 |
"confidence": opp.get("confidence", 0.7),
|
|
|
|
| 893 |
"domain": "vectorization",
|
| 894 |
"category": "threat",
|
| 895 |
"summary": threat.get("description", "Threat detected"),
|
| 896 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 897 |
"severity": "high",
|
| 898 |
"impact_type": "risk",
|
| 899 |
"confidence": threat.get("confidence", 0.7),
|
|
|
|
| 903 |
# Final output
|
| 904 |
final_output = {
|
| 905 |
"batch_id": batch_id,
|
| 906 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 907 |
"total_texts": len(embeddings),
|
| 908 |
"processing_stats": processing_stats,
|
| 909 |
"expert_summary": expert_summary,
|
src/utils/trending_detector.py
CHANGED
|
@@ -15,7 +15,7 @@ import json
|
|
| 15 |
import sqlite3
|
| 16 |
import hashlib
|
| 17 |
import logging
|
| 18 |
-
from datetime import datetime, timedelta
|
| 19 |
from typing import List, Dict, Any, Optional, Tuple
|
| 20 |
from pathlib import Path
|
| 21 |
|
|
@@ -110,7 +110,7 @@ class TrendingDetector:
|
|
| 110 |
|
| 111 |
def _get_hour_bucket(self, dt: datetime = None) -> str:
|
| 112 |
"""Get the hour bucket string (YYYY-MM-DD-HH)"""
|
| 113 |
-
dt = dt or datetime.utcnow()
|
| 114 |
return dt.strftime("%Y-%m-%d-%H")
|
| 115 |
|
| 116 |
def record_mention(
|
|
@@ -130,7 +130,7 @@ class TrendingDetector:
|
|
| 130 |
timestamp: When the mention occurred (default: now)
|
| 131 |
"""
|
| 132 |
topic_hash = self._topic_hash(topic)
|
| 133 |
-
ts = timestamp or datetime.utcnow()
|
| 134 |
hour_bucket = self._get_hour_bucket(ts)
|
| 135 |
|
| 136 |
with sqlite3.connect(self.db_path) as conn:
|
|
@@ -180,7 +180,7 @@ class TrendingDetector:
|
|
| 180 |
Momentum value (1.0 = normal, >2.0 = trending, >3.0 = spike)
|
| 181 |
"""
|
| 182 |
topic_hash = self._topic_hash(topic)
|
| 183 |
-
now = datetime.utcnow()
|
| 184 |
current_hour = self._get_hour_bucket(now)
|
| 185 |
|
| 186 |
with sqlite3.connect(self.db_path) as conn:
|
|
@@ -230,7 +230,7 @@ class TrendingDetector:
|
|
| 230 |
Returns:
|
| 231 |
List of trending topics with their momentum values
|
| 232 |
"""
|
| 233 |
-
now = datetime.utcnow()
|
| 234 |
current_hour = self._get_hour_bucket(now)
|
| 235 |
|
| 236 |
trending = []
|
|
@@ -291,7 +291,7 @@ class TrendingDetector:
|
|
| 291 |
List of hourly counts
|
| 292 |
"""
|
| 293 |
topic_hash = self._topic_hash(topic)
|
| 294 |
-
now = datetime.utcnow()
|
| 295 |
|
| 296 |
history = []
|
| 297 |
with sqlite3.connect(self.db_path) as conn:
|
|
@@ -320,7 +320,7 @@ class TrendingDetector:
|
|
| 320 |
Args:
|
| 321 |
days: Number of days to keep
|
| 322 |
"""
|
| 323 |
-
cutoff = datetime.utcnow() - timedelta(days=days)
|
| 324 |
cutoff_str = cutoff.isoformat()
|
| 325 |
cutoff_bucket = self._get_hour_bucket(cutoff)
|
| 326 |
|
|
|
|
| 15 |
import sqlite3
|
| 16 |
import hashlib
|
| 17 |
import logging
|
| 18 |
+
from datetime import datetime, timedelta, timezone
|
| 19 |
from typing import List, Dict, Any, Optional, Tuple
|
| 20 |
from pathlib import Path
|
| 21 |
|
|
|
|
| 110 |
|
| 111 |
def _get_hour_bucket(self, dt: datetime = None) -> str:
|
| 112 |
"""Get the hour bucket string (YYYY-MM-DD-HH)"""
|
| 113 |
+
dt = dt or datetime.now(timezone.utc)
|
| 114 |
return dt.strftime("%Y-%m-%d-%H")
|
| 115 |
|
| 116 |
def record_mention(
|
|
|
|
| 130 |
timestamp: When the mention occurred (default: now)
|
| 131 |
"""
|
| 132 |
topic_hash = self._topic_hash(topic)
|
| 133 |
+
ts = timestamp or datetime.now(timezone.utc)
|
| 134 |
hour_bucket = self._get_hour_bucket(ts)
|
| 135 |
|
| 136 |
with sqlite3.connect(self.db_path) as conn:
|
|
|
|
| 180 |
Momentum value (1.0 = normal, >2.0 = trending, >3.0 = spike)
|
| 181 |
"""
|
| 182 |
topic_hash = self._topic_hash(topic)
|
| 183 |
+
now = datetime.now(timezone.utc)
|
| 184 |
current_hour = self._get_hour_bucket(now)
|
| 185 |
|
| 186 |
with sqlite3.connect(self.db_path) as conn:
|
|
|
|
| 230 |
Returns:
|
| 231 |
List of trending topics with their momentum values
|
| 232 |
"""
|
| 233 |
+
now = datetime.now(timezone.utc)
|
| 234 |
current_hour = self._get_hour_bucket(now)
|
| 235 |
|
| 236 |
trending = []
|
|
|
|
| 291 |
List of hourly counts
|
| 292 |
"""
|
| 293 |
topic_hash = self._topic_hash(topic)
|
| 294 |
+
now = datetime.now(timezone.utc)
|
| 295 |
|
| 296 |
history = []
|
| 297 |
with sqlite3.connect(self.db_path) as conn:
|
|
|
|
| 320 |
Args:
|
| 321 |
days: Number of days to keep
|
| 322 |
"""
|
| 323 |
+
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
|
| 324 |
cutoff_str = cutoff.isoformat()
|
| 325 |
cutoff_bucket = self._get_hour_bucket(cutoff)
|
| 326 |
|
vectorizer_anomaly_visualization.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|