nivakaran committed on
Commit
c565e08
·
verified ·
1 Parent(s): 16ec2cf

Upload folder using huggingface_hub

Browse files
src/nodes/vectorizationAgentNode.py CHANGED
@@ -6,7 +6,7 @@ Uses language-specific BERT models for Sinhala, Tamil, and English
6
 
7
  import sys
8
  import logging
9
- from datetime import datetime
10
  from typing import Dict, Any, List
11
  from pathlib import Path
12
  import numpy as np
@@ -794,7 +794,7 @@ Format your response in a clear, structured manner."""
794
  "domain": "vectorization",
795
  "category": "text_analysis",
796
  "summary": f"Processed {len(embeddings)} texts with multilingual BERT models",
797
- "timestamp": datetime.utcnow().isoformat(),
798
  "severity": "low",
799
  "impact_type": "analysis",
800
  "confidence": 0.9,
@@ -820,7 +820,7 @@ Format your response in a clear, structured manner."""
820
  "domain": "anomaly_detection",
821
  "category": "ml_analysis",
822
  "summary": f"ML Anomaly Detection: {len(anomalies)} anomalies found in {anomaly_results.get('total_analyzed', 0)} texts",
823
- "timestamp": datetime.utcnow().isoformat(),
824
  "severity": "high" if len(anomalies) > 5 else "medium",
825
  "impact_type": "risk",
826
  "confidence": 0.85,
@@ -840,7 +840,7 @@ Format your response in a clear, structured manner."""
840
  "domain": "anomaly_detection",
841
  "category": "anomaly",
842
  "summary": f"Anomaly detected (score: {anomaly.get('anomaly_score', 0):.2f})",
843
- "timestamp": datetime.utcnow().isoformat(),
844
  "severity": (
845
  "high"
846
  if anomaly.get("anomaly_score", 0) > 0.7
@@ -863,7 +863,7 @@ Format your response in a clear, structured manner."""
863
  "domain": "anomaly_detection",
864
  "category": "system_info",
865
  "summary": "ML model not trained yet - using severity-based fallback",
866
- "timestamp": datetime.utcnow().isoformat(),
867
  "severity": "low",
868
  "impact_type": "info",
869
  "confidence": 1.0,
@@ -878,7 +878,7 @@ Format your response in a clear, structured manner."""
878
  "domain": "vectorization",
879
  "category": "opportunity",
880
  "summary": opp.get("description", "Opportunity detected"),
881
- "timestamp": datetime.utcnow().isoformat(),
882
  "severity": "medium",
883
  "impact_type": "opportunity",
884
  "confidence": opp.get("confidence", 0.7),
@@ -893,7 +893,7 @@ Format your response in a clear, structured manner."""
893
  "domain": "vectorization",
894
  "category": "threat",
895
  "summary": threat.get("description", "Threat detected"),
896
- "timestamp": datetime.utcnow().isoformat(),
897
  "severity": "high",
898
  "impact_type": "risk",
899
  "confidence": threat.get("confidence", 0.7),
@@ -903,7 +903,7 @@ Format your response in a clear, structured manner."""
903
  # Final output
904
  final_output = {
905
  "batch_id": batch_id,
906
- "timestamp": datetime.utcnow().isoformat(),
907
  "total_texts": len(embeddings),
908
  "processing_stats": processing_stats,
909
  "expert_summary": expert_summary,
 
6
 
7
  import sys
8
  import logging
9
+ from datetime import datetime, timezone
10
  from typing import Dict, Any, List
11
  from pathlib import Path
12
  import numpy as np
 
794
  "domain": "vectorization",
795
  "category": "text_analysis",
796
  "summary": f"Processed {len(embeddings)} texts with multilingual BERT models",
797
+ "timestamp": datetime.now(timezone.utc).isoformat(),
798
  "severity": "low",
799
  "impact_type": "analysis",
800
  "confidence": 0.9,
 
820
  "domain": "anomaly_detection",
821
  "category": "ml_analysis",
822
  "summary": f"ML Anomaly Detection: {len(anomalies)} anomalies found in {anomaly_results.get('total_analyzed', 0)} texts",
823
+ "timestamp": datetime.now(timezone.utc).isoformat(),
824
  "severity": "high" if len(anomalies) > 5 else "medium",
825
  "impact_type": "risk",
826
  "confidence": 0.85,
 
840
  "domain": "anomaly_detection",
841
  "category": "anomaly",
842
  "summary": f"Anomaly detected (score: {anomaly.get('anomaly_score', 0):.2f})",
843
+ "timestamp": datetime.now(timezone.utc).isoformat(),
844
  "severity": (
845
  "high"
846
  if anomaly.get("anomaly_score", 0) > 0.7
 
863
  "domain": "anomaly_detection",
864
  "category": "system_info",
865
  "summary": "ML model not trained yet - using severity-based fallback",
866
+ "timestamp": datetime.now(timezone.utc).isoformat(),
867
  "severity": "low",
868
  "impact_type": "info",
869
  "confidence": 1.0,
 
878
  "domain": "vectorization",
879
  "category": "opportunity",
880
  "summary": opp.get("description", "Opportunity detected"),
881
+ "timestamp": datetime.now(timezone.utc).isoformat(),
882
  "severity": "medium",
883
  "impact_type": "opportunity",
884
  "confidence": opp.get("confidence", 0.7),
 
893
  "domain": "vectorization",
894
  "category": "threat",
895
  "summary": threat.get("description", "Threat detected"),
896
+ "timestamp": datetime.now(timezone.utc).isoformat(),
897
  "severity": "high",
898
  "impact_type": "risk",
899
  "confidence": threat.get("confidence", 0.7),
 
903
  # Final output
904
  final_output = {
905
  "batch_id": batch_id,
906
+ "timestamp": datetime.now(timezone.utc).isoformat(),
907
  "total_texts": len(embeddings),
908
  "processing_stats": processing_stats,
909
  "expert_summary": expert_summary,
src/utils/trending_detector.py CHANGED
@@ -15,7 +15,7 @@ import json
15
  import sqlite3
16
  import hashlib
17
  import logging
18
- from datetime import datetime, timedelta
19
  from typing import List, Dict, Any, Optional, Tuple
20
  from pathlib import Path
21
 
@@ -110,7 +110,7 @@ class TrendingDetector:
110
 
111
  def _get_hour_bucket(self, dt: datetime = None) -> str:
112
  """Get the hour bucket string (YYYY-MM-DD-HH)"""
113
- dt = dt or datetime.utcnow()
114
  return dt.strftime("%Y-%m-%d-%H")
115
 
116
  def record_mention(
@@ -130,7 +130,7 @@ class TrendingDetector:
130
  timestamp: When the mention occurred (default: now)
131
  """
132
  topic_hash = self._topic_hash(topic)
133
- ts = timestamp or datetime.utcnow()
134
  hour_bucket = self._get_hour_bucket(ts)
135
 
136
  with sqlite3.connect(self.db_path) as conn:
@@ -180,7 +180,7 @@ class TrendingDetector:
180
  Momentum value (1.0 = normal, >2.0 = trending, >3.0 = spike)
181
  """
182
  topic_hash = self._topic_hash(topic)
183
- now = datetime.utcnow()
184
  current_hour = self._get_hour_bucket(now)
185
 
186
  with sqlite3.connect(self.db_path) as conn:
@@ -230,7 +230,7 @@ class TrendingDetector:
230
  Returns:
231
  List of trending topics with their momentum values
232
  """
233
- now = datetime.utcnow()
234
  current_hour = self._get_hour_bucket(now)
235
 
236
  trending = []
@@ -291,7 +291,7 @@ class TrendingDetector:
291
  List of hourly counts
292
  """
293
  topic_hash = self._topic_hash(topic)
294
- now = datetime.utcnow()
295
 
296
  history = []
297
  with sqlite3.connect(self.db_path) as conn:
@@ -320,7 +320,7 @@ class TrendingDetector:
320
  Args:
321
  days: Number of days to keep
322
  """
323
- cutoff = datetime.utcnow() - timedelta(days=days)
324
  cutoff_str = cutoff.isoformat()
325
  cutoff_bucket = self._get_hour_bucket(cutoff)
326
 
 
15
  import sqlite3
16
  import hashlib
17
  import logging
18
+ from datetime import datetime, timedelta, timezone
19
  from typing import List, Dict, Any, Optional, Tuple
20
  from pathlib import Path
21
 
 
110
 
111
  def _get_hour_bucket(self, dt: datetime = None) -> str:
112
  """Get the hour bucket string (YYYY-MM-DD-HH)"""
113
+ dt = dt or datetime.now(timezone.utc)
114
  return dt.strftime("%Y-%m-%d-%H")
115
 
116
  def record_mention(
 
130
  timestamp: When the mention occurred (default: now)
131
  """
132
  topic_hash = self._topic_hash(topic)
133
+ ts = timestamp or datetime.now(timezone.utc)
134
  hour_bucket = self._get_hour_bucket(ts)
135
 
136
  with sqlite3.connect(self.db_path) as conn:
 
180
  Momentum value (1.0 = normal, >2.0 = trending, >3.0 = spike)
181
  """
182
  topic_hash = self._topic_hash(topic)
183
+ now = datetime.now(timezone.utc)
184
  current_hour = self._get_hour_bucket(now)
185
 
186
  with sqlite3.connect(self.db_path) as conn:
 
230
  Returns:
231
  List of trending topics with their momentum values
232
  """
233
+ now = datetime.now(timezone.utc)
234
  current_hour = self._get_hour_bucket(now)
235
 
236
  trending = []
 
291
  List of hourly counts
292
  """
293
  topic_hash = self._topic_hash(topic)
294
+ now = datetime.now(timezone.utc)
295
 
296
  history = []
297
  with sqlite3.connect(self.db_path) as conn:
 
320
  Args:
321
  days: Number of days to keep
322
  """
323
+ cutoff = datetime.now(timezone.utc) - timedelta(days=days)
324
  cutoff_str = cutoff.isoformat()
325
  cutoff_bucket = self._get_hour_bucket(cutoff)
326
 
vectorizer_anomaly_visualization.png CHANGED

Git LFS Details

  • SHA256: 5f0bc1c338b312268601e3e62ea34d3f8301745b3519cec96f3362becc030813
  • Pointer size: 131 Bytes
  • Size of remote file: 182 kB

Git LFS Details

  • SHA256: d81dade48c9da94862b253c9273061f51b9ad9d31215289832119f425064c06c
  • Pointer size: 130 Bytes
  • Size of remote file: 49.4 kB