KES-Hack / src /services /feedback_store.py
Meshyboi's picture
Upload 86 files
214209a verified
"""
Feedback Store — Continuous Learning Data Collection
Stores user-submitted feedback (false positives / false negatives)
for future model retraining and quality monitoring.
"""
import json
import logging
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
logger = logging.getLogger(__name__)
FEEDBACK_FILE = Path(__file__).parent.parent.parent / "feedback.json"
MAX_FEEDBACK = 1000
class FeedbackStore:
"""Stores and manages user feedback for continuous learning."""
def __init__(self):
self.feedback: List[Dict[str, Any]] = []
self._load()
def _load(self):
"""Load feedback from disk."""
try:
if FEEDBACK_FILE.exists():
with open(FEEDBACK_FILE, "r") as f:
self.feedback = json.load(f)
logger.info(f"[Feedback] Loaded {len(self.feedback)} feedback entries")
except Exception as e:
logger.error(f"[Feedback] Failed to load feedback: {e}")
self.feedback = []
def _save(self):
"""Persist feedback to disk."""
try:
FEEDBACK_FILE.parent.mkdir(parents=True, exist_ok=True)
with open(FEEDBACK_FILE, "w") as f:
json.dump(self.feedback, f, indent=2, default=str)
except Exception as e:
logger.error(f"[Feedback] Failed to save feedback: {e}")
def add_feedback(
self,
url: str,
original_verdict: str,
original_score: int,
user_label: str,
user_id: Optional[str] = None,
raw_text: Optional[str] = None
) -> Dict[str, Any]:
"""
Record a user's feedback on a detection result.
Args:
url: The URL that was analyzed
original_verdict: The system's original verdict (e.g., "high_risk")
original_score: The system's original risk score
user_label: The user's correction — "safe" or "phishing"
user_id: Optional anonymous user ID
raw_text: Optional raw text that was analyzed
Returns:
The stored feedback entry
"""
entry = {
"id": len(self.feedback) + 1,
"url": url,
"original_verdict": original_verdict,
"original_score": original_score,
"user_label": user_label,
"user_id": user_id,
"raw_text": raw_text[:500] if raw_text else None,
"timestamp": datetime.utcnow().isoformat() + "Z",
"is_false_positive": (
original_verdict in ("high_risk", "medium_risk") and user_label == "safe"
),
"is_false_negative": (
original_verdict in ("safe", "low_risk", "trusted") and user_label == "phishing"
)
}
self.feedback.insert(0, entry)
# Cap at MAX_FEEDBACK
if len(self.feedback) > MAX_FEEDBACK:
self.feedback = self.feedback[:MAX_FEEDBACK]
self._save()
logger.info(
f"[Feedback] Recorded: url={url[:40]}... "
f"verdict={original_verdict} → user_label={user_label}"
)
return entry
def get_stats(self) -> Dict[str, Any]:
"""
Compute feedback statistics for the dashboard.
Returns:
{
"total": int,
"false_positives": int,
"false_negatives": int,
"fp_rate": float (0–1),
"fn_rate": float (0–1),
"recent_feedback": list (last 10)
}
"""
total = len(self.feedback)
fp = sum(1 for f in self.feedback if f.get("is_false_positive"))
fn = sum(1 for f in self.feedback if f.get("is_false_negative"))
return {
"total": total,
"false_positives": fp,
"false_negatives": fn,
"fp_rate": round(fp / total, 4) if total > 0 else 0.0,
"fn_rate": round(fn / total, 4) if total > 0 else 0.0,
"recent_feedback": self.feedback[:10]
}
def get_pending_review(self, limit: int = 50) -> List[Dict[str, Any]]:
"""
Get feedback entries that could be used for model retraining.
Prioritizes false positives and false negatives.
"""
actionable = [
f for f in self.feedback
if f.get("is_false_positive") or f.get("is_false_negative")
]
return actionable[:limit]
# Global singleton
feedback_store = FeedbackStore()