turing-space / turing /monitoring /feedback_manager.py
github-actions[bot]
Sync turing folder from GitHub
38593e7
"""Feedback ingestion utilities for drift analysis."""
from pathlib import Path
from typing import List, Tuple
from loguru import logger
import numpy as np
import pandas as pd
from turing import config
def load_feedback_for_language(
    feedback_path: Path,
    language: str,
) -> Tuple[List[str], np.ndarray]:
    """
    Load user feedback for one language as texts plus one-hot label rows.

    The CSV at ``feedback_path`` is filtered to rows whose ``Language`` cell
    equals ``language`` (ignoring case and surrounding whitespace). Each kept
    row's ``User_Correction`` is matched, case-insensitively, against the
    label list found in ``config.LABELS_MAP`` for that language.

    Args:
        feedback_path: Location of the feedback CSV. A plain string path is
            accepted as well as a ``Path``.
        language: Language name used both to filter rows and to select the
            label list. The exact key is tried first in
            ``config.LABELS_MAP``; a case-insensitive match is the fallback.

    Returns:
        ``(texts, labels)`` where ``texts`` is the list of ``Input_Text``
        values (as strings) and ``labels`` is an integer array with one
        one-hot row per text and one column per label. When no row is usable
        the result is ``([], np.array([]))``.

    Raises:
        FileNotFoundError: If ``feedback_path`` does not exist.
        ValueError: If a required column is missing, or no non-empty label
            list exists for ``language``.

    Rows with an unknown correction label or a missing ``Input_Text`` are
    skipped with a warning.
    """
    # Coerce so callers may pass a plain string path.
    feedback_path = Path(feedback_path)
    if not feedback_path.exists():
        raise FileNotFoundError(f"Feedback file not found: {feedback_path}")

    df = pd.read_csv(feedback_path)

    required_columns = ("Language", "Input_Text", "User_Correction")
    if any(column not in df.columns for column in required_columns):
        raise ValueError(
            "Feedback file must contain Language, Input_Text, and User_Correction columns"
        )

    wanted = language.strip().lower()
    # fillna/astype keep the .str accessor usable even when pandas parsed the
    # column as non-string (e.g. every cell empty), and stop missing cells
    # from matching anything.
    language_column = df["Language"].fillna("").astype(str).str.strip().str.lower()
    df_lang = df[language_column == wanted]
    if df_lang.empty:
        logger.warning(f"No feedback rows found for language {language}")
        return [], np.array([])

    label_space = config.LABELS_MAP.get(language)
    if not label_space:
        # Rows are matched case-insensitively, so resolve the label list the
        # same way; otherwise "Java" would find rows but no labels.
        # NOTE(review): assumes LABELS_MAP is dict-like (supports .items()).
        label_space = next(
            (
                candidate
                for key, candidate in config.LABELS_MAP.items()
                if candidate and str(key).strip().lower() == wanted
            ),
            None,
        )
    if not label_space:
        raise ValueError(f"Label map not found for language: {language}")

    label_to_idx = {
        str(label).strip().lower(): idx for idx, label in enumerate(label_space)
    }

    texts: List[str] = []
    label_indices: List[int] = []
    for raw_text, raw_correction in zip(df_lang["Input_Text"], df_lang["User_Correction"]):
        if pd.isna(raw_text):
            # str(NaN) would otherwise inject the literal text "nan".
            logger.warning("Skipping feedback row with missing Input_Text")
            continue
        idx = label_to_idx.get(str(raw_correction).strip().lower())
        if idx is None:
            logger.warning(f"Skipping feedback row with unknown label: {raw_correction}")
            continue
        texts.append(str(raw_text))
        label_indices.append(idx)

    if not texts:
        logger.warning(f"No valid feedback rows for language {language}")
        return [], np.array([])

    # Indexing an identity matrix by label index yields one one-hot row per text.
    return texts, np.eye(len(label_space), dtype=int)[label_indices]