Spaces:
Running
Running
| """Feedback ingestion utilities for drift analysis.""" | |
| from pathlib import Path | |
| from typing import List, Tuple | |
| from loguru import logger | |
| import numpy as np | |
| import pandas as pd | |
| from turing import config | |
def load_feedback_for_language(
    feedback_path: Path,
    language: str,
) -> Tuple[List[str], np.ndarray]:
    """
    Load user feedback for one language and return texts with one-hot labels.

    The CSV at ``feedback_path`` is filtered to rows whose ``Language`` value
    matches ``language`` (case-insensitive, surrounding whitespace ignored).
    Each remaining row's ``User_Correction`` is looked up (stripped,
    case-insensitive) in the label list that ``config.LABELS_MAP`` holds for
    the language; the position in that list becomes the hot index.

    Rows with an unknown label or a missing ``Input_Text`` are skipped with a
    warning.

    Args:
        feedback_path: Path to the feedback CSV file.
        language: Language whose feedback rows should be loaded.

    Returns:
        ``(texts, labels)`` where ``texts`` is a list of input strings and
        ``labels`` is an integer array of shape ``(len(texts), n_labels)``
        with exactly one ``1`` per row. When there is nothing to return
        (no rows for the language, or no valid rows), the result is
        ``([], np.array([]))`` -- note the empty array is 1-D.

    Raises:
        FileNotFoundError: If ``feedback_path`` does not exist.
        ValueError: If a required column is missing from the CSV, or if no
            (non-empty) label list is configured for ``language``.
    """
    if not feedback_path.exists():
        raise FileNotFoundError(f"Feedback file not found: {feedback_path}")

    df = pd.read_csv(feedback_path)

    required_columns = ("Language", "Input_Text", "User_Correction")
    if any(column not in df.columns for column in required_columns):
        raise ValueError(
            "Feedback file must contain Language, Input_Text, and User_Correction columns"
        )

    # astype(str) keeps the .str accessor usable when pandas inferred a
    # non-string dtype for the column (e.g. an entirely blank column is read
    # as float64, on which .str raises AttributeError).
    wanted = language.strip().lower()
    language_column = df["Language"].astype(str).str.strip().str.lower()
    df_lang = df[language_column == wanted]
    if df_lang.empty:
        logger.warning(f"No feedback rows found for language {language}")
        return [], np.array([])

    label_space = config.LABELS_MAP.get(language)
    if not label_space:
        # Rows are matched case-insensitively above, so fall back to a
        # case-insensitive key match before giving up.
        # NOTE(review): assumes LABELS_MAP is a dict-like mapping -- confirm.
        label_space = next(
            (
                labels
                for key, labels in config.LABELS_MAP.items()
                if str(key).strip().lower() == wanted
            ),
            None,
        )
    if not label_space:
        raise ValueError(f"Label map not found for language: {language}")

    label_to_idx = {label.lower(): idx for idx, label in enumerate(label_space)}

    texts: List[str] = []
    hot_indices: List[int] = []
    # Iterating the two columns directly avoids the per-row Series that
    # iterrows() builds.
    for text, raw_correction in zip(df_lang["Input_Text"], df_lang["User_Correction"]):
        idx = label_to_idx.get(str(raw_correction).strip().lower())
        if idx is None:
            logger.warning(f"Skipping feedback row with unknown label: {raw_correction}")
            continue
        if pd.isna(text):
            # str(NaN) would otherwise leak the literal text "nan" into the
            # returned samples.
            logger.warning("Skipping feedback row with missing Input_Text")
            continue
        texts.append(str(text))
        hot_indices.append(idx)

    if not texts:
        logger.warning(f"No valid feedback rows for language {language}")
        return [], np.array([])

    # Build the whole one-hot matrix in one shot: row i gets a 1 in column
    # hot_indices[i].
    one_hot = np.zeros((len(texts), len(label_space)), dtype=int)
    one_hot[np.arange(len(texts)), hot_indices] = 1
    return texts, one_hot