"""Load feedback data from CSV and normalize expected columns.

The system expects a CSV with at least the columns: ID, ServiceName, Level, Text.
`load_feedback` validates the presence of these columns, drops empty text rows,
and returns a cleaned Pandas DataFrame.
"""

from __future__ import annotations

from pathlib import Path

import pandas as pd

from .config import settings


def load_feedback(csv_path: str | None = None) -> pd.DataFrame:
    """Read the feedback CSV and return it with empty-text rows removed.

    Args:
        csv_path: Location of the CSV file. When omitted (or empty),
            ``settings.csv_path`` is used. A relative path is resolved
            against the project root, taken to be three directory levels
            above this file.

    Returns:
        A DataFrame containing every column of the CSV, restricted to rows
        whose ``Text`` value is neither missing nor whitespace-only, with a
        fresh 0-based index.

    Raises:
        FileNotFoundError: If the file exists neither at the resolved path
            nor at any of the fallback locations.
        ValueError: If any of ``ID``, ``ServiceName``, ``Level`` or ``Text``
            is absent from the CSV header.
    """
    path_str = csv_path or settings.csv_path

    # Project root: 2_backend_llm/app/data_loader.py -> root/
    # Computed unconditionally because the debug output and the fallback
    # locations below need it even when the requested path is absolute.
    project_root = Path(__file__).resolve().parent.parent.parent

    requested = Path(path_str)
    path = requested if requested.is_absolute() else project_root / requested

    # Debug: print path information
    print(f"🔍 Looking for CSV file at: {path}", flush=True)
    print(f"🔍 Path exists: {path.exists()}", flush=True)
    print(f"🔍 Current working directory: {Path.cwd()}", flush=True)
    print(f"🔍 Project root: {project_root}", flush=True)

    if not path.exists():
        # Fallback locations, probed in order; the last two are relative to
        # the current working directory.
        alt_paths = [
            project_root / "0_preprocessing" / "feedback_transformed_2.csv",
            Path("0_preprocessing") / "feedback_transformed_2.csv",
            Path("feedback_transformed_2.csv"),
        ]
        found = next((p for p in alt_paths if p.exists()), None)
        if found is None:
            raise FileNotFoundError(
                f"CSV file not found at {path}. Tried: {[str(p) for p in [path] + alt_paths]}"
            )
        print(f"✅ Found CSV at alternative path: {found}", flush=True)
        path = found

    print(f"✅ Loading CSV from: {path}", flush=True)
    df = pd.read_csv(path)

    # Fail early if the CSV does not carry the columns the system relies on.
    expected = ["ID", "ServiceName", "Level", "Text"]
    missing = [c for c in expected if c not in df.columns]
    if missing:
        raise ValueError(f"Missing expected columns in CSV: {missing}")

    # Drop rows with empty text. Missing values must be tested separately:
    # astype(str) would turn NaN into the non-empty string "nan", so rows
    # with an empty Text cell would otherwise be kept.
    text = df["Text"]
    has_text = text.notna() & text.astype(str).str.strip().ne("")
    return df[has_text].reset_index(drop=True)