|
|
from __future__ import annotations |
|
|
|
|
|
"""Load feedback data from CSV and normalize expected columns. |
|
|
|
|
|
The system expects a CSV with at least the columns: ID, ServiceName, Level, Text. |
|
|
`load_feedback` validates the presence of these columns, drops rows whose text
is empty, and returns a cleaned pandas DataFrame.
|
|
""" |
|
|
|
|
|
from pathlib import Path |
|
|
import pandas as pd |
|
|
from .config import settings |
|
|
|
|
|
|
|
|
def load_feedback(csv_path: str | None = None) -> pd.DataFrame: |
|
|
path_str = csv_path or settings.csv_path |
|
|
|
|
|
if Path(path_str).is_absolute(): |
|
|
path = Path(path_str) |
|
|
else: |
|
|
|
|
|
project_root = Path(__file__).resolve().parent.parent.parent |
|
|
path = project_root / path_str |
|
|
|
|
|
|
|
|
print(f"π Looking for CSV file at: {path}", flush=True) |
|
|
print(f"π Path exists: {path.exists()}", flush=True) |
|
|
print(f"π Current working directory: {Path.cwd()}", flush=True) |
|
|
print(f"π Project root: {project_root}", flush=True) |
|
|
|
|
|
if not path.exists(): |
|
|
|
|
|
alt_paths = [ |
|
|
project_root / "0_preprocessing" / "feedback_transformed_2.csv", |
|
|
Path("0_preprocessing") / "feedback_transformed_2.csv", |
|
|
Path("feedback_transformed_2.csv"), |
|
|
] |
|
|
for alt_path in alt_paths: |
|
|
if alt_path.exists(): |
|
|
print(f"β
Found CSV at alternative path: {alt_path}", flush=True) |
|
|
path = alt_path |
|
|
break |
|
|
else: |
|
|
raise FileNotFoundError(f"CSV file not found at {path}. Tried: {[str(p) for p in [path] + alt_paths]}") |
|
|
|
|
|
print(f"β
Loading CSV from: {path}", flush=True) |
|
|
df = pd.read_csv(path) |
|
|
|
|
|
expected = ["ID", "ServiceName", "Level", "Text"] |
|
|
missing = [c for c in expected if c not in df.columns] |
|
|
if missing: |
|
|
raise ValueError(f"Missing expected columns in CSV: {missing}") |
|
|
|
|
|
df = df[df["Text"].astype(str).str.strip().ne("")].copy() |
|
|
df.reset_index(drop=True, inplace=True) |
|
|
return df |
|
|
|
|
|
|