Spaces:

galbendavids
/

feedback-analysis-agent

Sleeping

File size: 2,239 Bytes

from __future__ import annotations

"""Load feedback data from CSV and normalize expected columns.

The system expects a CSV with at least the columns: ID, ServiceName, Level, Text.
`load_feedback` validates the presence of these columns, drops empty text rows,
and returns a cleaned Pandas DataFrame.
"""

from pathlib import Path
import pandas as pd
from .config import settings


def load_feedback(csv_path: str | None = None) -> pd.DataFrame:
    path_str = csv_path or settings.csv_path
    # Resolve path relative to project root if it's a relative path
    if Path(path_str).is_absolute():
        path = Path(path_str)
    else:
        # Calculate project root: 2_backend_llm/app/data_loader.py -> root/
        project_root = Path(__file__).resolve().parent.parent.parent
        path = project_root / path_str
    
    # Debug: print path information
    print(f"🔍 Looking for CSV file at: {path}", flush=True)
    print(f"🔍 Path exists: {path.exists()}", flush=True)
    print(f"🔍 Current working directory: {Path.cwd()}", flush=True)
    print(f"🔍 Project root: {project_root}", flush=True)
    
    if not path.exists():
        # Try alternative paths
        alt_paths = [
            project_root / "0_preprocessing" / "feedback_transformed_2.csv",
            Path("0_preprocessing") / "feedback_transformed_2.csv",
            Path("feedback_transformed_2.csv"),
        ]
        for alt_path in alt_paths:
            if alt_path.exists():
                print(f"✅ Found CSV at alternative path: {alt_path}", flush=True)
                path = alt_path
                break
        else:
            raise FileNotFoundError(f"CSV file not found at {path}. Tried: {[str(p) for p in [path] + alt_paths]}")
    
    print(f"✅ Loading CSV from: {path}", flush=True)
    df = pd.read_csv(path)
    # Basic normalization of expected columns if present
    expected = ["ID", "ServiceName", "Level", "Text"]
    missing = [c for c in expected if c not in df.columns]
    if missing:
        raise ValueError(f"Missing expected columns in CSV: {missing}")
    # Drop rows with empty text
    df = df[df["Text"].astype(str).str.strip().ne("")].copy()
    df.reset_index(drop=True, inplace=True)
    return df