File size: 2,239 Bytes
1c23b7c
 
1da3dc8
 
 
 
 
 
 
53469df
1c23b7c
 
 
 
 
53469df
 
 
80f29b9
53469df
 
 
 
80f29b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23b7c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from __future__ import annotations

"""Load feedback data from CSV and normalize expected columns.

The system expects a CSV with at least the columns: ID, ServiceName, Level, Text.
`load_feedback` validates the presence of these columns, drops empty text rows,
and returns a cleaned Pandas DataFrame.
"""

from pathlib import Path
import pandas as pd
from .config import settings


def load_feedback(csv_path: str | None = None) -> pd.DataFrame:
    """Load the feedback CSV and return only the rows that contain text.

    Relative paths are resolved against the project root (three directory
    levels above this file). If the resolved path does not exist, a few
    well-known fallback locations for ``feedback_transformed_2.csv`` are tried
    before giving up.

    Args:
        csv_path: Path to the CSV file. When ``None`` or empty,
            ``settings.csv_path`` is used instead.

    Returns:
        A new DataFrame holding the rows whose ``Text`` value is neither
        missing nor blank, with a fresh 0-based index.

    Raises:
        FileNotFoundError: If neither the requested path nor any fallback
            location exists.
        ValueError: If any of the columns ``ID``, ``ServiceName``, ``Level``
            or ``Text`` is absent from the CSV.
    """
    path_str = csv_path or settings.csv_path

    # The project root is needed for the diagnostics and for the fallback
    # search even when an absolute path is supplied, so compute it up front.
    # Layout: 2_backend_llm/app/data_loader.py -> root/
    project_root = Path(__file__).resolve().parent.parent.parent

    path = Path(path_str)
    if not path.is_absolute():
        path = project_root / path

    # Debug: print path information
    print(f"🔍 Looking for CSV file at: {path}", flush=True)
    print(f"🔍 Path exists: {path.exists()}", flush=True)
    print(f"🔍 Current working directory: {Path.cwd()}", flush=True)
    print(f"🔍 Project root: {project_root}", flush=True)

    if not path.exists():
        # Fall back to the locations where the preprocessed file usually lives.
        alt_paths = [
            project_root / "0_preprocessing" / "feedback_transformed_2.csv",
            Path("0_preprocessing") / "feedback_transformed_2.csv",
            Path("feedback_transformed_2.csv"),
        ]
        found = next((candidate for candidate in alt_paths if candidate.exists()), None)
        if found is None:
            tried = [str(p) for p in [path] + alt_paths]
            raise FileNotFoundError(f"CSV file not found at {path}. Tried: {tried}")
        print(f"✅ Found CSV at alternative path: {found}", flush=True)
        path = found

    print(f"✅ Loading CSV from: {path}", flush=True)
    df = pd.read_csv(path)

    # Validate that every expected column is present.
    expected = ["ID", "ServiceName", "Level", "Text"]
    missing = [c for c in expected if c not in df.columns]
    if missing:
        raise ValueError(f"Missing expected columns in CSV: {missing}")

    # Drop rows with empty text. Empty CSV fields are parsed as NaN, and
    # ``astype(str)`` would turn those into the literal "nan", so missing
    # values have to be excluded explicitly before the blank-string check.
    text = df["Text"]
    has_text = text.notna() & text.astype(str).str.strip().ne("")
    return df.loc[has_text].copy().reset_index(drop=True)