File size: 503 Bytes
f84949e
3d9a801
f84949e
3d9a801
f84949e
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import pandas as pd

from projet_05 import dataset as ds


def test_clean_text_values_normalizes_tokens():
    """Check that ``clean_text_values`` maps placeholder tokens to ``pd.NA``.

    The test expects the strings ``"nan"`` and ``"JE ne sais pas"`` to come
    back as ``pd.NA`` and the padded string ``" value "`` to come back as
    ``"value"``.
    """
    df = pd.DataFrame({"col": ["nan", "JE ne sais pas", " value "]})
    cleaned = ds.clean_text_values(df)
    values = cleaned["col"].tolist()

    # Check missing positions by identity rather than ``==``: comparing
    # anything with ``pd.NA`` yields ``pd.NA``, whose truth value raises
    # TypeError, so a plain list comparison would error out instead of
    # reporting a readable assertion failure on mismatch.
    na_mask = [value is pd.NA for value in values]
    assert na_mask == [True, True, False]

    # Safe to use ``==`` here: the mask above guarantees this is not pd.NA.
    assert values[2] == "value"


def test_harmonize_id_column_extracts_digits_and_handles_invalid():
    """Check that ``_harmonize_id_column`` yields numeric ids or ``pd.NA``.

    The test expects ``"EMP-001"`` to become ``1`` and the digit-free string
    ``"missing"`` to become ``pd.NA`` in the ``"id"`` column.
    """
    df = pd.DataFrame({"id": ["EMP-001", "missing"]})
    harmonized = ds._harmonize_id_column(df, "id")
    values = harmonized["id"].tolist()

    # Check missing positions by identity rather than ``==``: comparing
    # anything with ``pd.NA`` yields ``pd.NA``, whose truth value raises
    # TypeError, so a plain list comparison would error out instead of
    # reporting a readable assertion failure on mismatch.
    na_mask = [value is pd.NA for value in values]
    assert na_mask == [False, True]

    # Safe to use ``==`` here: the mask above guarantees this is not pd.NA.
    assert values[0] == 1