hieu3636 committed on
Commit
96f75f0
·
verified ·
1 Parent(s): 616eb33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -1
app.py CHANGED
@@ -47,12 +47,69 @@ SELECTED_FEATURES = [
47
  ]
48
 
49
  N_FEATURES = len(SELECTED_FEATURES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
51
  # =========================
52
  # PREDICTION FUNCTION
53
  # =========================
54
  def predict_csv(file):
55
- df = pd.read_csv(file)
56
 
57
  # Drop label columns if exist
58
  df = df.drop(columns=["Label", "label", "class", "Class"], errors="ignore")
 
47
  ]
48
 
49
  N_FEATURES = len(SELECTED_FEATURES)
50
+ # CLEAN NUMERIC (same as training)
51
+ # =========================
52
def clean_numeric(val):
    """Convert a raw CSV cell into a float, or None when it is not numeric.

    All whitespace is removed before parsing. The following spellings are
    understood:

    * plain and scientific notation (``"12.5"``, ``"1.5e3"``);
    * dots used as thousand separators (``"1.234.567"``);
    * a comma used as the decimal mark (``"3,14"``, ``"1.234.567,89"``);
    * mixed thousand/decimal separators (``"1.234,56"``, ``"1,234.56"``,
      ``"1,234,567"``), which are only tried when the rules above fail.

    Args:
        val: The raw cell value; anything ``pd.isna`` reports as missing is
            treated as absent.

    Returns:
        The parsed ``float``, or ``None`` for missing or unparseable input.
    """
    if pd.isna(val):
        return None

    text = re.sub(r"\s+", "", str(val).strip())

    # Primary interpretation: several dots can only be thousand separators,
    # and a comma with no dot left is taken as the decimal mark.
    # float() already understands scientific notation, so no special case
    # is needed for it.
    primary = text
    if primary.count(".") > 1:
        primary = primary.replace(".", "")
    if "," in primary and "." not in primary:
        primary = primary.replace(",", ".")

    try:
        return float(primary)
    except ValueError:
        pass

    # Fallback, reached only when the primary interpretation is not a valid
    # number, so values that parsed before keep exactly the same result.
    has_comma = "," in text
    has_dot = "." in text
    if has_comma and has_dot:
        # The right-most separator is the decimal mark; the other one groups
        # thousands.
        if text.rfind(",") > text.rfind("."):
            fallback = text.replace(".", "").replace(",", ".")
        else:
            fallback = text.replace(",", "")
    elif text.count(",") > 1:
        # Several commas and no dot: commas are thousand separators.
        fallback = text.replace(",", "")
    else:
        return None

    try:
        return float(fallback)
    except ValueError:
        return None
75
+
76
+
77
+ # =========================
78
+ # LOAD & PREPROCESS CSV
79
+ # =========================
80
def load_and_clean_csv(file):
    """Read a CSV file and return a DataFrame of cleaned numeric values.

    Steps:
        1. Read the file with every cell as a string, letting pandas detect
           the delimiter.
        2. Remove all whitespace from the column names.
        3. Drop label / bookkeeping columns when they are present.
        4. Convert every remaining cell with ``clean_numeric``.

    Args:
        file: Either a path (``str`` or path-like object) or an object that
            exposes the path of the file on disk as its ``name`` attribute.

    Returns:
        A ``pandas.DataFrame`` whose cells are the results of
        ``clean_numeric`` (floats, or missing where a cell was not numeric).
    """
    # Accept a plain path as well as an object carrying the path in ``.name``.
    # Path-like objects are checked explicitly because ``pathlib.Path.name``
    # is only the final path component, not the full path.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        path = file
    else:
        path = file.name

    # 1. Read CSV. sep=None with the python engine makes pandas detect the
    #    delimiter; dtype=str keeps the raw text so clean_numeric can deal
    #    with locale-specific number formats itself.
    df = pd.read_csv(
        path,
        sep=None,
        engine="python",
        dtype=str,
    )

    # 2. Clean header: strip and remove inner whitespace from column names.
    df.columns = (
        df.columns
        .astype(str)
        .str.strip()
        .str.replace(r"\s+", "", regex=True)
    )

    # 3. Drop label columns if they exist.
    df = df.drop(
        columns=["Label", "label", "class", "Class", "file_name"],
        errors="ignore",
    )

    # 4. Clean numeric values column by column.
    for col in df.columns:
        df[col] = df[col].apply(clean_numeric)

    return df
108
  # =========================
109
  # PREDICTION FUNCTION
110
  # =========================
111
  def predict_csv(file):
112
+ df = load_and_clean_csv(file)
113
 
114
  # Drop label columns if exist
115
  df = df.drop(columns=["Label", "label", "class", "Class"], errors="ignore")