hieu3636 committed on
Commit
ff5d94a
·
verified ·
1 Parent(s): 9f19f6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -40,18 +40,35 @@ def clean_numeric(val):
40
  except ValueError:
41
  return None
42
 
43
-
44
- # ======================
45
- # PREDICTION FUNCTION
46
- # ======================
47
- def predict_malware_csv(file):
48
  df = pd.read_csv(
49
  file.name,
50
- sep=';',
51
  engine='python',
52
  dtype=str
53
  )
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Check missing features
56
  missing = set(feature_names) - set(df.columns)
57
  if missing:
@@ -107,7 +124,6 @@ app = gr.Interface(
107
  title="Stacking-based Malware Detection",
108
  description=(
109
  "Upload a CSV file.\n\n"
110
- f"Required features: {', '.join(feature_names)}"
111
  )
112
  )
113
 
 
40
  except ValueError:
41
  return None
42
 
43
+ def load_and_clean_csv(file):
 
 
 
 
44
  df = pd.read_csv(
45
  file.name,
46
+ sep=None,
47
  engine='python',
48
  dtype=str
49
  )
50
 
51
+ # clean header
52
+ df.columns = (
53
+ df.columns
54
+ .astype(str)
55
+ .str.strip()
56
+ .str.replace(r'\s+', '', regex=True)
57
+ )
58
+
59
+ # clean numeric values
60
+ for col in df.columns:
61
+ if col not in ['Label', 'file_name']:
62
+ df[col] = df[col].apply(clean_numeric)
63
+
64
+ return df
65
+
66
+ # ======================
67
+ # PREDICTION FUNCTION
68
+ # ======================
69
+ def predict_malware_csv(file):
70
+ df = load_and_clean_csv(file)
71
+
72
  # Check missing features
73
  missing = set(feature_names) - set(df.columns)
74
  if missing:
 
124
  title="Stacking-based Malware Detection",
125
  description=(
126
  "Upload a CSV file.\n\n"
 
127
  )
128
  )
129