albin committed on
Commit
5fc419a
·
1 Parent(s): f5e9881

add function to manage encoding of new values

Browse files
Files changed (1) hide show
  1. main.py +9 -2
main.py CHANGED
@@ -34,6 +34,13 @@ selected_columns = [
34
  'DegitRatioInURL', 'SpacialCharRatioInURL', 'IsHTTPS'
35
  ]
36
 
 
 
 
 
 
 
 
37
  class Msg(BaseModel):
38
  msg: str
39
 
@@ -84,9 +91,9 @@ async def predict(request: Request, requess: Req = Depends(form_req)):
84
  dataFrame_features = pd.DataFrame([features])
85
 
86
  # Apply features encoding (convert everything into int64)
87
- for column in selected_columns:
88
  encoder = label_encoders[column]
89
- dataFrame_features[column] = encoder.transform(dataFrame_features[column])
90
 
91
  data = dataFrame_features[selected_columns].values
92
 
 
34
  'DegitRatioInURL', 'SpacialCharRatioInURL', 'IsHTTPS'
35
  ]
36
 
37
# Encode one value for the model, giving values the encoder has never seen a
# dedicated code instead of letting the encoding step fail.
def safe_transform(encoder, value, unknown_code=-1):
    """Encode ``value`` with ``encoder``, tolerating values it does not know.

    Args:
        encoder: Fitted encoder exposing ``classes_`` (the known values) and
            ``transform`` (takes a list of values, returns their codes).
            Presumably a scikit-learn ``LabelEncoder`` -- confirm against the
            code that builds ``label_encoders``.
        value: The single raw value to encode.
        unknown_code: Code returned when ``value`` is not among
            ``encoder.classes_``. Defaults to ``-1``, the special code for
            unknown values.

    Returns:
        The first element of ``encoder.transform([value])`` when ``value`` is
        known to the encoder, otherwise ``unknown_code``.
    """
    # Guard first: only values listed in ``classes_`` are handed to
    # ``transform``; anything else gets the sentinel code.
    if value not in encoder.classes_:
        return unknown_code
    # ``transform`` works on a batch, so wrap the value and unwrap the result.
    return encoder.transform([value])[0]
43
+
44
  class Msg(BaseModel):
45
  msg: str
46
 
 
91
  dataFrame_features = pd.DataFrame([features])
92
 
93
  # Apply features encoding (convert everything into int64)
94
+ for column in ['URL', 'Domain', 'TLD']:
95
  encoder = label_encoders[column]
96
+ dataFrame_features[column] = dataFrame_features[column].apply(lambda x: safe_transform(encoder, x))
97
 
98
  data = dataFrame_features[selected_columns].values
99