albin committed on
Commit
1127d65
·
1 Parent(s): c9f777a

Remove the 'URL' feature from the extracted features and model inputs

Browse files
extraction_features.py CHANGED
@@ -15,7 +15,7 @@ def extract_features(url):
15
  features = {}
16
 
17
  # URL
18
- features['URL'] = url
19
 
20
  # URLLength
21
  features['URLLength'] = len(url)
 
15
  features = {}
16
 
17
  # URL
18
+ # features['URL'] = url
19
 
20
  # URLLength
21
  features['URLLength'] = len(url)
label_encoders.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c355826d22387c337f5bf8ca6ae18fe881c1b3cc2465772f740fa3c49b726469
3
- size 17633409
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3db2d4c2f44297075aeec0b104794135b2bae52288f102b21b35fd7b45649d2e
3
+ size 6961649
logistic_regression_model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d3509d063e937fc6c03b386580246dfbd60827c871dc6394115bde984e62052
3
- size 1483
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7bc11b6b53b1a34501d639a64111d205d408e50fd5ba09bbfdb8018434e2cf
3
+ size 1451
main.py CHANGED
@@ -29,7 +29,7 @@ model = joblib.load('logistic_regression_model.pkl')
29
  label_encoders = joblib.load('label_encoders.pkl')
30
  # Columns used in the model
31
  selected_columns = [
32
- 'URL', 'URLLength', 'Domain', 'DomainLength', 'TLD',
33
  'CharContinuationRate', 'TLDLength', 'NoOfSubDomain',
34
  'DegitRatioInURL', 'SpacialCharRatioInURL', 'IsHTTPS'
35
  ]
@@ -91,7 +91,7 @@ async def predict(request: Request, requess: Req = Depends(form_req)):
91
  dataFrame_features = pd.DataFrame([features])
92
 
93
  # Apply features encoding (convert everything into int64)
94
- for column in ['URL', 'Domain', 'TLD']:
95
  encoder = label_encoders[column]
96
  dataFrame_features[column] = dataFrame_features[column].apply(lambda x: safe_transform(encoder, x))
97
 
 
29
  label_encoders = joblib.load('label_encoders.pkl')
30
  # Columns used in the model
31
  selected_columns = [
32
+ 'URLLength', 'Domain', 'DomainLength', 'TLD',
33
  'CharContinuationRate', 'TLDLength', 'NoOfSubDomain',
34
  'DegitRatioInURL', 'SpacialCharRatioInURL', 'IsHTTPS'
35
  ]
 
91
  dataFrame_features = pd.DataFrame([features])
92
 
93
  # Apply features encoding (convert everything into int64)
94
+ for column in ['Domain', 'TLD']:
95
  encoder = label_encoders[column]
96
  dataFrame_features[column] = dataFrame_features[column].apply(lambda x: safe_transform(encoder, x))
97