albin committed on
Commit
324e0e0
·
1 Parent(s): 4a2b74d

modify features encoding

Browse files
Files changed (2) hide show
  1. label_encoders.pkl +3 -0
  2. main.py +31 -15
label_encoders.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6828590f38440abf21c24157ec2c7ab8e25bf1100a277918afd2f5ead03f6912
3
+ size 7719889
main.py CHANGED
@@ -8,6 +8,8 @@ from fastapi.middleware.cors import CORSMiddleware
8
  from pydantic import BaseModel
9
  import pickle
10
  import joblib
 
 
11
  from extraction_features import extract_features
12
 
13
  app = FastAPI()
@@ -24,7 +26,13 @@ app.add_middleware(
24
  # model_file = open('logistic_regression_model.pkl', 'rb')
25
  # model = pickle.load(model_file, encoding='bytes')
26
  model = joblib.load('logistic_regression_model.pkl')
27
-
 
 
 
 
 
 
28
 
29
  class Msg(BaseModel):
30
  msg: str
@@ -73,7 +81,17 @@ async def predict(request: Request, requess: Req = Depends(form_req)):
73
  url = requess.url
74
 
75
  features = extract_features(str(url))
76
- data = []
 
 
 
 
 
 
 
 
 
 
77
  # data.append(str(features['URL']))
78
  # data.extend(int(features['URLLength']))
79
  # data.extend(str(features['Domain']))
@@ -85,19 +103,17 @@ async def predict(request: Request, requess: Req = Depends(form_req)):
85
  # data.extend(float(features['DegitRatioInURL']))
86
  # data.extend(float(features['SpacialCharRatioInURL']))
87
  # data.extend(int(features['IsHTTPS']))
88
- data.append(features['URL'])
89
- data.append(features['URLLength'])
90
- data.append(features['Domain'])
91
- data.append(features['DomainLength'])
92
- data.append(features['TLD'])
93
- data.append(features['CharContinuationRate'])
94
- data.append(features['TLDLength'])
95
- data.append(features['NoOfSubDomain'])
96
- data.append(features['DegitRatioInURL'])
97
- data.append(features['SpacialCharRatioInURL'])
98
- data.append(features['IsHTTPS'])
99
- # Convert every feature into numeric value
100
- data = [float(x) if isinstance(x, str) else x for x in data]
101
 
102
  prediction = model.predict([data])
103
  output = prediction[0]
 
8
  from pydantic import BaseModel
9
  import pickle
10
  import joblib
11
+ import pandas as pd
12
+ from sklearn.preprocessing import LabelEncoder
13
  from extraction_features import extract_features
14
 
15
  app = FastAPI()
 
26
  # model_file = open('logistic_regression_model.pkl', 'rb')
27
  # model = pickle.load(model_file, encoding='bytes')
28
  model = joblib.load('logistic_regression_model.pkl')
29
+ label_encoders = joblib.load('label_encoders.pkl')
30
+ # Columns used in the model
31
+ selected_columns = [
32
+ 'URL', 'URLLength', 'Domain', 'DomainLength', 'TLD',
33
+ 'CharContinuationRate', 'TLDLength', 'NoOfSubDomain',
34
+ 'DegitRatioInURL', 'SpacialCharRatioInURL', 'IsHTTPS'
35
+ ]
36
 
37
  class Msg(BaseModel):
38
  msg: str
 
81
  url = requess.url
82
 
83
  features = extract_features(str(url))
84
+ dataFrame_features = pd.DataFrame([features])
85
+
86
+ # Apply features encoding (convert everything into int64)
87
+ for column in selected_columns:
88
+ if column in label_encoders:
89
+ encoder = label_encoders[column]
90
+ dataFrame_features[column] = encoder.transform(dataFrame_features[column])
91
+
92
+ data = dataFrame_features[selected_columns].values
93
+
94
+ # data = []
95
  # data.append(str(features['URL']))
96
  # data.extend(int(features['URLLength']))
97
  # data.extend(str(features['Domain']))
 
103
  # data.extend(float(features['DegitRatioInURL']))
104
  # data.extend(float(features['SpacialCharRatioInURL']))
105
  # data.extend(int(features['IsHTTPS']))
106
+ # data.append(features['URL'])
107
+ # data.append(features['URLLength'])
108
+ # data.append(features['Domain'])
109
+ # data.append(features['DomainLength'])
110
+ # data.append(features['TLD'])
111
+ # data.append(features['CharContinuationRate'])
112
+ # data.append(features['TLDLength'])
113
+ # data.append(features['NoOfSubDomain'])
114
+ # data.append(features['DegitRatioInURL'])
115
+ # data.append(features['SpacialCharRatioInURL'])
116
+ # data.append(features['IsHTTPS'])
 
 
117
 
118
  prediction = model.predict([data])
119
  output = prediction[0]