AliMustapha committed on
Commit
e6f4341
·
1 Parent(s): 2c472ee

add region predictor class

Browse files
Files changed (1) hide show
  1. get_region.py +82 -0
get_region.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __copyright__ = "Copyright (C) 2023 Ali Mustapha"
2
+ __license__ = "GPL-3.0-or-later"
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ import tensorflow as tf
7
+ import pickle
8
+
9
class RegionPredictor:
    """Two-stage predictor: a region per row, then a sub-region per row.

    A first model assigns every row of a dataset to one of the regions
    "Europe", "Africa", "Asia", "Americas" or "Oceania". Rows predicted as
    Europe, Asia or Americas are then passed to a region-specific model that
    predicts a sub-region; Oceania and Africa rows receive a fixed
    sub-region label.

    Model artifacts are read from ``<models_directory>/<name>/files/`` where
    ``<name>`` is ``region``, ``Europe``, ``Asia`` or ``Americas``.
    """

    REGION_COLUMN = "region-prediction"
    SUB_REGION_COLUMN = "sub-region-prediction"

    # Regions whose sub-region is assigned directly instead of predicted.
    FIXED_SUB_REGIONS = {
        "Oceania": "Australia and New Zealand",
        "Africa": "Africa",
    }

    # Order in which the per-region frames are concatenated in the result.
    REGION_ORDER = ("Europe", "Asia", "Oceania", "Americas", "Africa")

    def __init__(self, models_directory):
        """Remember the root directory that holds the model sub-directories.

        Args:
            models_directory: Root directory, given without a trailing
                separator ("/<name>/files/" is appended to it).
        """
        self.models_directory = models_directory

    def load_model(self, path):
        """Load one model together with its label encoder and thresholds.

        Args:
            path: Directory containing ``bestmodel.tf``,
                ``label_encoder.pkl``, ``optimal_thresholds_f1.pkl`` and
                ``optimal_thresholds_ROC.pkl``. File names are appended by
                plain string concatenation, so ``path`` must end with a
                path separator.

        Returns:
            Tuple ``(model, label_encoder, optF1, optROC)``.
        """
        model = tf.keras.models.load_model(path + "bestmodel.tf")
        # Re-compile the loaded model (no training happens here).
        model.compile(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=['accuracy'],
        )
        # The encoder and thresholds belong to the model being loaded, so
        # they are read from `path`; reading them from the models root would
        # make every model share a single label encoder.
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only point this class at trusted model directories.
        with open(path + 'label_encoder.pkl', 'rb') as file:
            label_encoder = pickle.load(file)
        with open(path + 'optimal_thresholds_f1.pkl', 'rb') as file:
            optF1 = pickle.load(file)
        with open(path + 'optimal_thresholds_ROC.pkl', 'rb') as file:
            optROC = pickle.load(file)
        return model, label_encoder, optF1, optROC

    def model_prediction(self, dataset, model, label_encoder, optF1=None, optROC=None):
        """Predict labels for ``dataset`` under the F1 and ROC thresholds.

        Args:
            dataset: Table with ``Full_Name`` and ``offset`` columns.
            model: Object whose ``predict`` accepts a dict with the keys
                ``input_text`` and ``input_offset`` and returns one row of
                per-class scores per input row.
            label_encoder: Object whose ``inverse_transform`` maps class
                indices back to labels.
            optF1: Per-class thresholds, or ``None`` to skip this output.
            optROC: Per-class thresholds, or ``None`` to skip this output.

        Returns:
            Tuple ``(y_pred_F1, y_pred_ROC)``. Each element is the decoded
            labels, or an empty list when its thresholds were ``None``.
        """
        input_full_name = np.asarray(dataset['Full_Name']).astype('str')
        input_offset = np.asarray(dataset['offset']).astype('float')
        predictions_proba = model.predict({
            "input_text": input_full_name,
            "input_offset": input_offset,
        })
        y_pred_f1 = self._labels_from_thresholds(predictions_proba, optF1, label_encoder)
        y_pred_roc = self._labels_from_thresholds(predictions_proba, optROC, label_encoder)
        return y_pred_f1, y_pred_roc

    @staticmethod
    def _labels_from_thresholds(predictions_proba, thresholds, label_encoder):
        """Decode the first class whose score reaches its threshold."""
        if thresholds is None:
            return []
        passed = (predictions_proba >= thresholds).astype(int)
        # NOTE(review): argmax over the 0/1 mask selects the first class that
        # passes its threshold, and class 0 when none passes. Kept as in the
        # original so predictions do not change; confirm this is intended.
        return label_encoder.inverse_transform(np.argmax(passed, axis=1))

    def _predict_labels(self, frame, model_name):
        """Return F1-threshold labels for ``frame`` from the named model.

        An empty frame yields an empty result without loading the model, so
        ``predict`` is never called with zero rows.
        """
        if len(frame) == 0:
            return np.array([], dtype=object)
        files_dir = self.models_directory + "/" + model_name + "/files/"
        model, label_encoder, optF1, optROC = self.load_model(files_dir)
        y_pred, _ = self.model_prediction(frame, model, label_encoder, optF1, optROC)
        return y_pred

    def get_region(self, dataset):
        """Add region and sub-region predictions to ``dataset``.

        The ``region-prediction`` column is written into ``dataset`` itself.
        The returned frame is the concatenation of the per-region subsets
        (Europe, Asia, Oceania, Americas, Africa, in that order), each a copy
        carrying an additional ``sub-region-prediction`` column; original
        index values are preserved.

        Args:
            dataset: pandas DataFrame with ``Full_Name`` and ``offset``
                columns.

        Returns:
            A new DataFrame with both prediction columns.
        """
        dataset[self.REGION_COLUMN] = self._predict_labels(dataset, "region")

        frames = []
        for region in self.REGION_ORDER:
            # Copy so the new column is not written into a slice of `dataset`.
            subset = dataset[dataset[self.REGION_COLUMN] == region].copy()
            if region in self.FIXED_SUB_REGIONS:
                subset[self.SUB_REGION_COLUMN] = self.FIXED_SUB_REGIONS[region]
            else:
                subset[self.SUB_REGION_COLUMN] = self._predict_labels(subset, region)
            frames.append(subset)

        # Leave empty subsets out of the concat; if every subset is empty,
        # concatenate them anyway so the result still has the new columns.
        non_empty = [frame for frame in frames if not frame.empty]
        data = pd.concat(non_empty if non_empty else frames)
        return data
76
+
77
+
78
+
79
+
80
+
81
+
82
+