AliMustapha committed on
Commit
e50c7a7
·
1 Parent(s): 3bda1a7

Change NaN (missing) values in the dictionary columns to None

Browse files
Dictionary_guesser/name_nation_guesser.py CHANGED
@@ -114,7 +114,7 @@ class NameNationGuesser:
114
  dtype = self.names_data_dtype
115
  jsonApi = os.getenv('jsonApi')
116
  bucket_name = os.getenv('name')
117
- file_name = os.getenv('f_name=')
118
 
119
  service_account_info = json.loads(jsonApi)
120
  client = storage.Client.from_service_account_info(service_account_info)
 
114
  dtype = self.names_data_dtype
115
  jsonApi = os.getenv('jsonApi')
116
  bucket_name = os.getenv('name')
117
+ file_name = os.getenv('f_name')
118
 
119
  service_account_info = json.loads(jsonApi)
120
  client = storage.Client.from_service_account_info(service_account_info)
get_region.py CHANGED
@@ -11,8 +11,8 @@ from utils import data_utils
11
 
12
 
13
  class RegionPredictor:
14
- def __init__(self, models_directory,names_filename="Dictionary_guesser/names.csv",places_filename='Dictionary_guesser/places.tab'):
15
- self.guesser =NameNationGuesser(names_filename=names_filename,places_filename=places_filename, guess_first_second_min_mag=None,place_column_name="sub-region")
16
  self.models_directory = models_directory
17
 
18
  @classmethod
@@ -61,7 +61,6 @@ class RegionPredictor:
61
  def guess_zone(self,name, epoch, offset):
62
  dt = datetime.datetime.fromtimestamp(epoch)
63
  country_pop_map = self.guesser.country_pop_from_datetime(dt, offset)
64
- # print(country_pop_map)
65
  return self.guesser.guess_zone(name, country_pop_map=country_pop_map)
66
 
67
  def get_Dictionary_Based_Region(self,df):
@@ -84,17 +83,19 @@ class RegionPredictor:
84
  name = data_utils.text_to_romanize(name)
85
  name = data_utils.remove_spaces_from_ends(name)
86
  return name
87
- # Apply the transformation function to the entire DataFrame
88
  dataset['Author'] = dataset['Author'].apply(transform_string)
89
- # dataset.to_csv("check.csv")
90
  dataset["Author_Timezone"]= dataset["Author_Timezone"] /60
 
91
  dataset=self.get_Dictionary_Based_Region(dataset)
92
  model,label_encoder,optF1,optROC=self.load_model(self.models_directory+"/region/files/")
93
  y_pred_F1,y_pred_ROC=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
94
  dataset["region-prediction_F1"]=y_pred_F1
95
  dataset["region-prediction_ROC"]=y_pred_ROC
96
 
97
- data["region-prediction"] = data.apply(lambda row: row["region_Dictionary"] if row["region_Dictionary"] else row["region-prediction_F1"] , axis=1)
 
 
98
 
99
 
100
  Europe=dataset[dataset["region-prediction"]=="Europe"]
@@ -126,10 +127,12 @@ class RegionPredictor:
126
  Oceania["sub-region-prediction_ROC"]="Australia and New Zealand"
127
  if not Africa.empty:
128
  Africa["sub-region-prediction_F1"]=Africa["region-prediction"]
129
- Africa["sub-region-prediction_ROC"]=Africa["region-prediction"]
 
130
  data=pd.concat([Europe,Asia,Oceania,Americas,Africa])
 
131
  data["sub-region-prediction"] = data.apply(lambda row: row["sub-region_Dictionary"] if row["sub-region_Dictionary"] else row["sub-region-prediction_F1"] , axis=1)
132
-
133
  return data
134
 
135
 
 
11
 
12
 
13
  class RegionPredictor:
14
+ def __init__(self, models_directory,places_filename='Dictionary_guesser/places.tab'):
15
+ self.guesser =NameNationGuesser(places_filename=places_filename, guess_first_second_min_mag=None,place_column_name="sub-region")
16
  self.models_directory = models_directory
17
 
18
  @classmethod
 
61
  def guess_zone(self,name, epoch, offset):
62
  dt = datetime.datetime.fromtimestamp(epoch)
63
  country_pop_map = self.guesser.country_pop_from_datetime(dt, offset)
 
64
  return self.guesser.guess_zone(name, country_pop_map=country_pop_map)
65
 
66
  def get_Dictionary_Based_Region(self,df):
 
83
  name = data_utils.text_to_romanize(name)
84
  name = data_utils.remove_spaces_from_ends(name)
85
  return name
86
+
87
  dataset['Author'] = dataset['Author'].apply(transform_string)
 
88
  dataset["Author_Timezone"]= dataset["Author_Timezone"] /60
89
+
90
  dataset=self.get_Dictionary_Based_Region(dataset)
91
  model,label_encoder,optF1,optROC=self.load_model(self.models_directory+"/region/files/")
92
  y_pred_F1,y_pred_ROC=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
93
  dataset["region-prediction_F1"]=y_pred_F1
94
  dataset["region-prediction_ROC"]=y_pred_ROC
95
 
96
+
97
+ dataset['region_Dictionary'] = dataset['region_Dictionary'].apply(lambda x: x if pd.notna(x) else None)
98
+ dataset["region-prediction"] = dataset.apply(lambda row: row["region_Dictionary"] if row["region_Dictionary"] else row["region-prediction_F1"] , axis=1)
99
 
100
 
101
  Europe=dataset[dataset["region-prediction"]=="Europe"]
 
127
  Oceania["sub-region-prediction_ROC"]="Australia and New Zealand"
128
  if not Africa.empty:
129
  Africa["sub-region-prediction_F1"]=Africa["region-prediction"]
130
+ Africa["sub-region-prediction_ROC"]=Africa["region-prediction"]
131
+
132
  data=pd.concat([Europe,Asia,Oceania,Americas,Africa])
133
+ data["sub-region_Dictionary"] = data["sub-region_Dictionary"].apply(lambda x: x if pd.notna(x) else None)
134
  data["sub-region-prediction"] = data.apply(lambda row: row["sub-region_Dictionary"] if row["sub-region_Dictionary"] else row["sub-region-prediction_F1"] , axis=1)
135
+
136
  return data
137
 
138