Spaces:
Runtime error
Runtime error
Commit ·
e50c7a7
1
Parent(s): 3bda1a7
change none value in data to be np.null
Browse files
- Dictionary_guesser/name_nation_guesser.py +1 -1
- get_region.py +11 -8
Dictionary_guesser/name_nation_guesser.py
CHANGED
|
@@ -114,7 +114,7 @@ class NameNationGuesser:
|
|
| 114 |
dtype = self.names_data_dtype
|
| 115 |
jsonApi = os.getenv('jsonApi')
|
| 116 |
bucket_name = os.getenv('name')
|
| 117 |
-
file_name = os.getenv('f_name
|
| 118 |
|
| 119 |
service_account_info = json.loads(jsonApi)
|
| 120 |
client = storage.Client.from_service_account_info(service_account_info)
|
|
|
|
| 114 |
dtype = self.names_data_dtype
|
| 115 |
jsonApi = os.getenv('jsonApi')
|
| 116 |
bucket_name = os.getenv('name')
|
| 117 |
+
file_name = os.getenv('f_name')
|
| 118 |
|
| 119 |
service_account_info = json.loads(jsonApi)
|
| 120 |
client = storage.Client.from_service_account_info(service_account_info)
|
get_region.py
CHANGED
|
@@ -11,8 +11,8 @@ from utils import data_utils
|
|
| 11 |
|
| 12 |
|
| 13 |
class RegionPredictor:
|
| 14 |
-
def __init__(self, models_directory,
|
| 15 |
-
self.guesser =NameNationGuesser(
|
| 16 |
self.models_directory = models_directory
|
| 17 |
|
| 18 |
@classmethod
|
|
@@ -61,7 +61,6 @@ class RegionPredictor:
|
|
| 61 |
def guess_zone(self,name, epoch, offset):
|
| 62 |
dt = datetime.datetime.fromtimestamp(epoch)
|
| 63 |
country_pop_map = self.guesser.country_pop_from_datetime(dt, offset)
|
| 64 |
-
# print(country_pop_map)
|
| 65 |
return self.guesser.guess_zone(name, country_pop_map=country_pop_map)
|
| 66 |
|
| 67 |
def get_Dictionary_Based_Region(self,df):
|
|
@@ -84,17 +83,19 @@ class RegionPredictor:
|
|
| 84 |
name = data_utils.text_to_romanize(name)
|
| 85 |
name = data_utils.remove_spaces_from_ends(name)
|
| 86 |
return name
|
| 87 |
-
|
| 88 |
dataset['Author'] = dataset['Author'].apply(transform_string)
|
| 89 |
-
# dataset.to_csv("check.csv")
|
| 90 |
dataset["Author_Timezone"]= dataset["Author_Timezone"] /60
|
|
|
|
| 91 |
dataset=self.get_Dictionary_Based_Region(dataset)
|
| 92 |
model,label_encoder,optF1,optROC=self.load_model(self.models_directory+"/region/files/")
|
| 93 |
y_pred_F1,y_pred_ROC=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
|
| 94 |
dataset["region-prediction_F1"]=y_pred_F1
|
| 95 |
dataset["region-prediction_ROC"]=y_pred_ROC
|
| 96 |
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
Europe=dataset[dataset["region-prediction"]=="Europe"]
|
|
@@ -126,10 +127,12 @@ class RegionPredictor:
|
|
| 126 |
Oceania["sub-region-prediction_ROC"]="Australia and New Zealand"
|
| 127 |
if not Africa.empty:
|
| 128 |
Africa["sub-region-prediction_F1"]=Africa["region-prediction"]
|
| 129 |
-
Africa["sub-region-prediction_ROC"]=Africa["region-prediction"]
|
|
|
|
| 130 |
data=pd.concat([Europe,Asia,Oceania,Americas,Africa])
|
|
|
|
| 131 |
data["sub-region-prediction"] = data.apply(lambda row: row["sub-region_Dictionary"] if row["sub-region_Dictionary"] else row["sub-region-prediction_F1"] , axis=1)
|
| 132 |
-
|
| 133 |
return data
|
| 134 |
|
| 135 |
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
class RegionPredictor:
|
| 14 |
+
def __init__(self, models_directory,places_filename='Dictionary_guesser/places.tab'):
|
| 15 |
+
self.guesser =NameNationGuesser(places_filename=places_filename, guess_first_second_min_mag=None,place_column_name="sub-region")
|
| 16 |
self.models_directory = models_directory
|
| 17 |
|
| 18 |
@classmethod
|
|
|
|
| 61 |
def guess_zone(self,name, epoch, offset):
|
| 62 |
dt = datetime.datetime.fromtimestamp(epoch)
|
| 63 |
country_pop_map = self.guesser.country_pop_from_datetime(dt, offset)
|
|
|
|
| 64 |
return self.guesser.guess_zone(name, country_pop_map=country_pop_map)
|
| 65 |
|
| 66 |
def get_Dictionary_Based_Region(self,df):
|
|
|
|
| 83 |
name = data_utils.text_to_romanize(name)
|
| 84 |
name = data_utils.remove_spaces_from_ends(name)
|
| 85 |
return name
|
| 86 |
+
|
| 87 |
dataset['Author'] = dataset['Author'].apply(transform_string)
|
|
|
|
| 88 |
dataset["Author_Timezone"]= dataset["Author_Timezone"] /60
|
| 89 |
+
|
| 90 |
dataset=self.get_Dictionary_Based_Region(dataset)
|
| 91 |
model,label_encoder,optF1,optROC=self.load_model(self.models_directory+"/region/files/")
|
| 92 |
y_pred_F1,y_pred_ROC=self.model_prediction(dataset,model,label_encoder,optF1,optROC)
|
| 93 |
dataset["region-prediction_F1"]=y_pred_F1
|
| 94 |
dataset["region-prediction_ROC"]=y_pred_ROC
|
| 95 |
|
| 96 |
+
|
| 97 |
+
dataset['region_Dictionary'] = dataset['region_Dictionary'].apply(lambda x: x if pd.notna(x) else None)
|
| 98 |
+
dataset["region-prediction"] = dataset.apply(lambda row: row["region_Dictionary"] if row["region_Dictionary"] else row["region-prediction_F1"] , axis=1)
|
| 99 |
|
| 100 |
|
| 101 |
Europe=dataset[dataset["region-prediction"]=="Europe"]
|
|
|
|
| 127 |
Oceania["sub-region-prediction_ROC"]="Australia and New Zealand"
|
| 128 |
if not Africa.empty:
|
| 129 |
Africa["sub-region-prediction_F1"]=Africa["region-prediction"]
|
| 130 |
+
Africa["sub-region-prediction_ROC"]=Africa["region-prediction"]
|
| 131 |
+
|
| 132 |
data=pd.concat([Europe,Asia,Oceania,Americas,Africa])
|
| 133 |
+
data["sub-region_Dictionary"] = data["sub-region_Dictionary"].apply(lambda x: x if pd.notna(x) else None)
|
| 134 |
data["sub-region-prediction"] = data.apply(lambda row: row["sub-region_Dictionary"] if row["sub-region_Dictionary"] else row["sub-region-prediction_F1"] , axis=1)
|
| 135 |
+
|
| 136 |
return data
|
| 137 |
|
| 138 |
|