Spaces:
Sleeping
Sleeping
| import csv | |
| import time | |
| from pprint import pprint | |
| import Pinpoint_Internal.FeatureExtraction | |
| from Pinpoint_Internal.RandomForest import * | |
class predictor():
    """Classify a single message with a ``random_forest`` model.

    The class wires together a ``random_forest`` model wrapper and a
    ``feature_extraction`` helper (both from ``Pinpoint_Internal``) and
    exposes :meth:`predict`, which pushes one string through the feature
    extractor and then through the model.
    """

    def __init__(self):
        """Create the model wrapper and the feature extractor.

        NOTE(review): ``train_model`` is called with ``features_file=None``
        and ``force_new_dataset=False``; presumably this loads the saved
        model at ``model_location`` instead of training a new one — confirm
        against ``random_forest.train_model``.
        """
        self.model = random_forest()
        self.model.PSYCHOLOGICAL_SIGNALS_ENABLED = False  # Needs LIWC markup
        self.model.BEHAVIOURAL_FEATURES_ENABLED = False
        self.model.train_model(features_file=None, force_new_dataset=False,
                               model_location=r"far-right-radical-language.model")
        self.dict_of_users_all = {}
        self.feature_extractor = Pinpoint_Internal.FeatureExtraction.feature_extraction(
            violent_words_dataset_location="swears",
            baseline_training_dataset_location="LIWC2015 Results (Storm_Front_Posts).csv")

    def predict(self, string_to_predict):
        """Return whether the model labels ``string_to_predict`` as class 1.

        The message is written to ``all-messages.csv`` under a placeholder
        user, run through the feature extractor, dumped to
        ``messages.json`` and read back as a data frame whose matching row
        is handed to the underlying model. Both files are created in the
        current working directory under fixed names, so overlapping calls
        would overwrite each other's files.

        Args:
            string_to_predict: The message to classify; it is formatted
                into a string before being written out.

        Returns:
            ``True`` if the model's prediction equals ``1``, ``False`` if
            it does not, and ``None`` if no feature row matches the
            extracted message vector.

        Raises:
            IndexError: If the feature extractor produced no entry for the
                placeholder user.
        """
        # Imported locally so the method does not rely on these names
        # leaking in through ``from Pinpoint_Internal.RandomForest import *``.
        import json
        import os

        # Re-run the constructor so every call starts from fresh objects.
        # NOTE(review): this rebuilds the model and extractor on each call;
        # presumably it is here to clear extractor state — confirm before
        # removing.
        self.__init__()

        # Best-effort removal of files left over from a previous call.
        # Only filesystem errors (typically "file not found") are ignored.
        for stale_path in ("./messages.json", "./all-messages.csv"):
            try:
                os.remove(stale_path)
            except OSError:
                pass

        users_posts = [{"username": "tmp", "timestamp": "tmp",
                        "message": "{}".format(string_to_predict)}]

        # No header row is written; only the data rows go into the CSV.
        with open('all-messages.csv', 'w', encoding='utf8', newline='') as output_file:
            writer = csv.DictWriter(output_file, fieldnames=["username", "timestamp", "message"])
            writer.writerows(users_posts)

        self.feature_extractor._get_standard_tweets("all-messages.csv")

        with open("./messages.json", 'w') as outfile:
            features = self.feature_extractor.completed_tweet_user_features
            json.dump(features, outfile, indent=4)

        rows = self.model.get_features_as_df("./messages.json", True)
        rows.pop("is_extremist")  # drop the label column before predicting

        # Collect the 200 message-vector components of every row. The row is
        # round-tripped through ``to_json`` exactly as before so the values
        # stay comparable with the '%.10f'-formatted vector built below.
        message_vector_list = []
        for row_index in range(len(users_posts)):
            row_as_json = json.loads(rows.iloc[row_index].to_json())
            message_vector_list.append(
                [row_as_json["message_vector_{}".format(i)] for i in range(1, 201)])

        completed_features = self.feature_extractor.completed_tweet_user_features
        for post in users_posts:
            user_unique_id = str(
                self.feature_extractor._get_unique_id_from_username(post["username"]))

            # Find the first extractor entry that is keyed by this user id.
            for entry in completed_features:
                try:
                    user_features = entry[user_unique_id]
                    break
                except KeyError:
                    continue
            else:
                raise IndexError(
                    "no extracted features found for user id {}".format(user_unique_id))

            formatted_vector = [float('%.10f' % elem) for elem in user_features["message_vector"]]

            # Locate the data-frame row whose message vector matches this
            # post and predict on that row (index tracked via enumerate).
            for row_index, candidate_vector in enumerate(message_vector_list):
                if candidate_vector == formatted_vector:
                    prediction = self.model.model.predict([rows.iloc[row_index]])
                    return bool(prediction == 1)

        # No feature row matched the extracted vector.
        return None