"""Central project configuration: filesystem paths, compute device,
feature names, hyperparameters, and label mappings.

Everything here is a plain module-level constant, except ``get_device``,
which is evaluated once at import time to populate ``device``.
"""

import os

import torch

# ---------------------------------------------------------------------------
# Filesystem layout
# ---------------------------------------------------------------------------
# Two ``dirname`` hops from this file's absolute path: the parent of the
# directory that contains this module.
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

data_raw = os.path.join(root_dir, "data", "raw")
data_processed = os.path.join(root_dir, "data", "processed")
checkpoints = os.path.join(root_dir, "models", "checkpoints")

dataset_csv = os.path.join(data_raw, "twitter_human_bots.csv")

glove_dir = os.path.join(data_raw, "glove")
glove_file = os.path.join(glove_dir, "glove.twitter.27B.200d.txt")


# ---------------------------------------------------------------------------
# Compute device
# ---------------------------------------------------------------------------
def get_device() -> torch.device:
    """Return the preferred torch device: CUDA, then MPS, then CPU.

    Returns:
        ``torch.device("cuda")`` when CUDA is available; otherwise
        ``torch.device("mps")`` when this torch build exposes
        ``torch.backends.mps`` and reports it available; otherwise
        ``torch.device("cpu")``.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")
    # Guard with hasattr: not every torch build has a ``backends.mps`` module.
    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


# Resolved once at import time; importers share this single device object.
device = get_device()


# ---------------------------------------------------------------------------
# Numeric feature columns
# ---------------------------------------------------------------------------
# Order is significant for any consumer that indexes features by position
# (NOTE(review): presumably these are column names of the processed
# dataset -- confirm against the preprocessing code).
numeric_features = [
    # Raw counts and account activity
    "followers_count",
    "friends_count",
    "statuses_count",
    "favourites_count",
    "account_age_days",
    "average_tweets_per_day",
    # Log-scaled variants
    "log_followers_count",
    "log_friends_count",
    "log_statuses_count",
    "log_favourites_count",
    "log_tweets_per_follower",
    "log_followers_to_friends_ratio",
    # Ratios
    "followers_to_friends_ratio",
    "favourites_to_statuses_ratio",
    "friends_to_followers_ratio",
    "statuses_to_followers_ratio",
    # Profile flags and completeness
    "verified",
    "default_profile",
    "default_profile_image",
    "has_description",
    "has_location",
    "profile_completeness",
    "description_length",
    # Screen-name characteristics
    "screen_name_length",
    "screen_name_digits",
    "screen_name_digit_ratio",
    "screen_name_has_underscore",
    # Per-follower tweet rates
    "tweets_per_follower",
    "tweets_per_day_per_follower",
    # Bio-derived features
    "bio_url_count",
    "bio_hashtag_count",
    "bio_mention_count",
    "bio_word_count",
    "bio_has_news_keywords",
    "bio_has_org_keywords",
    "bio_likely_organisation",
    "is_established_account",
]

# Derived so it can never disagree with the list above.
num_numeric_features = len(numeric_features)


# ---------------------------------------------------------------------------
# Hyperparameters
# ---------------------------------------------------------------------------
# NOTE(review): meanings below are inferred from the names only; the code
# that consumes them is not in this module -- confirm before relying on them.
batch_size = 64
max_epochs = 50
patience = 7  # presumably early-stopping patience -- confirm
learning_rate = 1e-3
weight_decay = 1e-5
dropout = 0.3

max_seq_len = 128
max_vocab_size = 50_000
glove_dim = 200  # matches the "200d" suffix of ``glove_file``

cnn_filters = [3, 4, 5]  # presumably convolution kernel widths -- confirm
cnn_num_filters = 100
rnn_hidden = 128
rnn_layers = 1


# ---------------------------------------------------------------------------
# Label encoding
# ---------------------------------------------------------------------------
label_map = {"human": 0, "bot": 1}
# Built from ``label_map`` rather than typed out a second time, so the two
# mappings cannot drift apart when a label is added or renumbered.
inv_label_map = {index: name for name, index in label_map.items()}