Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import pandas as pd | |
| from Functionalities import NLP_Helper | |
class KeywordAdgroupPredict:
    """Recommend an existing ad group for each candidate keyword using embedding similarity."""

    def predict_ad_group_for_keywords(self, candidate_kw_df, cur_kw_df, features, new_kw_col,
                                      cur_kw_col, cur_adg_col, cur_camp_col,
                                      extra_stopwords=None) -> pd.DataFrame:
        """
        Predict an ad group for each candidate keyword from the current keywords and their ad groups.

        The current keywords are joined per (campaign, ad group), the selected ``features``
        columns are concatenated into one text per ad group, both sides are embedded through
        ``NLP_Helper``, and every candidate keyword is paired with the ad group whose
        cosine similarity is highest.

        Neither input dataframe is modified; the method works on private copies.

        :param candidate_kw_df: keywords dataframe that we want to predict ad groups for
        :param cur_kw_df: keywords details dataframe from current campaign
        :param features: list of columns (out of the campaign, ad group and keyword columns)
            to use for finding relevancy
        :param new_kw_col: the column in candidate_kw_df which holds the keywords
        :param cur_kw_col: the column in cur_kw_df which holds the keywords
        :param cur_adg_col: the column in cur_kw_df which holds the corresponding ad groups
        :param cur_camp_col: the column in cur_kw_df which holds the corresponding campaigns
        :param extra_stopwords: any extra terms that we want to avoid while finding relevancy;
            defaults to ``['_']`` when omitted
        :return: dataframe with the columns of candidate_kw_df plus
            ['Recommended Ad group', 'relevance score'], sorted by descending relevance score;
            the row index of candidate_kw_df is preserved
        """
        # A list literal as a default would be shared between calls; build it per call.
        if extra_stopwords is None:
            extra_stopwords = ['_']

        output_cols = list(candidate_kw_df.columns) + ['Recommended Ad group', 'relevance score']

        # Private copy: the helper columns added below must not leak into the caller's frame.
        candidate_kw_df = candidate_kw_df.copy()

        # Nothing to predict: return an empty, correctly shaped frame without loading the model.
        if len(candidate_kw_df) == 0:
            candidate_kw_df['Recommended Ad group'] = np.array([], dtype=object)
            candidate_kw_df['relevance score'] = np.array([], dtype=float)
            return candidate_kw_df[output_cols]

        sentence_model = NLP_Helper.get_embedding_model()

        # Explicit copy so the cleanup below writes to our own frame, not to a view of the input.
        cur_kw_df = cur_kw_df[[cur_kw_col, cur_adg_col, cur_camp_col]].copy()

        # cleanup: strip list/quote characters ([, ], ", ') from the keyword text.
        # Assigning the result back (instead of a chained inplace replace) is reliable
        # regardless of pandas' copy/view semantics.
        cur_kw_df[cur_kw_col] = cur_kw_df[cur_kw_col].replace(r'[\[\]"\']', '', regex=True)

        # joining features per ad group to one column, for vectorization step
        cur_kw_df = cur_kw_df.groupby([cur_camp_col, cur_adg_col]).agg(
            {cur_kw_col: ', '.join}).reset_index()
        cur_kw_df['Ad group contents'] = cur_kw_df[features].astype(str).agg(' ,'.join, axis=1)
        # NOTE(review): the helper's return value is ignored, so it presumably cleans the
        # listed columns in place -- confirm against NLP_Helper.
        NLP_Helper.remove_unnecessary_tokens_from_df(cur_kw_df, ['Ad group contents'],
                                                     extra_stopwords=extra_stopwords)

        # vectorization
        candidate_kw_df['Keyword vector'] = NLP_Helper.str_to_vector_list(
            candidate_kw_df[new_kw_col].tolist(), sentence_model)
        cur_kw_df['Ad group vector'] = NLP_Helper.str_to_vector_list(
            cur_kw_df['Ad group contents'].tolist(), sentence_model)

        # cosine similarity calc: one row per candidate keyword, one column per ad group
        cosine_sim_matrix = NLP_Helper.cosine_sim_matrix(candidate_kw_df['Keyword vector'],
                                                         cur_kw_df['Ad group vector'])
        best_positions = np.asarray(np.argmax(cosine_sim_matrix, axis=1))
        best_scores = np.asarray(np.amax(cosine_sim_matrix, axis=1), dtype=float)

        # Attach the results positionally (plain arrays) so that a candidate frame with a
        # non-default index is not misaligned the way an index-based concat would be.
        candidate_kw_df['Recommended Ad group'] = cur_kw_df[cur_adg_col].to_numpy()[best_positions]
        candidate_kw_df['relevance score'] = best_scores

        new_kw_adgroup_relevance_df = candidate_kw_df[output_cols]
        return new_kw_adgroup_relevance_df.sort_values(by='relevance score', ascending=False)