| import pandas as pd |
| from functions.semantic_search import search |
|
|
def contains_code(crs_codes, code_list):
    """Return True if any code of a ';'-separated field is in *code_list*.

    Args:
        crs_codes: Cell value holding one or more codes joined by ``;``.
            The value is converted with ``str()`` first, so non-string
            values (numbers, NaN) are accepted.
        code_list: Collection of codes to match against.

    Returns:
        bool: True when at least one of the split codes is contained in
        *code_list*, otherwise False.
    """
    # Strip each part so that "110; 120" matches the same way as "110;120".
    codes = (code.strip() for code in str(crs_codes).split(';'))
    return any(code in code_list for code in codes)
|
|
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Filter the project table by the selected criteria.

    Args:
        df: DataFrame of projects. Depending on the active filters the
            columns ``crs_3_code``, ``crs_5_code``, ``sgd_pred_code``,
            ``country`` and ``orga_abbreviation`` are read.
        crs3_list: Selected CRS-3 codes; only used when ``crs5_list`` is
            empty.
        crs5_list: Selected CRS-5 codes; takes precedence over
            ``crs3_list`` when both are given.
        sdg_str: SDG number as a string (converted with ``int``), or ``""``
            for no SDG filter.
        country_code_list: Country codes; a row is kept when its
            ``country`` value contains at least one of them as a substring.
        orga_code_list: Organisation abbreviations to keep.
        query: Free-text query handed to ``search``; ``""`` disables the
            semantic search step.
        model: Passed through to ``search`` unchanged.
        embeddings: Passed through to ``search`` unchanged.
        TOP_X_PROJECTS: Maximum number of results requested from
            ``search``; capped at the number of rows left after filtering.

    Returns:
        The filtered DataFrame, or whatever ``search`` returns when a
        query is given and rows remain. If none of the CRS, SDG or query
        filters is set, ``df`` is returned unchanged.

    Raises:
        ValueError: If ``sdg_str`` is non-empty and not an integer literal.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened
    # source. The country/organisation filters sit inside this guard and so
    # only apply together with a CRS, SDG or query filter -- confirm intent.
    if crs3_list or crs5_list or sdg_str or query:

        # CRS filter: the more specific CRS-5 selection wins over CRS-3.
        if crs5_list:
            df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
        elif crs3_list:
            df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

        # SDG filter.
        if sdg_str:
            df = df[df["sgd_pred_code"] == int(sdg_str)]

        # Country filter: OR the per-country matches into one mask so a
        # row matching several selected countries is returned only once
        # and the original row order is kept.
        if country_code_list:
            country_mask = None
            for code in country_code_list:
                hit = df["country"].str.contains(code, regex=False, na=False)
                country_mask = hit if country_mask is None else country_mask | hit
            df = df[country_mask]

        # Organisation filter.
        if orga_code_list:
            df = df[df['orga_abbreviation'].isin(orga_code_list)]

        # Semantic search over the remaining rows; never request more
        # results than there are rows left.
        if query and len(df) > 0:
            top_k = min(TOP_X_PROJECTS, len(df))
            df = search(query, model, embeddings, df, top_k)

    return df
| |
| |
| |
|
|
|
|