Merge branch 'main' of https://huggingface.co/spaces/GIZ/eb-synergy-app
Browse files- functions/filter_projects.py +49 -0
- modules/navbar.py +40 -0
- requirements.txt +9 -0
- similarity_page.py +3 -1
functions/filter_projects.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from functions.semantic_search import search
|
| 3 |
+
|
| 4 |
+
def contains_code(crs_codes, code_list):
    """Return True if any ';'-separated code in *crs_codes* is in *code_list*.

    Args:
        crs_codes: Cell value holding one or more codes joined by ';'.
            It is converted with ``str()`` first, so non-string values
            (numbers, ``None``, NaN) are accepted and compared by their
            string form.
        code_list: Collection of selected codes to test membership against.

    Returns:
        bool: True when at least one non-empty code of the cell is selected.
    """
    # Strip each token so values such as "110; 120" still match "120";
    # empty tokens (from "110;;" or a trailing ';') are never a match.
    tokens = (token.strip() for token in str(crs_codes).split(';'))
    return any(token != "" and token in code_list for token in tokens)
|
| 7 |
+
|
| 8 |
+
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Apply the selected CRS, SDG, country, organization and query filters.

    Args:
        df: Project DataFrame. Depending on the active filters the columns
            'crs_3_code', 'crs_5_code', 'sgd_pred_code', 'country' and
            'orga_abbreviation' are read.
        crs3_list: Selected CRS-3 codes; only used when no CRS-5 code is
            selected.
        crs5_list: Selected CRS-5 codes; take precedence over ``crs3_list``.
        sdg_str: Selected SDG number as a string, or "" for no SDG filter.
        country_code_list: Country codes; a row is kept when its 'country'
            value contains at least one of them as a substring.
        orga_code_list: Organization abbreviations to keep.
        query: Free-text query, or "" to skip the semantic search.
        model: Passed through unchanged to ``search``.
        embeddings: Passed through unchanged to ``search``.
        TOP_X_PROJECTS: Upper bound on the number of rows requested from
            ``search``; clamped to the number of remaining rows.

    Returns:
        The filtered DataFrame, or ``df`` unchanged when none of the CRS,
        SDG or query filters is set.
    """
    # CRS, SDG and query are the filters where "nothing selected" does not
    # mean "select all": when all of them are empty, no filtering happens at
    # all (country / organization selections alone are ignored).
    # NOTE(review): nesting reconstructed from a view without indentation;
    # confirm that country/organization filters were inside this check.
    if crs3_list == [] and crs5_list == [] and sdg_str == "" and query == "":
        return df

    # FILTER CRS: a CRS-5 selection always wins over a CRS-3 selection.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    if sdg_str != "":
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY
    if country_code_list != []:
        # OR the per-country matches into a single mask so that a project
        # listing several selected countries is returned once (in its
        # original position) instead of once per matching country.
        country_mask = pd.Series(False, index=df.index)
        for country_code in country_code_list:
            # regex=False: codes are literal text, not patterns.
            country_mask |= df["country"].str.contains(country_code, na=False, regex=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list != []:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # FILTER QUERY: never ask the search for more rows than are left.
    if query != "" and len(df) > 0:
        top_x = min(TOP_X_PROJECTS, len(df))
        df = search(query, model, embeddings, df, top_x)

    return df
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
modules/navbar.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from streamlit_option_menu import option_menu # https://github.com/victoryhb/streamlit-option-menu
|
| 3 |
+
import similarity_page
|
| 4 |
+
|
| 5 |
+
# giz-dsc colors
|
| 6 |
+
# orange: #e5b50d
|
| 7 |
+
# green: #48d47b
|
| 8 |
+
# blue: #0da2dc
|
| 9 |
+
# grey: #dadada
|
| 10 |
+
|
| 11 |
+
# giz colors https://www.giz.de/cdc/en/html/59638.html
|
| 12 |
+
# red: #c80f0f
|
| 13 |
+
# grey: #6f6f6f
|
| 14 |
+
# light_grey: #b2b2b2
|
| 15 |
+
# light_red: #eba1a3
|
| 16 |
+
|
| 17 |
+
def show_navbar():
    """Render the two matching tabs and draw the matching page inside each."""
    # Injected CSS that enlarges the font size of the tab labels.
    tab_font_css = '''
    <style>
        .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
        font-size:1rem;
        }
    </style>
    '''
    st.markdown(tab_font_css, unsafe_allow_html=True)

    tab_labels = [
        "🔍 Multi-Project Matching",
        "🎯 Single-Project Matching",
    ]
    multi_tab, single_tab = st.tabs(tab_labels)

    with multi_tab:
        similarity_page.show_multi_matching_page()

    with single_tab:
        similarity_page.show_single_matching_page()
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==1.26.4
|
| 2 |
+
pandas==2.1.4
|
| 3 |
+
streamlit==1.32.2
|
| 4 |
+
streamlit-option-menu==0.3.12
|
| 5 |
+
scipy==1.12.0
|
| 6 |
+
faiss-cpu==1.8.0
|
| 7 |
+
faiss-gpu==1.7.2
|
| 8 |
+
sentence-transformers==2.5.1
|
| 9 |
+
streamlit-aggrid==0.3.4
|
similarity_page.py
CHANGED
|
@@ -17,13 +17,15 @@ from functions.filter_single import filter_single
|
|
| 17 |
from functions.calc_matches import calc_matches
|
| 18 |
from functions.same_country_filter import same_country_filter
|
| 19 |
from functions.single_similar import find_similar
|
| 20 |
-
import psutil
|
| 21 |
import os
|
| 22 |
import gc
|
| 23 |
|
|
|
|
| 24 |
def get_process_memory():
|
| 25 |
process = psutil.Process(os.getpid())
|
| 26 |
return process.memory_info().rss / (1024 * 1024)
|
|
|
|
| 27 |
|
| 28 |
# Catch DATA
|
| 29 |
# Load Similarity matrix
|
|
|
|
| 17 |
from functions.calc_matches import calc_matches
|
| 18 |
from functions.same_country_filter import same_country_filter
|
| 19 |
from functions.single_similar import find_similar
|
| 20 |
+
#import psutil
|
| 21 |
import os
|
| 22 |
import gc
|
| 23 |
|
| 24 |
+
"""
|
| 25 |
def get_process_memory():
|
| 26 |
process = psutil.Process(os.getpid())
|
| 27 |
return process.memory_info().rss / (1024 * 1024)
|
| 28 |
+
"""
|
| 29 |
|
| 30 |
# Catch DATA
|
| 31 |
# Load Similarity matrix
|