Jan Mühlnikel
committed on
Commit
·
cee7d6e
1
Parent(s):
59435af
return to commit 82f1167e2b824e8eed9a8c440714d67efd84726b
Browse files- app.py +0 -1
- functions/filter_projects.py +3 -8
- modules/navbar.py +1 -0
- requirements.txt +1 -2
- similarity_page.py +4 -10
- src/embeddings.pkl +3 -0
- src/extended_similarities.npz +2 -2
- src/extended_similarities_nonsimorga.npz +2 -2
- src/projects/project_region.csv +1 -1
app.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
|
| 3 |
# PAGE CONFIG
|
| 4 |
st.set_page_config(
|
| 5 |
page_title='Development Banks Collaboration Analyzer',
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
# PAGE CONFIG
|
| 3 |
st.set_page_config(
|
| 4 |
page_title='Development Banks Collaboration Analyzer',
|
functions/filter_projects.py
CHANGED
|
@@ -5,13 +5,9 @@ def contains_code(crs_codes, code_list):
|
|
| 5 |
codes = str(crs_codes).split(';')
|
| 6 |
return any(code in code_list for code in codes)
|
| 7 |
|
| 8 |
-
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
|
| 9 |
-
#query,
|
| 10 |
-
model,
|
| 11 |
-
#embeddings,
|
| 12 |
-
TOP_X_PROJECTS=30):
|
| 13 |
# Check if filters where not all should be selected are empty
|
| 14 |
-
if crs3_list != [] or crs5_list != [] or sdg_str != ""
|
| 15 |
|
| 16 |
# FILTER CRS
|
| 17 |
if crs3_list and not crs5_list:
|
|
@@ -39,12 +35,11 @@ def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_c
|
|
| 39 |
df = df[df['orga_abbreviation'].isin(orga_code_list)]
|
| 40 |
|
| 41 |
# FILTER QUERY
|
| 42 |
-
"""
|
| 43 |
if query != "" and len(df) > 0:
|
| 44 |
if len(df) < TOP_X_PROJECTS:
|
| 45 |
TOP_X_PROJECTS = len(df)
|
| 46 |
df = search(query, model, embeddings, df, TOP_X_PROJECTS)
|
| 47 |
-
|
| 48 |
|
| 49 |
|
| 50 |
return df
|
|
|
|
| 5 |
codes = str(crs_codes).split(';')
|
| 6 |
return any(code in code_list for code in codes)
|
| 7 |
|
| 8 |
+
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# Check if filters where not all should be selected are empty
|
| 10 |
+
if crs3_list != [] or crs5_list != [] or sdg_str != "" or query != "":
|
| 11 |
|
| 12 |
# FILTER CRS
|
| 13 |
if crs3_list and not crs5_list:
|
|
|
|
| 35 |
df = df[df['orga_abbreviation'].isin(orga_code_list)]
|
| 36 |
|
| 37 |
# FILTER QUERY
|
|
|
|
| 38 |
if query != "" and len(df) > 0:
|
| 39 |
if len(df) < TOP_X_PROJECTS:
|
| 40 |
TOP_X_PROJECTS = len(df)
|
| 41 |
df = search(query, model, embeddings, df, TOP_X_PROJECTS)
|
| 42 |
+
|
| 43 |
|
| 44 |
|
| 45 |
return df
|
modules/navbar.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
import similarity_page
|
| 3 |
|
| 4 |
# giz-dsc colors
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from streamlit_option_menu import option_menu # https://github.com/victoryhb/streamlit-option-menu
|
| 3 |
import similarity_page
|
| 4 |
|
| 5 |
# giz-dsc colors
|
requirements.txt
CHANGED
|
@@ -6,5 +6,4 @@ scipy==1.12.0
|
|
| 6 |
faiss-cpu==1.8.0
|
| 7 |
faiss-gpu==1.7.2
|
| 8 |
sentence-transformers==2.5.1
|
| 9 |
-
streamlit-aggrid==0.3.4
|
| 10 |
-
psutil==5.9.0
|
|
|
|
| 6 |
faiss-cpu==1.8.0
|
| 7 |
faiss-gpu==1.7.2
|
| 8 |
sentence-transformers==2.5.1
|
| 9 |
+
streamlit-aggrid==0.3.4
|
|
|
similarity_page.py
CHANGED
|
@@ -111,7 +111,6 @@ def load_model():
|
|
| 111 |
return model
|
| 112 |
|
| 113 |
# Load Embeddings
|
| 114 |
-
"""
|
| 115 |
@st.cache_data
|
| 116 |
def load_embeddings_and_index():
|
| 117 |
# Load embeddings
|
|
@@ -120,7 +119,7 @@ def load_embeddings_and_index():
|
|
| 120 |
embeddings = stored_data["embeddings"]
|
| 121 |
|
| 122 |
return embeddings
|
| 123 |
-
|
| 124 |
|
| 125 |
# USE CACHE FUNCTIONS
|
| 126 |
sim_matrix = load_sim_matrix()
|
|
@@ -135,7 +134,7 @@ COUNTRY_OPTION_LIST = getCountry()
|
|
| 135 |
|
| 136 |
# LOAD MODEL FROM CACHE FOR SEMANTIC SEARCH
|
| 137 |
model = load_model()
|
| 138 |
-
|
| 139 |
|
| 140 |
def show_multi_matching_page():
|
| 141 |
#st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
|
@@ -199,7 +198,7 @@ def show_multi_matching_page():
|
|
| 199 |
)
|
| 200 |
|
| 201 |
# SEARCH BOX
|
| 202 |
-
|
| 203 |
|
| 204 |
with col3:
|
| 205 |
# COUNTRY SELECTION
|
|
@@ -243,11 +242,7 @@ def show_multi_matching_page():
|
|
| 243 |
|
| 244 |
# FILTER DF WITH SELECTED FILTER OPTIONS
|
| 245 |
TOP_X_PROJECTS = 30
|
| 246 |
-
filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
|
| 247 |
-
#query,
|
| 248 |
-
model,
|
| 249 |
-
#embeddings,
|
| 250 |
-
TOP_X_PROJECTS)
|
| 251 |
if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
|
| 252 |
# FIND MATCHES
|
| 253 |
## If only same country checkbox is activated
|
|
@@ -317,7 +312,6 @@ def show_single_matching_page():
|
|
| 317 |
else:
|
| 318 |
search_list = title_search_list
|
| 319 |
|
| 320 |
-
|
| 321 |
project_option = st.selectbox(
|
| 322 |
label = 'Search for a project',
|
| 323 |
index = None,
|
|
|
|
| 111 |
return model
|
| 112 |
|
| 113 |
# Load Embeddings
|
|
|
|
| 114 |
@st.cache_data
|
| 115 |
def load_embeddings_and_index():
|
| 116 |
# Load embeddings
|
|
|
|
| 119 |
embeddings = stored_data["embeddings"]
|
| 120 |
|
| 121 |
return embeddings
|
| 122 |
+
|
| 123 |
|
| 124 |
# USE CACHE FUNCTIONS
|
| 125 |
sim_matrix = load_sim_matrix()
|
|
|
|
| 134 |
|
| 135 |
# LOAD MODEL FROM CACHE FOR SEMANTIC SEARCH
|
| 136 |
model = load_model()
|
| 137 |
+
embeddings = load_embeddings_and_index()
|
| 138 |
|
| 139 |
def show_multi_matching_page():
|
| 140 |
#st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
|
|
|
| 198 |
)
|
| 199 |
|
| 200 |
# SEARCH BOX
|
| 201 |
+
query = st.text_input("Search Query")
|
| 202 |
|
| 203 |
with col3:
|
| 204 |
# COUNTRY SELECTION
|
|
|
|
| 242 |
|
| 243 |
# FILTER DF WITH SELECTED FILTER OPTIONS
|
| 244 |
TOP_X_PROJECTS = 30
|
| 245 |
+
filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
|
| 247 |
# FIND MATCHES
|
| 248 |
## If only same country checkbox is activated
|
|
|
|
| 312 |
else:
|
| 313 |
search_list = title_search_list
|
| 314 |
|
|
|
|
| 315 |
project_option = st.selectbox(
|
| 316 |
label = 'Search for a project',
|
| 317 |
index = None,
|
src/embeddings.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c9bce42b5bef1adebd5b8e157c7dc3197d75c488931960dd8aa736329c024b1
|
| 3 |
+
size 67450241
|
src/extended_similarities.npz
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ae835bcd73475fdfc959b9f3682f0efa33240550a9405fb4e48638ea2e3175a
|
| 3 |
+
size 38542579
|
src/extended_similarities_nonsimorga.npz
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7737f324bf8e6998d1c761b92266d3677bac052a4db1872ecd3e3ba1d920913
|
| 3 |
+
size 27134286
|
src/projects/project_region.csv
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2788348
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09ff7b7aefbd3cf3a0f651b8120609d755ca7b8abc0b4026170385a5b8b903f9
|
| 3 |
size 2788348
|