Spaces:

GIZ
/

Development-Project-Synergy-Finder

Running on CPU Upgrade

App Files Files Community

Jan Mühlnikel committed on Mar 18, 2024

Commit

fd7cbe7

1 Parent(s): a2d83b8

move all to one app page

Browse files

Files changed (18) hide show

__pycache__/similarity.cpython-310.pyc +0 -0
__pycache__/similarity_page.cpython-310.pyc +0 -0
home.py +0 -4
sdg.py +0 -0
sector.py +0 -225
similarity.py +0 -112
utils/__pycache__/crs_table.cpython-310.pyc +0 -0
utils/__pycache__/filter_modules.cpython-310.pyc +0 -0
utils/__pycache__/navbar.cpython-310.pyc +0 -0
utils/__pycache__/sdg_table.cpython-310.pyc +0 -0
utils/__pycache__/semantic_search.cpython-310.pyc +0 -0
utils/__pycache__/similarity_table.cpython-310.pyc +0 -0
utils/crs_table.py +0 -49
utils/filter_modules.py +0 -21
utils/navbar.py +0 -50
utils/sdg_table.py +0 -43
utils/semantic_search.py +0 -19
utils/similarity_table.py +0 -53

__pycache__/similarity.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/similarity.cpython-310.pyc and b/__pycache__/similarity.cpython-310.pyc differ

__pycache__/similarity_page.cpython-310.pyc ADDED Viewed

Binary file (3.96 kB). View file

home.py DELETED Viewed

@@ -1,4 +0,0 @@
-import streamlit as st
-def show_page():
-    st.write("home")

sdg.py DELETED Viewed

File without changes

sector.py DELETED Viewed

@@ -1,225 +0,0 @@
-"""
-Page to analyse the link between crs codes, countries and organizations
-"""
-################
-# DEPENDENCIES #
-################
-import streamlit as st
-import pandas as pd
-import utils.crs_table as crs_table
-import utils.sdg_table as sdg_table
-import utils.filter_modules as filter_modules
-"""
-from importlib.machinery import SourceFileLoader
-crs_overlap = SourceFileLoader("crs_overlap", "data/models/crs_overlap.py").load_module()
-sdg_overlap = SourceFileLoader("sdg_overlap", "data/models/sdg_overlap.py").load_module()
-CONSTANTS = SourceFileLoader("CONSTANTS", "config/CONSTANTS.py").load_module()
-# CACHE DATA
-# FETCH NEEDED DATA AND STORE IN CACHE MEMORY TO SAVE LOADING TIME
-@st.cache_data
-def getCRS3():
-    # Read in CRS3 CODELISTS
-    crs3_df = pd.read_csv('app/src/codelists/crs3_codes.csv')
-    CRS3_CODES = crs3_df['code'].tolist()
-    CRS3_NAME = crs3_df['name'].tolist()
-    CRS3_MERGED = {f"{name} - {code}": code for name, code in zip(CRS3_NAME, CRS3_CODES)}
-    return CRS3_MERGED
-@st.cache_data
-def getCRS5():
-    # Read in CRS5 CODELISTS
-    crs5_df = pd.read_csv('app/src/codelists/crs5_codes.csv')
-    CRS5_CODES = crs5_df['code'].tolist()
-    CRS5_NAME = crs5_df['name'].tolist()
-    CRS5_MERGED = {code: [f"{name} - {code}"] for name, code in zip(CRS5_NAME, CRS5_CODES)}
-    return CRS5_MERGED
-@st.cache_data
-def getSDG():
-    # Read in SDG CODELISTS
-    sdg_df = pd.read_csv('app/src/codelists/sdg_goals.csv')
-    SDG_NAMES = sdg_df['name'].tolist()
-    return SDG_NAMES
-@st.cache_data
-def getCountry():
-    # Read in countries from codelist
-    country_df = pd.read_csv('app/src/codelists/country_codes_ISO3166-1alpha-2.csv')
-    COUNTRY_CODES = country_df['Alpha-2 code'].tolist()
-    COUNTRY_NAMES = country_df['Country'].tolist()
-    return country_df, COUNTRY_CODES, COUNTRY_NAMES
-CRS3_MERGED = getCRS3()
-CRS5_MERGED = getCRS5()
-SDG_NAMES = getSDG()
-country_df, COUNTRY_CODES, COUNTRY_NAMES = getCountry()
-# SPECIAL SELECTIONS
-## COUNTRY
-SPECIAL_COUNTRY_SLECTIONS = ["All"]
-SHOW_ALL_COUNTRIES = False # If all countries should be shown in matching
-## ORGANIZATION
-SPECIAL_ORGA_SLECTIONS = ["All"]
-SHOW_ALL_ORGAS = False
-"""
-########
-# PAGE #
-########
-def show_page():
-    """
-    def show_crs():
-        # SESSION STATES
-        st.session_state.crs5_option_disabled = True
-        # SELECTION FIELDS
-        col1, col2 = st.columns([1, 1])
-        with col1:
-            #####################
-            # CRS 3 CODE SELECT #
-            #####################
-            crs3_option = st.multiselect(
-                'CRS 3',
-                CRS3_MERGED,
-                placeholder="Select"
-                )
-            #####################
-            # CRS 5 CODE SELECT #
-            #####################
-            # Only enable crs5 select field when crs3 code is selected
-            if crs3_option != []:
-                st.session_state.crs5_option_disabled = False
-            # define list of crs5 codes dependent on crs3 codes
-            crs5_list = [txt[0].replace('"', "") for crs3_item in crs3_option for code, txt in CRS5_MERGED.items() if str(code)[:3] == str(crs3_item)[-3:]]
-            # crs5 select field
-            crs5_option = st.multiselect(
-                'CRS 5',
-                crs5_list,
-                placeholder="Select",
-                disabled=st.session_state.crs5_option_disabled
-                )
-        with col2:
-            # COUNTRY SELECTION
-            country_option = filter_modules.country_option(SPECIAL_COUNTRY_SLECTIONS, COUNTRY_NAMES)
-            # ORGA SELECTION
-            orga_option = filter_modules.orga_option(SPECIAL_ORGA_SLECTIONS, CONSTANTS.ORGA_SEARCH)
-        ################
-        # SHOW RESULTS #
-        ################
-        # Extract Orgas from multiselect
-        if "All" in orga_option:
-            SHOW_ALL_ORGAS = True
-            selected_orgas = []
-        else:
-            SHOW_ALL_ORGAS = False
-            selected_orgas = [str(o).replace(")", "").lower().split("(")[1] for o in orga_option]
-        if country_option != []:
-            # all selection
-            if "All" in country_option:
-                SHOW_ALL_COUNTRIES = True
-                country_option.remove("All")
-            else:
-                SHOW_ALL_COUNTRIES = False
-            if crs3_option != []:
-                # CRS 3 codes from option
-                crs3_list = [i[-3:] for i in crs3_option]
-                # get country codes from multiselect
-                country_names = [str(c) for c in country_option]
-                country_codes = [
-                    country_df[country_df['Country'] == c]['Alpha-2 code'].values[0].replace('"', "").strip(" ")
-                    for c in country_names
-                    ]
-                result_df = crs_overlap.calc_crs3(crs3_list, country_codes, selected_orgas, SHOW_ALL_COUNTRIES, SHOW_ALL_ORGAS)
-                if crs5_option != []:
-                    # CRS 5 codes from option
-                    crs5_list = [i[-5:] for i in crs5_option]
-                    result_df = crs_overlap.calc_crs5(crs5_list, country_codes, selected_orgas, SHOW_ALL_COUNTRIES, SHOW_ALL_ORGAS)
-                # TABLE FOR CRS OVERLAP
-                crs_table.show_table(result_df)
-    def show_sdg():
-        # SELECTION
-        col1, col2 = st.columns([1, 1])
-        with col1:
-            # SDG SELECT
-            sdg_option = st.selectbox(
-                label = 'SDG',
-                index = None,
-                placeholder = "Select SDG",
-                options = SDG_NAMES,
-                )
-        with col2:
-            # COUNTRY SELECTION
-            country_option = filter_modules.country_option(SPECIAL_COUNTRY_SLECTIONS, COUNTRY_NAMES)
-            # ORGA SELECTION
-            orga_option = filter_modules.orga_option(SPECIAL_ORGA_SLECTIONS, CONSTANTS.ORGA_SEARCH)
-        # SHOW RESULTS
-        if sdg_option != None:
-            sdg_int = int(sdg_option.split(" ")[0].replace(".", ""))
-            # Extract Orgas from multiselect
-            if "All" in orga_option:
-                SHOW_ALL_ORGAS = True
-                selected_orgas = []
-            else:
-                SHOW_ALL_ORGAS = False
-                selected_orgas = [str(o).replace(")", "").lower().split("(")[1] for o in orga_option]
-            if country_option != []:
-                # all selection
-                if "All" in country_option:
-                    SHOW_ALL_COUNTRIES = True
-                    country_option.remove("All")
-                else:
-                    SHOW_ALL_COUNTRIES = False
-                country_names = [str(c) for c in country_option]
-                country_codes = [
-                    country_df[country_df['Country'] == c]['Alpha-2 code'].values[0].replace('"', "").strip(" ")
-                    for c in country_names
-                    ]
-                result_df = sdg_overlap.calc_crs3(sdg_int, country_codes, selected_orgas, SHOW_ALL_COUNTRIES, SHOW_ALL_ORGAS)
-                # TABLE FOR SDG OVERLAP
-                sdg_table.show_table(result_df)
-    # SELECT IF CRS or SDG Match
-    match_option = st.selectbox(
-                label = 'Matching Method',
-                index = 0,
-                placeholder = "Select",
-                options = ["CRS", "SDG"],
-                )
-    st.write("------------------")
-    if match_option == "CRS":
-        show_crs()
-    elif match_option == "SDG":
-        show_sdg()
-    """

similarity.py DELETED Viewed

@@ -1,112 +0,0 @@
-"""
-Page for similarities
-"""
-################
-# DEPENDENCIES #
-################
-import streamlit as st
-import pandas as pd
-from scipy.sparse import load_npz
-import pickle
-import faiss
-from sentence_transformers import SentenceTransformer
-import utils.similarity_table as similarity_table
-import utils.semantic_search as semantic_search
-import psutil
-import os
-def get_process_memory():
-    process = psutil.Process(os.getpid())
-    return process.memory_info().rss / (1024 * 1024)
-# Cache DATA
-# Load Similarity matrix
-@st.cache_data
-def load_sim_matrix():
-    loaded_matrix = load_npz("src/similarities.npz")
-    dense_matrix = loaded_matrix.toarray()
-    return dense_matrix
-@st.cache_data
-def load_projects():
-    orgas_df = pd.read_csv("src/projects/project_orgas.csv")
-    region_df = pd.read_csv("src/projects/project_region.csv")
-    sector_df = pd.read_csv("src/projects/project_sector.csv")
-    status_df = pd.read_csv("src/projects/project_status.csv")
-    texts_df = pd.read_csv("src/projects/project_texts.csv")
-    projects_df = pd.merge(orgas_df, region_df, on='iati_id', how='inner')
-    projects_df = pd.merge(projects_df, sector_df, on='iati_id', how='inner')
-    projects_df = pd.merge(projects_df, status_df, on='iati_id', how='inner')
-    projects_df = pd.merge(projects_df, texts_df, on='iati_id', how='inner')
-    return projects_df
-@st.cache_resource
-def load_model():
-    model = SentenceTransformer('all-MiniLM-L6-v2')
-    return model
-# LOAD EMBEDDINGS
-@st.cache_data
-def load_embeddings_and_index():
-    # Load embeddings
-    with open("src/embeddings.pkl", "rb") as fIn:
-        stored_data = pickle.load(fIn)
-    sentences = stored_data["sentences"]
-    embeddings = stored_data["embeddings"]
-    # Build a FAISS index from the loaded embeddings
-    dimension = embeddings.shape[1]
-    faiss_index = faiss.IndexFlatL2(dimension)
-    faiss_index.add(embeddings)
-    return sentences, embeddings, faiss_index
-# LOAD DATA
-sim_matrix = load_sim_matrix()
-projects_df = load_projects()
-model = load_model()
-sentences, embeddings, faiss_index = load_embeddings_and_index()
-def show_page():
-    st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
-    st.write("Similarities")
-    semantic_search.show_search(model, faiss_index, sentences)
-    df_subset = projects_df.head(10)
-    selected_index = st.selectbox('Select an entry', df_subset.index, format_func=lambda x: df_subset.loc[x, 'iati_id'])
-    st.write(selected_index)
-    # add index and similarity together
-    indecies = range(0, len(sim_matrix))
-    similarities = sim_matrix[selected_index]
-    zipped_sims = list(zip(indecies, similarities))
-    # remove all 0 similarities
-    filtered_sims = [(index, similarity) for index, similarity in zipped_sims if similarity != 0]
-    # Select and sort top 20 most similar projects
-    sorted_sims = sorted(filtered_sims, key=lambda x: x[1], reverse=True)
-    top_20_sims = sorted_sims[:20]
-    # create result data frame
-    index_list = [tup[0] for tup in top_20_sims]
-    print(index_list)
-    result_df = projects_df.iloc[index_list]
-    print(len(result_df))
-    print(len(result_df))
-    # add other columns to result df
-    similarity_list = [tup[1] for tup in top_20_sims]
-    result_df["similarity"] = similarity_list
-    similarity_table.show_table(result_df, similarity_list)

utils/__pycache__/crs_table.cpython-310.pyc DELETED Viewed

Binary file (1.21 kB)

utils/__pycache__/filter_modules.cpython-310.pyc DELETED Viewed

Binary file (997 Bytes)

utils/__pycache__/navbar.cpython-310.pyc DELETED Viewed

Binary file (1.14 kB)

utils/__pycache__/sdg_table.cpython-310.pyc DELETED Viewed

Binary file (1.19 kB)

utils/__pycache__/semantic_search.cpython-310.pyc DELETED Viewed

Binary file (825 Bytes)

utils/__pycache__/similarity_table.cpython-310.pyc DELETED Viewed

Binary file (1.41 kB)

utils/crs_table.py DELETED Viewed

@@ -1,49 +0,0 @@
-import streamlit as st
-def show_table(data_df):
-    st.write("------------------")
-    st.dataframe(
-        data_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "crs_3_code", "crs_5_code"]],
-        use_container_width = True,
-        height = 35 + 35 * len(data_df),
-        column_config={
-            "orga_abbreviation": st.column_config.TextColumn(
-                "Organization",
-                help="If description not in English, description in other language provided",
-                disabled=True
-            ),
-            "client": st.column_config.TextColumn(
-                "Client",
-                help="Client organization of customer",
-                disabled=True
-            ),
-            "title_main": st.column_config.TextColumn(
-                "Title",
-                help="If title not in English, title in other language provided",
-                disabled=True
-            ),
-            "description_main": st.column_config.TextColumn(
-                "Description",
-                help="If description not in English, description in other language provided",
-                disabled=True
-            ),
-            "country": st.column_config.TextColumn(
-                "Country",
-                help="Country of project",
-                disabled=True
-            ),
-            "crs_3_code": st.column_config.TextColumn(
-                "CRS 3",
-                help="CRS 3",
-                disabled=True
-            ),
-            "crs_5_code": st.column_config.TextColumn(
-                "CRS 5",
-                help="CRS 5",
-                disabled=True
-            ),
-        },
-        hide_index=True,
-    )

utils/filter_modules.py DELETED Viewed

@@ -1,21 +0,0 @@
-import pandas as pd
-import streamlit as st
-def country_option(special_cases, country_names):
-    country_option = st.multiselect(
-                'Country / Countries',
-                special_cases + country_names,
-                placeholder="Select"
-                )
-    return country_option
-def orga_option(special_cases, orga_names):
-    orga_list = special_cases + [f"{v[0]} ({k})" for k, v in orga_names.items()]
-    orga_option = st.multiselect(
-                'Development Bank / Organization',
-                orga_list,
-                placeholder="Select"
-                )
-    return orga_option

utils/navbar.py DELETED Viewed

@@ -1,50 +0,0 @@
-import streamlit as st
-from streamlit_option_menu import option_menu # https://github.com/victoryhb/streamlit-option-menu
-# giz-dsc colors
-# orange: #e5b50d
-# green: #48d47b
-# blue: #0da2dc
-# grey: #dadada
-# giz colors https://www.giz.de/cdc/en/html/59638.html
-# red: #c80f0f
-# grey: #6f6f6f
-# light_grey: #b2b2b2
-# light_red: #eba1a3
-def show_navbar():
-    st.markdown("<h1 style='color: red;'>THIS APP IS WORK IN PROGRESS ...</h1>", unsafe_allow_html=True)
-    navbar = option_menu(None, ["Home", "Sector Matches", 'Similarity Matches'],
-        icons=['house', 'list-task', "list-task", 'list-task'],
-        menu_icon="cast", default_index=0, orientation="horizontal",
-        styles={
-            "container": {
-                "padding": "0!important",
-                "background-color": "#F0F0F0"
-            },
-            "icon": {
-                "color": "#c80f0f",
-                "font-size": "25px"
-            },
-            "nav-link": {
-                "font-size": "25px",
-                "text-align": "left",
-                "margin":"0px",
-                "--hover-color": "#b2b2b2"
-            },
-            "nav-link-selected": {
-                "background-color": "#F0F0F0"
-            },
-            "nav-link-text": {
-                "color": "#333333"
-            },
-            "icon-active": {
-                "color": "#dadada"
-            }
-        }
-    )
-    return navbar

utils/sdg_table.py DELETED Viewed

@@ -1,43 +0,0 @@
-import streamlit as st
-def show_table(data_df):
-    st.write("------------------")
-    st.dataframe(
-        data_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code"]],
-        use_container_width = True,
-        height = 35 + 35 * len(data_df),
-        column_config={
-            "orga_abbreviation": st.column_config.TextColumn(
-                "Organization",
-                help="If description not in English, description in other language provided",
-                disabled=True
-            ),
-            "client": st.column_config.TextColumn(
-                "Client",
-                help="Client organization of customer",
-                disabled=True
-            ),
-            "title_main": st.column_config.TextColumn(
-                "Title",
-                help="If title not in English, title in other language provided",
-                disabled=True
-            ),
-            "description_main": st.column_config.TextColumn(
-                "Description",
-                help="If description not in English, description in other language provided",
-                disabled=True
-            ),
-            "country": st.column_config.TextColumn(
-                "Country",
-                help="Country of project",
-                disabled=True
-            ),
-            "sgd_pred_code": st.column_config.TextColumn(
-                "SDG Prediction",
-                help="Prediction of SDG's",
-                disabled=True
-            ),
-        },
-        hide_index=True,
-    )

utils/semantic_search.py DELETED Viewed

@@ -1,19 +0,0 @@
-import pickle
-import faiss
-import streamlit as st
-from sentence_transformers import SentenceTransformer
-def show_search(model, faiss_index, sentences):
-    query = st.text_input("Enter your search query:")
-    if query:
-        # Convert query to embedding
-        query_embedding = model.encode([query])[0].reshape(1, -1)
-        # Perform search
-        D, I = faiss_index.search(query_embedding, k=5)  # Search for top 5 similar items
-        # Display results
-        st.write("Top results:")
-        for i in I[0]:
-            st.write(sentences[i])

utils/similarity_table.py DELETED Viewed

@@ -1,53 +0,0 @@
-import streamlit as st
-def show_table(data_df, similarities:list):
-    st.write("------------------")
-    st.dataframe(
-        data_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
-        use_container_width = True,
-        height = 35 + 35 * len(data_df),
-        column_config={
-            "orga_abbreviation": st.column_config.TextColumn(
-                "Organization",
-                help="If description not in English, description in other language provided",
-                disabled=True
-            ),
-            "client": st.column_config.TextColumn(
-                "Client",
-                help="Client organization of customer",
-                disabled=True
-            ),
-            "title_main": st.column_config.TextColumn(
-                "Title",
-                help="If title not in English, title in other language provided",
-                disabled=True
-            ),
-            "description_main": st.column_config.TextColumn(
-                "Description",
-                help="If description not in English, description in other language provided",
-                disabled=True
-            ),
-            "country": st.column_config.TextColumn(
-                "Country",
-                help="Country of project",
-                disabled=True
-            ),
-            "sgd_pred_code": st.column_config.TextColumn(
-                "SDG Prediction",
-                help="Prediction of SDG's",
-                disabled=True
-            ),
-            "crs_3_code": st.column_config.TextColumn(
-                "CRS 3",
-                help="CRS 3 code given by organization",
-                disabled=True
-            ),
-            "crs_5_code": st.column_config.TextColumn(
-                "CRS 5",
-                help="CRS 5 code given by organization",
-                disabled=True
-            ),
-        },
-        hide_index=True,
-    )