Jan Mühlnikel
committed on
Commit
·
139b395
1
Parent(s):
5ae2590
added semantic search engine
Browse files
__pycache__/similarity.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/similarity.cpython-310.pyc and b/__pycache__/similarity.cpython-310.pyc differ
|
|
|
similarity.py
CHANGED
|
@@ -8,7 +8,11 @@ Page for similarities
|
|
| 8 |
import streamlit as st
|
| 9 |
import pandas as pd
|
| 10 |
from scipy.sparse import load_npz
|
|
|
|
|
|
|
|
|
|
| 11 |
import utils.similarity_table as similarity_table
|
|
|
|
| 12 |
import psutil
|
| 13 |
import os
|
| 14 |
|
|
@@ -40,14 +44,39 @@ def load_projects():
|
|
| 40 |
|
| 41 |
return projects_df
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# LOAD DATA
|
| 44 |
sim_matrix = load_sim_matrix()
|
| 45 |
projects_df = load_projects()
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def show_page():
|
| 48 |
st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
| 49 |
st.write("Similarities")
|
| 50 |
|
|
|
|
|
|
|
| 51 |
df_subset = projects_df.head(10)
|
| 52 |
selected_index = st.selectbox('Select an entry', df_subset.index, format_func=lambda x: df_subset.loc[x, 'iati_id'])
|
| 53 |
|
|
|
|
| 8 |
import streamlit as st
|
| 9 |
import pandas as pd
|
| 10 |
from scipy.sparse import load_npz
|
| 11 |
+
import pickle
|
| 12 |
+
import faiss
|
| 13 |
+
from sentence_transformers import SentenceTransformer
|
| 14 |
import utils.similarity_table as similarity_table
|
| 15 |
+
import utils.semantic_search as semantic_search
|
| 16 |
import psutil
|
| 17 |
import os
|
| 18 |
|
|
|
|
| 44 |
|
| 45 |
return projects_df
|
| 46 |
|
@st.cache_resource
def load_model():
    """Load and cache the sentence-transformer encoder used for semantic search.

    Cached with ``st.cache_resource`` so the model is loaded once per
    Streamlit server process and shared across sessions.

    Returns:
        SentenceTransformer: the 'all-MiniLM-L6-v2' encoder.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')
| 51 |
+
|
| 52 |
+
# LOAD EMBEDDINGS
|
| 53 |
+
@st.cache_data
|
| 54 |
+
def load_embeddings_and_index():
|
| 55 |
+
# Load embeddings
|
| 56 |
+
with open("..\synergy-app\src\embeddings.pkl", "rb") as fIn:
|
| 57 |
+
stored_data = pickle.load(fIn)
|
| 58 |
+
sentences = stored_data["sentences"]
|
| 59 |
+
embeddings = stored_data["embeddings"]
|
| 60 |
+
|
| 61 |
+
# Load or create FAISS index
|
| 62 |
+
dimension = embeddings.shape[1]
|
| 63 |
+
faiss_index = faiss.IndexFlatL2(dimension)
|
| 64 |
+
faiss_index.add(embeddings)
|
| 65 |
+
|
| 66 |
+
return sentences, embeddings, faiss_index
|
| 67 |
+
|
| 68 |
# LOAD DATA
# Executed once at module import; each loader is cached by Streamlit, so
# repeated reruns of the script reuse the cached objects.
sim_matrix = load_sim_matrix()
projects_df = load_projects()
# Model and FAISS search state for the semantic search widget.
model = load_model()
sentences, embeddings, faiss_index = load_embeddings_and_index()
| 73 |
|
| 74 |
def show_page():
|
| 75 |
st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
| 76 |
st.write("Similarities")
|
| 77 |
|
| 78 |
+
semantic_search.show_search(model, faiss_index, sentences)
|
| 79 |
+
|
| 80 |
df_subset = projects_df.head(10)
|
| 81 |
selected_index = st.selectbox('Select an entry', df_subset.index, format_func=lambda x: df_subset.loc[x, 'iati_id'])
|
| 82 |
|
utils/__pycache__/semantic_search.cpython-310.pyc
ADDED
|
Binary file (825 Bytes). View file
|
|
|
utils/semantic_search.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
import faiss
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
|
| 6 |
+
def show_search(model, faiss_index, sentences):
|
| 7 |
+
query = st.text_input("Enter your search query:")
|
| 8 |
+
|
| 9 |
+
if query:
|
| 10 |
+
# Convert query to embedding
|
| 11 |
+
query_embedding = model.encode([query])[0].reshape(1, -1)
|
| 12 |
+
|
| 13 |
+
# Perform search
|
| 14 |
+
D, I = faiss_index.search(query_embedding, k=5) # Search for top 5 similar items
|
| 15 |
+
|
| 16 |
+
# Display results
|
| 17 |
+
st.write("Top results:")
|
| 18 |
+
for i in I[0]:
|
| 19 |
+
st.write(sentences[i])
|