# NOTE(review): the lines below were page-scrape residue ("Spaces: Sleeping") from a
# Hugging Face Spaces listing, not part of the program; replaced with this header.
# Streamlit app: multilingual vector search over PIB press releases (Faiss + SentenceTransformer).
| import faiss | |
| import pickle | |
| import pandas as pd | |
| import streamlit as st | |
| from sentence_transformers import SentenceTransformer | |
| from vector_engine.utils import vector_search | |
def read_data(pibdata="pib2022_23_cleaned_abs.csv"):
    """Load the cleaned PIB press-release dataset from a CSV file.

    Args:
        pibdata: Path to the CSV file (defaults to the bundled 2022-23 dump).

    Returns:
        A pandas DataFrame with the corpus rows.
    """
    frame = pd.read_csv(pibdata)
    return frame
def load_bert_model(name="pushpdeep/sbertmsmarco-en_to_indic_ur-murilv1"):
    """Instantiate the sentence-embedding model used to encode queries.

    Fix: the previous docstring called this "DistilBERT"; the default
    checkpoint is a MuRIL-based Sentence-Transformers model fine-tuned on
    MS MARCO for English/Indic/Urdu, not DistilBERT.

    Args:
        name: Hugging Face model id or local path of the checkpoint.

    Returns:
        A loaded ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(name)
def load_faiss_index(path_to_faiss="models/faiss_index_ip.pickle"):
    """Load a pickled, serialized Faiss index from disk and rebuild it.

    NOTE(security): ``pickle.load`` executes arbitrary code from the file —
    only load index files from a trusted source.

    Args:
        path_to_faiss: Path to the pickled serialized index.

    Returns:
        The deserialized Faiss index object.
    """
    with open(path_to_faiss, "rb") as handle:
        serialized_index = pickle.load(handle)
    return faiss.deserialize_index(serialized_index)
def main():
    """Streamlit entry point: semantic-search UI over the PIB corpus.

    Loads the corpus, embedding model, and Faiss index, then renders a
    search box and writes the matching articles for the top-k neighbours.
    """
    # Load data and models (re-executed on each Streamlit script run).
    data = read_data()
    model = load_bert_model()
    faiss_index = load_faiss_index()

    st.title("Vector-based search with Sentence Transformers and Faiss")

    # User search (default query is Hindi for "Indian Ocean").
    user_input = st.text_area("Search box", "हिंद महासागर")

    # Sidebar filters
    st.sidebar.markdown("**Filters**")
    num_results = st.sidebar.slider("Number of search results", 10, 50, 10)

    # Fetch results
    if user_input:
        # Nearest-neighbour ids for the query embedding (distances unused).
        _, I = vector_search([user_input], model, faiss_index, num_results)
        frame = data
        # Fix: the id set was rebuilt from the whole column on EVERY loop
        # iteration (O(corpus) per result); build it once before the loop.
        known_ids = set(frame.rid)
        for id_ in I.flatten().tolist():
            # Guard clause: skip ids the index returned that are not in the frame.
            if id_ not in known_ids:
                continue
            f = frame[frame.rid == id_]
            st.write(
                f"""
**Language**: {f.iloc[0].language}
**Article**: {f.iloc[0].abstract} https://pib.gov.in/PressReleasePage.aspx?PRID={f.iloc[0].rid}
"""
            )
# Run the app only when executed as a script (Streamlit imports this module).
if __name__ == "__main__":
    main()