Spaces:
Sleeping
Sleeping
| import faiss | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import torch.nn.functional as F | |
| from sentence_transformers import SentenceTransformer | |
# Number of embedding components kept; the index is built with this width and
# query vectors are cut down to it before searching.
DIM = 768

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
print("Model loaded successfully")

# Paper metadata. Malformed CSV lines are skipped, then any row lacking a
# summary or a PDF path is discarded.
papers_df = pd.read_csv("data/cvpr2024_papers_with_details.csv", index_col=None, on_bad_lines='skip')
papers_df = papers_df.dropna(subset=["summary", "pdf_path"])
print("Data loaded successfully")

# Precomputed vectors loaded from disk and added to a flat L2 index.
# NOTE(review): presumably row i of the embeddings corresponds to row i of the
# filtered papers_df -- confirm against the script that produced the file.
embeddings = np.load('data/embeddings.npy')
index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
print("Index loaded successfully")
def encode_query(query):
    """Embed a single query string for nearest-neighbour lookup.

    The model output is layer-normalised across its full feature width,
    cut down to the first ``DIM`` components, and then rescaled so that each
    row has unit L2 norm.

    Args:
        query: Text to embed.

    Returns:
        A tensor holding one row for the query, with at most ``DIM`` columns.
    """
    raw = model.encode([query], convert_to_tensor=True)
    feature_width = raw.shape[1]
    # Normalise over the whole feature axis *before* truncating.
    normed = F.layer_norm(raw, normalized_shape=(feature_width,))
    truncated = normed[:, :DIM]
    return F.normalize(truncated, p=2, dim=1)
def search_nearest_papers(query, k=5):
    """Return the papers whose stored embeddings are closest to ``query``.

    Args:
        query: Free-text search string.
        k: Number of neighbours to retrieve. It may arrive as a float (for
            example from a UI slider), so it is rounded to the nearest
            integer and floored at 1.

    Returns:
        The rows of ``papers_df`` selected by the index search, restricted
        to the "Title" and "arXiv_link" columns, in the order the index
        returned them. Fewer than ``k`` rows are returned when the index
        cannot supply ``k`` neighbours.
    """
    # The index search needs an integer neighbour count.
    k = max(1, int(round(k)))

    query_embeddings = encode_query(query)
    # Hand FAISS a host-side, contiguous float32 array rather than a tensor
    # that may live on another device or carry autograd state.
    if hasattr(query_embeddings, "detach"):
        query_embeddings = query_embeddings.detach().cpu().numpy()
    query_embeddings = np.ascontiguousarray(query_embeddings, dtype=np.float32)

    _, neighbours = index.search(query_embeddings, k)

    # FAISS pads the label array with -1 when it has fewer than k vectors;
    # passing -1 to iloc would silently select the last row instead.
    positions = neighbours[0]
    positions = positions[positions >= 0]
    return papers_df.iloc[positions][["Title", "arXiv_link"]]
# Web UI: a text box for the query and a slider for the number of results,
# rendering the matching papers as a table.
demo = gr.Interface(
    search_nearest_papers,
    [
        "text",
        # step=1 keeps the result count a whole number; without an explicit
        # step the slider can emit fractional values, which are not a valid
        # neighbour count.
        gr.Slider(1, 10, value=5, step=1),
    ],
    gr.Dataframe(
        headers=["Title", "PDF"],
    ),
    title="CVPR 2024 Paper Search",
    description="Semantic search over CVPR 2024 paper summary. This app was made using the data available on https://github.com/harpreetsahota204/CVPR-2024-Papers.",
)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()