Spaces:
Runtime error
Runtime error
# Semantic-search demo: embed a small sample of the BEIR FiQA documents and a
# single free-text query with a sentence-transformers checkpoint (through
# AutoGluon's feature-extraction pipeline), rank the documents by cosine
# similarity to the query, and print the best matches.
import ir_datasets
import numpy as np
import pandas as pd
from autogluon.multimodal import MultiModalPredictor

dataset = ir_datasets.load("beir/fiqa/dev")

# Only a tiny fraction of the corpus is embedded to keep the demo fast.
# NOTE: no random_state is passed, so the sampled documents (and therefore
# the printed results) can differ between runs.
docs_df = pd.DataFrame(dataset.docs_iter()).set_index("doc_id").sample(frac=0.0001)
# Not used further in the visible part of the script; kept in case later code
# relies on it.
query_df = pd.DataFrame(dataset.queries_iter()).set_index("query_id")

model_name = "sentence-transformers/all-MiniLM-L6-v2"
predictor = MultiModalPredictor(
    pipeline="feature_extraction",
    hyperparameters={
        "model.hf_text.checkpoint_name": model_name,
    },
)

document_embedding = predictor.extract_embedding(docs_df)

query = "What happened when the dot com bubble burst?"
query_embedding = predictor.extract_embedding([query])

# L2-normalise both sides so that the dot product below is cosine similarity.
# NOTE(review): the '0' key is presumably the column name AutoGluon assigns to
# a plain-list input, and 'text' the document text column -- confirm against
# the installed AutoGluon version.
q_norm = query_embedding['0'] / np.linalg.norm(query_embedding['0'], axis=-1, keepdims=True)
d_norm = document_embedding['text'] / np.linalg.norm(document_embedding['text'], axis=-1, keepdims=True)
scores = d_norm.dot(q_norm[0])

top_k = 2  # number of best-matching documents to print

print(f'Question: {query}')
print()
# argsort on the negated scores yields positional indices in descending score
# order; `rank` is the 1-based position in that ordering, `idx` the row
# position inside docs_df.
for rank, idx in enumerate(np.argsort(-scores)[:top_k], start=1):
    print(f'Top {rank} result:')
    print('-----------------')
    print(docs_df['text'].iloc[idx])
    print()