|
|
|
|
|
import gradio as gr |
|
|
|
|
|
from sentence_transformers import SentenceTransformer, CrossEncoder, util |
|
|
from torch import tensor as torch_tensor |
|
|
from datasets import load_dataset |
|
|
|
|
|
"""# import models""" |
|
|
|
|
|
# Bi-encoder: sentence-embedding model loaded by name from the model hub.
bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
# Cap the encoder input length at 256; sentence-transformers truncates
# longer inputs to this limit.
bi_encoder.max_seq_length = 256

# Cross-encoder: scores (query, passage) pairs jointly.
# NOTE(review): presumably used to re-rank bi-encoder hits further down
# the file -- confirm against the search code.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
|
|
|
|
|
"""# import datasets""" |
|
|
|
|
|
# Policy passages: pull the 'train' split and keep the 'psg' column as a
# plain Python list.
dataset = load_dataset("gfhayworth/hack_policy", split='train')
mypassages = list(dataset.to_pandas()['psg'])
|
|
|
|
|
# Precomputed embeddings: load the 'train' split, convert it to a pandas
# DataFrame, and turn the whole frame into a single torch tensor.
# NOTE(review): presumably one row per entry of `mypassages`, in the same
# order, produced by the bi-encoder above -- confirm against the dataset.
dataset_embed = load_dataset("gfhayworth/hack_policy_embed", split='train')
dataset_embed_pd = dataset_embed.to_pandas()
mycorpus_embeddings = torch_tensor(dataset_embed_pd.to_numpy())