Spaces:
Running
Running
add omartificial to app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import numpy as np
|
|
| 8 |
# Load models
|
| 9 |
model = SentenceTransformer("distilbert-base-multilingual-cased")
|
| 10 |
modela = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
|
|
|
|
| 11 |
|
| 12 |
# Load data
|
| 13 |
df = pd.read_csv("cleaned1.csv")
|
|
@@ -19,10 +20,16 @@ embeddings = torch.load("embeddings1_1.pt")
|
|
| 19 |
embeddings2 = torch.load("embeddings2_1.pt")
|
| 20 |
embeddings3 = torch.load("embeddings3_1.pt")
|
| 21 |
|
|
|
|
| 22 |
embeddingsa = torch.load("embeddings1.pt")
|
| 23 |
embeddingsa2 = torch.load("embeddings2.pt")
|
| 24 |
embeddingsa3 = torch.load("embeddings3.pt")
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Extract questions and links
|
| 27 |
df_questions = df["question"].values
|
| 28 |
df_links = df["link"].values
|
|
@@ -127,14 +134,17 @@ def predict(text):
|
|
| 127 |
# Semantic similarity scores
|
| 128 |
query_embedding = model.encode(text, convert_to_tensor=True)
|
| 129 |
query_embeddinga = modela.encode(text, convert_to_tensor=True)
|
| 130 |
-
|
| 131 |
# Cosine similarities (averaged from two models)
|
| 132 |
sim_scores1 = (util.pytorch_cos_sim(query_embedding, embeddings)[0] +
|
| 133 |
-
util.pytorch_cos_sim(query_embeddinga, embeddingsa)[0]
|
|
|
|
| 134 |
sim_scores2 = (util.pytorch_cos_sim(query_embedding, embeddings2)[0] +
|
| 135 |
-
util.pytorch_cos_sim(query_embeddinga, embeddingsa2)[0]
|
|
|
|
| 136 |
sim_scores3 = (util.pytorch_cos_sim(query_embedding, embeddings3)[0] +
|
| 137 |
-
util.pytorch_cos_sim(query_embeddinga, embeddingsa3)[0]
|
|
|
|
| 138 |
|
| 139 |
# BM25 scores
|
| 140 |
bm25_scores1 = compute_bm25_scores(text, bm25_model1,corpus_length1)
|
|
|
|
| 8 |
# Load models
|
| 9 |
model = SentenceTransformer("distilbert-base-multilingual-cased")
|
| 10 |
modela = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
|
| 11 |
+
modelb = SentenceTransformer("Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka")
|
| 12 |
|
| 13 |
# Load data
|
| 14 |
df = pd.read_csv("cleaned1.csv")
|
|
|
|
| 20 |
embeddings2 = torch.load("embeddings2_1.pt")
|
| 21 |
embeddings3 = torch.load("embeddings3_1.pt")
|
| 22 |
|
| 23 |
+
|
| 24 |
embeddingsa = torch.load("embeddings1.pt")
|
| 25 |
embeddingsa2 = torch.load("embeddings2.pt")
|
| 26 |
embeddingsa3 = torch.load("embeddings3.pt")
|
| 27 |
|
| 28 |
+
|
| 29 |
+
embeddingsb = torch.load("embeddingso1_3.pt")
|
| 30 |
+
embeddingsb2 = torch.load("embeddingso2_3.pt")
|
| 31 |
+
embeddingsb3 = torch.load("embeddingso3_3.pt")
|
| 32 |
+
|
| 33 |
# Extract questions and links
|
| 34 |
df_questions = df["question"].values
|
| 35 |
df_links = df["link"].values
|
|
|
|
| 134 |
# Semantic similarity scores
|
| 135 |
query_embedding = model.encode(text, convert_to_tensor=True)
|
| 136 |
query_embeddinga = modela.encode(text, convert_to_tensor=True)
|
| 137 |
+
query_embeddingb = modelb.encode(text, convert_to_tensor=True)
|
| 138 |
# Cosine similarities (averaged from three models)
|
| 139 |
sim_scores1 = (util.pytorch_cos_sim(query_embedding, embeddings)[0] +
|
| 140 |
+
util.pytorch_cos_sim(query_embeddinga, embeddingsa)[0] +
|
| 141 |
+
util.pytorch_cos_sim(query_embeddingb, embeddingsb)[0] ) / 3
|
| 142 |
sim_scores2 = (util.pytorch_cos_sim(query_embedding, embeddings2)[0] +
|
| 143 |
+
util.pytorch_cos_sim(query_embeddinga, embeddingsa2)[0] +
|
| 144 |
+
util.pytorch_cos_sim(query_embeddingb, embeddingsb2)[0])/ 3
|
| 145 |
sim_scores3 = (util.pytorch_cos_sim(query_embedding, embeddings3)[0] +
|
| 146 |
+
util.pytorch_cos_sim(query_embeddinga, embeddingsa3)[0]+
|
| 147 |
+
util.pytorch_cos_sim(query_embeddingb, embeddingsb3)[0]) / 3
|
| 148 |
|
| 149 |
# BM25 scores
|
| 150 |
bm25_scores1 = compute_bm25_scores(text, bm25_model1,corpus_length1)
|