Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,9 @@ from datasets import load_dataset
|
|
| 6 |
from datasets import Features
|
| 7 |
from datasets import Value
|
| 8 |
from datasets import Dataset
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
|
| 11 |
Secret_token = os.getenv('HF_token')
|
| 12 |
|
|
@@ -30,5 +32,10 @@ cols_to_use = df.columns.difference(matn_info.columns)
|
|
| 30 |
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_level_0__')
|
| 31 |
df = joined_df.copy()
|
| 32 |
|
|
|
|
|
|
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
|
|
|
|
| 6 |
from datasets import Features
|
| 7 |
from datasets import Value
|
| 8 |
from datasets import Dataset
|
| 9 |
+
from sentence_transformers import SentenceTransformer
|
| 10 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 11 |
+
import os
|
| 12 |
|
| 13 |
Secret_token = os.getenv('HF_token')
|
| 14 |
|
|
|
|
| 32 |
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_level_0__')
|
| 33 |
df = joined_df.copy()
|
| 34 |
|
| 35 |
+
model = SentenceTransformer('FDSRashid/QulBERT', token=Secret_token)
|
| 36 |
+
|
| 37 |
|
| 38 |
+
def find_most_similar_matn(text, n):
|
| 39 |
+
embed_text = model.encode(araby.strip_diacritics(text))
|
| 40 |
+
|
| 41 |
|