Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,9 +33,21 @@ joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_on='__index_
|
|
| 33 |
df = joined_df.copy()
|
| 34 |
|
| 35 |
model = SentenceTransformer('FDSRashid/QulBERT', token=Secret_token)
|
| 36 |
-
|
| 37 |
|
| 38 |
def find_most_similar_matn(text, n):
|
| 39 |
embed_text = model.encode(araby.strip_diacritics(text))
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
|
|
|
| 33 |
# Work on a private copy so later changes do not touch joined_df.
df = joined_df.copy()

# Sentence-embedding model used to encode queries; Secret_token is defined
# elsewhere in this file (not visible in this excerpt).
model = SentenceTransformer("FDSRashid/QulBERT", token=Secret_token)

# Collect the per-row embeddings into one array so a query can be compared
# against every row in a single call.
# NOTE(review): assumes every df['embed'] entry has the same length - confirm.
arr = np.array(df["embed"].tolist())
|
| 37 |
|
| 38 |
def find_most_similar_matn(text, n):
    """Return the ``n`` rows of ``df`` most similar to ``text``.

    The query has its diacritics stripped, is encoded with the module-level
    ``model``, and is scored by cosine similarity against ``arr``.

    Args:
        text: Query string.
        n: Number of rows to return. Coerced with ``int`` because a UI slider
            may deliver a float, which cannot be used as a slice bound.
            Values below 1 yield an empty result.

    Returns:
        A DataFrame with the columns ``Book``, ``matn``, ``taraf_ID``,
        ``Book ID``, ``Hadith Number``, ``Author`` and ``Similarity``,
        ordered from most to least similar.
    """
    n = max(int(n), 0)
    embed_text = model.encode(araby.strip_diacritics(text))

    # The similarity result is 2-D with one row per query; keep the only row.
    scores = cosine_similarity(embed_text.reshape(1, -1), arr)[0]

    # Sort descending and take the first n. Unlike [-n:], this returns
    # nothing (rather than everything) when n == 0.
    indices = np.argsort(scores)[::-1][:n]

    # assign() builds a new frame, so no column is written onto an iloc
    # slice of df (avoids pandas' SettingWithCopyWarning).
    matns = df.iloc[indices].assign(Similarity=scores[indices])
    return matns[['Book', 'matn', 'taraf_ID', 'Book ID', 'Hadith Number', 'Author', 'Similarity']]
|
| 45 |
+
|
| 46 |
+
# Minimal UI: a query box, a result-count slider, and a table of matches.
with gr.Blocks() as demo:
    text_input = gr.Textbox()
    num_hadith = gr.Slider(
        1,
        50,
        value=5,
        label='Num Hadith',
        info='Choose the number of Hadith to Return',
    )
    text_output = gr.DataFrame()
    text_button = gr.Button("Retrieve")
    # Clicking the button runs the search with the query and the slider
    # value, and renders the returned frame in the table.
    text_button.click(
        find_most_similar_matn,
        inputs=[text_input, num_hadith],
        outputs=text_output,
    )

# NOTE(review): the excerpt's indentation is lost; launch is assumed to sit
# at top level, after the Blocks context closes - confirm.
demo.launch()
|
| 53 |
|