Spaces:
Sleeping
Sleeping
new app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import string
|
|
| 7 |
import re
|
| 8 |
import joblib
|
| 9 |
# import pickle
|
| 10 |
-
|
| 11 |
|
| 12 |
# st.markdown("""
|
| 13 |
# <style>
|
|
@@ -84,7 +84,7 @@ def clean_text(text):
|
|
| 84 |
text = text.lower()
|
| 85 |
return text
|
| 86 |
|
| 87 |
-
def search_series(user_query, top_k=10):
|
| 88 |
user_query = clean_text(user_query)
|
| 89 |
query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
|
| 90 |
weighted_query_embedding = np.concatenate((query_embedding * description_weight,
|
|
@@ -93,12 +93,31 @@ def search_series(user_query, top_k=10):
|
|
| 93 |
weighted_query_embedding = lsa.transform(weighted_query_embedding)
|
| 94 |
weighted_query_embedding = weighted_query_embedding / np.linalg.norm(weighted_query_embedding, axis=1, keepdims=True) # Нормализация
|
| 95 |
D, I = index.search(weighted_query_embedding, top_k)
|
| 96 |
-
results = data.iloc[I[0]].copy()
|
| 97 |
-
cosine_similarities = D[0]
|
| 98 |
-
results['cosine_similarity'] = cosine_similarities
|
| 99 |
-
return results
|
|
|
|
|
|
|
| 100 |
|
| 101 |
st.title('Поиск сериала по описанию')
|
| 102 |
input_text = st.text_area('Введите описание сериала')
|
|
|
|
| 103 |
if st.button('Поиск'):
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import re
|
| 8 |
import joblib
|
| 9 |
# import pickle
|
| 10 |
+
import time
|
| 11 |
|
| 12 |
# st.markdown("""
|
| 13 |
# <style>
|
|
|
|
| 84 |
text = text.lower()
|
| 85 |
return text
|
| 86 |
|
| 87 |
+
def search_series(user_query, top_k):
|
| 88 |
user_query = clean_text(user_query)
|
| 89 |
query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
|
| 90 |
weighted_query_embedding = np.concatenate((query_embedding * description_weight,
|
|
|
|
| 93 |
weighted_query_embedding = lsa.transform(weighted_query_embedding)
|
| 94 |
weighted_query_embedding = weighted_query_embedding / np.linalg.norm(weighted_query_embedding, axis=1, keepdims=True) # Нормализация
|
| 95 |
D, I = index.search(weighted_query_embedding, top_k)
|
| 96 |
+
# results = data.iloc[I[0]].copy()
|
| 97 |
+
# cosine_similarities = D[0]
|
| 98 |
+
# results['cosine_similarity'] = cosine_similarities
|
| 99 |
+
return I[0], D[0]
|
| 100 |
+
|
| 101 |
+
|
| 102 |
|
| 103 |
# --- Streamlit page: free-text TV series search -----------------------------
# Reads a description from the user, runs `search_series` on it and renders
# one card per hit: poster, title, genre, shortened description, similarity
# score and a link to the series page.

# Maximum number of words of a description shown on a result card.
MAX_DESCRIPTION_WORDS = 50

st.title('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')
top_k = st.slider("Количество результатов", min_value=1, max_value=20, value=5)

if st.button('Поиск'):
    if not input_text.strip():
        # Nothing to embed: ask for input instead of searching on an empty query.
        st.warning('Введите описание сериала')
    else:
        # perf_counter is a monotonic, high-resolution clock meant for measuring
        # intervals; time.time() can jump if the system clock is adjusted.
        start_time = time.perf_counter()
        indices, distances = search_series(input_text, top_k)
        search_time = time.perf_counter() - start_time

        st.write("Результаты поиска:")
        # The timing describes the whole query, so it is reported once here
        # rather than repeated under every result.
        st.write(f'**Время поиска:** {search_time:.4f} секунд')

        for idx, dist in zip(indices, distances):
            if idx < 0:
                # NOTE(review): presumably the index is a FAISS index, which pads
                # missing hits with -1; `iloc[-1]` would silently show the last
                # row of `data` instead. Confirm against the index type.
                continue

            row = data.iloc[idx]

            # Shorten by words, and decide by word count too. The previous check
            # compared the character count with 50, which appended "..." to
            # texts that were not actually cut. A local variable is used so the
            # row taken from `data` is never written to.
            description = row['description']
            words = description.split()
            if len(words) > MAX_DESCRIPTION_WORDS:
                description = ' '.join(words[:MAX_DESCRIPTION_WORDS]) + '...'

            st.write("---")
            st.image(row['image_url'], width=250)
            st.write(f"**Название:** {row['tvshow_title']}")
            st.write(f"**Жанр:** {row['genre']}")
            st.write(f"**Описание:** {description}")
            st.write(f"**Косинусное сходство:** {dist}")
            # A Markdown link needs no raw HTML, so `unsafe_allow_html` is left
            # off to keep dataset-provided values from being rendered as HTML.
            st.markdown(f"[Читать далее]({row['page_url']})")
|