VerVelVel committed on
Commit
d5685d9
·
1 Parent(s): 9cf0111

new app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -7,7 +7,7 @@ import string
7
  import re
8
  import joblib
9
  # import pickle
10
- # import time
11
 
12
  # st.markdown("""
13
  # <style>
@@ -84,7 +84,7 @@ def clean_text(text):
84
  text = text.lower()
85
  return text
86
 
87
- def search_series(user_query, top_k=10):
88
  user_query = clean_text(user_query)
89
  query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
90
  weighted_query_embedding = np.concatenate((query_embedding * description_weight,
@@ -93,12 +93,31 @@ def search_series(user_query, top_k=10):
93
  weighted_query_embedding = lsa.transform(weighted_query_embedding)
94
  weighted_query_embedding = weighted_query_embedding / np.linalg.norm(weighted_query_embedding, axis=1, keepdims=True) # Нормализация
95
  D, I = index.search(weighted_query_embedding, top_k)
96
- results = data.iloc[I[0]].copy()
97
- cosine_similarities = D[0]
98
- results['cosine_similarity'] = cosine_similarities
99
- return results
 
 
100
 
101
  st.title('Поиск сериала по описанию')
102
  input_text = st.text_area('Введите описание сериала')
 
103
  if st.button('Поиск'):
104
- st.write(search_series(input_text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import re
8
  import joblib
9
  # import pickle
10
+ import time
11
 
12
  # st.markdown("""
13
  # <style>
 
84
  text = text.lower()
85
  return text
86
 
87
+ def search_series(user_query, top_k):
88
  user_query = clean_text(user_query)
89
  query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
90
  weighted_query_embedding = np.concatenate((query_embedding * description_weight,
 
93
  weighted_query_embedding = lsa.transform(weighted_query_embedding)
94
  weighted_query_embedding = weighted_query_embedding / np.linalg.norm(weighted_query_embedding, axis=1, keepdims=True) # Нормализация
95
  D, I = index.search(weighted_query_embedding, top_k)
96
+ # results = data.iloc[I[0]].copy()
97
+ # cosine_similarities = D[0]
98
+ # results['cosine_similarity'] = cosine_similarities
99
+ return I[0], D[0]
100
+
101
+
102
 
103
  st.title('Поиск сериала по описанию')
104
  input_text = st.text_area('Введите описание сериала')
105
+ top_k = st.slider("Количество результатов", min_value=1, max_value=20, value=5)
106
  if st.button('Поиск'):
107
+ start_time = time.time()
108
+ indices, distances = search_series(input_text, top_k)
109
+ end_time = time.time()
110
+ search_time = end_time - start_time
111
+ st.write("Результаты поиска:")
112
+ for idx, dist in zip(indices, distances):
113
+ results = data.iloc[idx]
114
+ st.write("---")
115
+ st.image(results['image_url'], width=250)
116
+ st.write(f"**Название:** {results['tvshow_title']}")
117
+ st.write(f"**Жанр:** {results['genre']}")
118
+ if len(results['description']) > 50:
119
+ results['description'] = ' '.join(results['description'].split()[:50]) + '...'
120
+ st.write(f"**Описание:** {results['description']}")
121
+ st.write(f"**Косинусное сходство:** {dist}")
122
+ st.write(f'**Время поиска:** {search_time:.4f} секунд')
123
+ st.markdown(f"[Читать далее]({results['page_url']})", unsafe_allow_html=True)