Spaces:
Sleeping
Sleeping
File size: 7,458 Bytes
3e7c848 9cf0111 d5685d9 a9cdb8b 2ee99b8 a9cdb8b 9cf0111 2ee99b8 d85e2fa 2ee99b8 916222b 2ee99b8 b6f511b 2ee99b8 b6f511b 9cf0111 9705034 9cf0111 2ee99b8 a838f9c 7c545ee 9cf0111 2ee99b8 9cf0111 d5685d9 9cf6a6a a838f9c 7061e3b 953c27c 3a27c14 953c27c 7061e3b a9cdb8b 7061e3b 953c27c 2ee99b8 a9cdb8b a838f9c bb157b8 2ee99b8 d5685d9 7a57131 d5685d9 bb157b8 d5685d9 7c545ee d5685d9 a838f9c 3a27c14 d5685d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import string
import re
import joblib
import time
import base64
def get_base64_of_bin_file(bin_file):
    """Return the contents of ``bin_file`` encoded as a base64 text string.

    Args:
        bin_file: Path of the file to read; it is opened in binary mode and
            read in full.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``.
    """
    with open(bin_file, 'rb') as stream:
        encoded = base64.b64encode(stream.read())
    return encoded.decode()
# Path to the local background image.
img_file = 'fon3.jpg' # Make sure the path is correct relative to this script
# Encode the image as base64 so it can be inlined below as a data: URI.
img_base64 = get_base64_of_bin_file(img_file)
# Page stylesheet: a semi-transparent white gradient layered over the embedded
# background image, plus the .custom-title rule used by the page heading.
# CSS braces are doubled because this is an f-string; only {img_base64} is
# interpolated.
page_bg_img = f"""
<style>
[data-testid="stAppViewContainer"] {{
background: linear-gradient(rgba(255, 255, 255, 0.5), rgba(255, 255, 255, 0.5)), url("data:image/jpeg;base64,{img_base64}");
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
filter: brightness(1.1); /* Adjust the brightness here */
}}
.custom-title {{
font-size: 70px;
font-weight: bold;
color: #120c01;
text-align: center;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}}
</style>
"""
# Inject the stylesheet; unsafe_allow_html=True is passed so the raw <style>
# markup is emitted as HTML.
st.markdown(page_bg_img, unsafe_allow_html=True)
# Load the dataset and the search artefacts
@st.cache_resource
def load_data_models():
    """Load the series table and every artefact needed for searching.

    Reads the series CSV, the precomputed embedding matrix, the FAISS index
    and the pickled LSA model from disk, and instantiates the sentence
    encoder. The function is wrapped in ``st.cache_resource``.

    Returns:
        A 5-tuple ``(data, combined_embeddings, index, embedder, lsa)``.
    """
    series_table = pd.read_csv('data/series_edited.csv')
    embedding_matrix = np.load('embeddings/combined_embeddings_2.npy')
    faiss_index = faiss.read_index('embeddings/faiss_index_2.bin')
    sentence_encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    lsa_model = joblib.load('embeddings/lsa_model.pkl')
    return series_table, embedding_matrix, faiss_index, sentence_encoder, lsa_model
# Run the loader and bind its five results to module-level names used by the
# rest of the script.
data, combined_embeddings, index, embedder, lsa = load_data_models()
# Text preprocessing
_CRLF = re.compile(r'\r\n')
_DISALLOWED_CHARS = re.compile(r'[^a-zA-Zа-яА-ЯёЁ0-9.,!?;:\s]')
_URL_LIKE = re.compile(r'http\S+|www\S+|https\S+')
_STRIP_PUNCTUATION = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalise ``text`` for embedding.

    Steps, in order:
      1. replace CRLF pairs and non-breaking spaces with a plain space;
      2. drop every character that is not a Latin/Cyrillic letter, a digit,
         one of ``.,!?;:`` or whitespace;
      3. drop tokens that start with ``http`` or ``www``;
      4. delete all ASCII punctuation (nothing is inserted in its place, so
         words joined only by punctuation are merged);
      5. lowercase the result.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    spaced = _CRLF.sub(' ', text).replace('\xa0', ' ')
    letters_only = _DISALLOWED_CHARS.sub('', spaced)
    without_urls = _URL_LIKE.sub(r'', letters_only)
    return without_urls.translate(_STRIP_PUNCTUATION).lower()
# Find the series that best match a free-text user query
def search_series(user_query, top_k, description_weight, actors_weight, genre_weight):
    """Search the FAISS index for the series closest to ``user_query``.

    The query is cleaned with ``clean_text`` and embedded once. Three copies
    of that embedding, scaled by the description, actors and genre weights,
    are concatenated along axis 1, projected with ``lsa.transform`` and
    L2-normalised row-wise before being passed to ``index.search``.

    Args:
        user_query: Free-text description typed by the user.
        top_k: Maximum number of hits to request from the index.
        description_weight: Scale applied to the first copy of the embedding.
        actors_weight: Scale applied to the second copy of the embedding.
        genre_weight: Scale applied to the third copy of the embedding.

    Returns:
        ``(indices, scores)``: two 1-D arrays of equal length (at most
        ``top_k``) holding the row positions reported by the index and the
        matching scores. Slots the index could not fill (label ``-1``) are
        removed. The caller displays the scores as cosine similarity; whether
        that is accurate depends on how the index was built (not visible here).
    """
    user_query = clean_text(user_query)
    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    weighted_query_embedding = np.concatenate(
        (
            query_embedding * description_weight,
            query_embedding * actors_weight,
            query_embedding * genre_weight,
        ),
        axis=1,
    )
    weighted_query_embedding = lsa.transform(weighted_query_embedding)

    # L2-normalise. A zero vector (e.g. all three weights set to 0.0) would
    # otherwise divide by zero and hand NaNs to the index, so leave it as-is.
    norms = np.linalg.norm(weighted_query_embedding, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    weighted_query_embedding = weighted_query_embedding / norms

    # FAISS works on C-contiguous float32 matrices; make that explicit rather
    # than relying on the dtype produced by the LSA projection.
    weighted_query_embedding = np.ascontiguousarray(weighted_query_embedding, dtype=np.float32)

    D, I = index.search(weighted_query_embedding, top_k)

    # FAISS pads the result with label -1 when fewer than top_k vectors are
    # available; drop those so callers never index the table with -1.
    found = I[0] >= 0
    return I[0][found], D[0][found]
# Seed st.session_state with the weight values; entries that already exist
# are left untouched.
_INITIAL_SESSION_VALUES = {
    # Current weights.
    'description_weight': 0.7,
    'actors_weight': 0.15,
    'genre_weight': 0.15,
    # Original weights, kept for resetting.
    'original_description_weight': 0.7,
    'original_actors_weight': 0.15,
    'original_genre_weight': 0.15,
}
for _state_key, _initial_value in _INITIAL_SESSION_VALUES.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Reset the weights to their initial values
def reset_weights():
    """Restore the three search weights in ``st.session_state``.

    Each weight is set to the matching ``original_*`` session entry (the
    values the script stores for resetting). If such an entry is missing,
    the built-in default (0.7 / 0.15 / 0.15) is used instead, so the function
    also works before the ``original_*`` entries have been initialised.
    """
    fallback_defaults = {
        'description_weight': 0.7,
        'actors_weight': 0.15,
        'genre_weight': 0.15,
    }
    for weight_key, fallback in fallback_defaults.items():
        st.session_state[weight_key] = st.session_state.get(f'original_{weight_key}', fallback)
# Rescale the current weights proportionally
def update_weights():
    """Rescale the current weights so they sum to the original total.

    The three current weights in ``st.session_state`` are multiplied by
    ``original total / current total``. Nothing is modified when either
    total is zero.
    """
    state = st.session_state
    original_total = (
        state['original_description_weight']
        + state['original_actors_weight']
        + state['original_genre_weight']
    )
    current_total = state['description_weight'] + state['actors_weight'] + state['genre_weight']
    if original_total == 0 or current_total == 0:
        return
    scale = original_total / current_total
    for weight_key in ('description_weight', 'actors_weight', 'genre_weight'):
        state[weight_key] *= scale
# Sidebar sliders for tuning the weights; each slider's initial value is the
# weight currently held in session_state.
description_weight = st.sidebar.slider("Вес описания", 0.0, 1.0, st.session_state['description_weight'], step=0.01)
actors_weight = st.sidebar.slider("Вес актеров", 0.0, 1.0, st.session_state['actors_weight'], step=0.01)
genre_weight = st.sidebar.slider("Вес жанра", 0.0, 1.0, st.session_state['genre_weight'], step=0.01)
# Write the slider values back to session_state
st.session_state['description_weight'] = description_weight
st.session_state['actors_weight'] = actors_weight
st.session_state['genre_weight'] = genre_weight
# Weight-reset button handler
if st.sidebar.button("Обновить веса"):
    reset_weights()
# Rescale the weights proportionally
# NOTE(review): indentation was lost in this copy of the file; this call is
# assumed to sit at top level (run on every rerun) — confirm against the
# original. It changes only session_state; the local slider variables above
# keep their values, and those locals are what the search call receives.
update_weights()
# Page heading (styled by the .custom-title CSS rule) and the search form.
st.markdown('<h1 class="custom-title">📽️FIND MY SHOW📽️</h1>', unsafe_allow_html=True)
st.header('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')
top_k = st.slider("Количество результатов", min_value=1, max_value=20, value=5)

# Maximum number of words of a description shown on a result card.
_MAX_DESCRIPTION_WORDS = 50

if st.button('Поиск'):
    if not input_text.strip():
        # An empty query has nothing to match against; ask for input instead
        # of running a meaningless search.
        st.warning('Введите описание сериала')
    else:
        start_time = time.time()
        indices, distances = search_series(input_text, top_k, description_weight, actors_weight, genre_weight)
        end_time = time.time()
        search_time = end_time - start_time
        st.write("Результаты поиска:")
        for idx, dist in zip(indices, distances):
            if idx < 0:
                # FAISS uses -1 for result slots it could not fill.
                continue
            results = data.iloc[idx]
            st.write("---")
            st.image(results['image_url'], width=400)
            st.write(f"**Название:** **{results['tvshow_title']}**")
            st.write(f"**Жанр:** {results['genre']}")

            # Shorten long descriptions to the first N words. The ellipsis is
            # added only when words were actually dropped, and a missing
            # (NaN) description is shown as empty text instead of raising.
            raw_description = results['description']
            description = '' if pd.isna(raw_description) else str(raw_description)
            words = description.split()
            if len(words) > _MAX_DESCRIPTION_WORDS:
                description = ' '.join(words[:_MAX_DESCRIPTION_WORDS]) + '...'
            st.write(f"**Описание:** {description}")

            st.write(f"**Рейтинг КП :** {round(results['kinopoisk_rating'], 2)}, **Рейтинг IMDb :** {round(results['imdb'], 2)}")
            st.write(f"**Косинусное сходство:** {round(dist, 3)}")
            # NOTE(review): indentation was lost in this copy of the file; the
            # timing line is assumed to be part of each result card because it
            # sits between two per-result lines — confirm against the original.
            st.write(f'**Время поиска:** {search_time:.3f} секунд')
            st.markdown(f"[Читать далее]({results['page_url']})", unsafe_allow_html=True)