import base64
import re
import string
import time

# Third-party imports keep their original relative order.
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import joblib


def get_base64_of_bin_file(bin_file):
    """Return the contents of the file at *bin_file* as a base64 string."""
    with open(bin_file, 'rb') as f:
        data = f.read()
    return base64.b64encode(data).decode()


# Path to the local background image; make sure it is correct relative to
# the script.
img_file = 'fon3.jpg'

# Convert the image to base64.
img_base64 = get_base64_of_bin_file(img_file)

# NOTE(review): the template below contains only whitespace, so img_base64 is
# never interpolated into the page markup -- confirm whether the background
# styling was lost.
page_bg_img = f""" """
st.markdown(page_bg_img, unsafe_allow_html=True)


# Data and model loading.
@st.cache_resource
def load_data_models():
    """Load the dataset, the precomputed embeddings, the index and the models.

    Wrapped in ``st.cache_resource`` so the files are not reloaded on every
    script rerun.

    Returns:
        A tuple ``(data, combined_embeddings, index, embedder, lsa)``: the
        DataFrame read from the CSV, the array loaded from the ``.npy`` file,
        the FAISS index, the SentenceTransformer model and the object
        unpickled from ``lsa_model.pkl``.
    """
    data = pd.read_csv('data/series_edited.csv')
    combined_embeddings = np.load('embeddings/combined_embeddings_2.npy')
    index = faiss.read_index('embeddings/faiss_index_2.bin')
    embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    # joblib.load unpickles arbitrary objects: only point this at trusted files.
    lsa = joblib.load('embeddings/lsa_model.pkl')
    return data, combined_embeddings, index, embedder, lsa


data, combined_embeddings, index, embedder, lsa = load_data_models()


# Compiled once at import time instead of on every clean_text() call.
_DISALLOWED_CHARS_RE = re.compile(r'[^a-zA-Zа-яА-ЯёЁ0-9.,!?;:\s]')
_URL_RE = re.compile(r'http\S+|www\S+|https\S+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


# Text preprocessing.
def clean_text(text):
    """Normalise *text* before it is embedded.

    Steps, in order: replace CRLF and non-breaking spaces with a space, drop
    every character that is not a Latin/Cyrillic letter, digit, whitespace or
    one of ``.,!?;:``, drop tokens starting with ``http``/``www``, strip ASCII
    punctuation and lower-case the result.

    NOTE(review): the step order is kept exactly as it was; presumably the
    stored embeddings were built with the same preprocessing -- confirm
    before changing it.
    """
    text = re.sub(r'\r\n', ' ', text)
    text = text.replace('\xa0', ' ')
    text = _DISALLOWED_CHARS_RE.sub('', text)
    text = _URL_RE.sub(r'', text)
    text = text.translate(_PUNCTUATION_TABLE)
    text = text.lower()
    return text


# Search for the series that best match the user's query.
def search_series(user_query, top_k, description_weight, actors_weight, genre_weight):
    """Return the index hits for *user_query*.

    The cleaned query is embedded once; the embedding is scaled by each of the
    three weights, the three copies are concatenated, passed through
    ``lsa.transform`` and L2-normalised before being searched in ``index``.

    Args:
        user_query: Raw query text.
        top_k: Number of hits to request from the index.
        description_weight: Scale applied to the first copy of the embedding.
        actors_weight: Scale applied to the second copy.
        genre_weight: Scale applied to the third copy.

    Returns:
        A tuple ``(ids, scores)``: the first row of the id matrix and the
        first row of the score matrix returned by ``index.search``.
    """
    user_query = clean_text(user_query)
    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    weighted_query_embedding = np.concatenate(
        (
            query_embedding * description_weight,
            query_embedding * actors_weight,
            query_embedding * genre_weight,
        ),
        axis=1,
    )
    weighted_query_embedding = lsa.transform(weighted_query_embedding)

    # Normalisation. All three weights can be 0.0, which yields a zero vector;
    # dividing by its norm would fill the query with NaN, so leave such a
    # vector unscaled instead.
    norms = np.linalg.norm(weighted_query_embedding, axis=1, keepdims=True)
    norms = np.where(norms == 0, 1.0, norms)
    weighted_query_embedding = weighted_query_embedding / norms

    # FAISS works on C-contiguous float32 matrices.
    weighted_query_embedding = np.ascontiguousarray(weighted_query_embedding, dtype=np.float32)

    D, I = index.search(weighted_query_embedding, top_k)
    return I[0], D[0]


# Default weights; single source of truth for initialisation and reset.
_DEFAULT_WEIGHTS = {
    'description_weight': 0.7,
    'actors_weight': 0.15,
    'genre_weight': 0.15,
}

# Initialise the current weights and the "original" weights (used as the
# rescaling target in update_weights) in session_state.
for _weight_key, _weight_default in _DEFAULT_WEIGHTS.items():
    if _weight_key not in st.session_state:
        st.session_state[_weight_key] = _weight_default
    if f'original_{_weight_key}' not in st.session_state:
        st.session_state[f'original_{_weight_key}'] = _weight_default


# Reset the weights to their initial values.
def reset_weights():
    """Set the three weights in session_state back to their defaults."""
    for key, default in _DEFAULT_WEIGHTS.items():
        st.session_state[key] = default


# Proportional rescaling of the weights.
def update_weights():
    """Rescale the weights so they sum to the total of the original weights.

    Nothing is changed when either total is zero. Each rescaled weight is
    capped at 1.0: floating-point rounding can push a value marginally above
    it, and the sliders are created with 1.0 as their maximum.
    """
    total_original_weight = (
        st.session_state['original_description_weight']
        + st.session_state['original_actors_weight']
        + st.session_state['original_genre_weight']
    )
    total_new_weight = (
        st.session_state['description_weight']
        + st.session_state['actors_weight']
        + st.session_state['genre_weight']
    )
    if total_original_weight != 0 and total_new_weight != 0:
        proportion = total_original_weight / total_new_weight
        for key in _DEFAULT_WEIGHTS:
            st.session_state[key] = min(1.0, st.session_state[key] * proportion)


# Sliders for tuning the weights.
description_weight = st.sidebar.slider("Вес описания", 0.0, 1.0, st.session_state['description_weight'], step=0.01)
actors_weight = st.sidebar.slider("Вес актеров", 0.0, 1.0, st.session_state['actors_weight'], step=0.01) genre_weight = st.sidebar.slider("Вес жанра", 0.0, 1.0, st.session_state['genre_weight'], step=0.01) # Обновляем значения весов в session_state st.session_state['description_weight'] = description_weight st.session_state['actors_weight'] = actors_weight st.session_state['genre_weight'] = genre_weight # Обработчик кнопки сброса весов if st.sidebar.button("Обновить веса"): reset_weights() # Обновляем веса пропорционально update_weights() st.markdown('