File size: 7,458 Bytes
3e7c848
 
 
 
 
9cf0111
 
 
d5685d9
a9cdb8b
2ee99b8
a9cdb8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cf0111
2ee99b8
d85e2fa
2ee99b8
916222b
2ee99b8
b6f511b
 
2ee99b8
b6f511b
9cf0111
 
 
9705034
 
9cf0111
 
 
 
 
 
 
 
 
 
 
2ee99b8
a838f9c
7c545ee
9cf0111
2ee99b8
9cf0111
 
 
 
 
 
d5685d9
 
 
 
 
9cf6a6a
 
 
 
 
 
 
a838f9c
7061e3b
 
 
 
 
 
 
 
953c27c
3a27c14
 
 
 
 
953c27c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7061e3b
a9cdb8b
7061e3b
 
953c27c
 
2ee99b8
a9cdb8b
a838f9c
bb157b8
2ee99b8
d5685d9
7a57131
d5685d9
bb157b8
d5685d9
 
 
 
 
 
7c545ee
 
d5685d9
 
 
 
a838f9c
 
3a27c14
d5685d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import string
import re
import joblib
import time
import base64

def get_base64_of_bin_file(bin_file):
    """Return the contents of a file as base64 text.

    Args:
        bin_file: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``.
    """
    with open(bin_file, 'rb') as stream:
        encoded = base64.b64encode(stream.read())
    return encoded.decode()

# Path to the local background image
img_file = 'fon3.jpg'  # Make sure the path is correct relative to this script

# Encode the image as base64 so it can be embedded in the CSS data URL below
img_base64 = get_base64_of_bin_file(img_file)

# Stylesheet for the page: a half-transparent white gradient over the embedded
# background image on the app container, plus the `.custom-title` class.
# Braces are doubled because this is an f-string.
page_bg_img = f"""
<style>
[data-testid="stAppViewContainer"] {{
background: linear-gradient(rgba(255, 255, 255, 0.5), rgba(255, 255, 255, 0.5)), url("data:image/jpeg;base64,{img_base64}");
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
filter: brightness(1.1); /* Adjust the brightness here */
}}
.custom-title {{
    font-size: 70px;
    font-weight: bold;
    color: #120c01;
    text-align: center;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}}
</style>
"""

# Emit the stylesheet into the page as raw HTML
st.markdown(page_bg_img, unsafe_allow_html=True)

# Load the data and the models
@st.cache_resource
def load_data_models():
    """Read the series table and every search artefact from disk.

    Returns:
        A 5-tuple, in this order:
        the DataFrame read from ``data/series_edited.csv``,
        the array stored in ``embeddings/combined_embeddings_2.npy``,
        the FAISS index read from ``embeddings/faiss_index_2.bin``,
        the ``paraphrase-multilingual-mpnet-base-v2`` SentenceTransformer,
        and the object unpickled from ``embeddings/lsa_model.pkl``.
    """
    series_table = pd.read_csv('data/series_edited.csv')
    embedding_matrix = np.load('embeddings/combined_embeddings_2.npy')
    faiss_index = faiss.read_index('embeddings/faiss_index_2.bin')
    sentence_encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    lsa_model = joblib.load('embeddings/lsa_model.pkl')

    return (
        series_table,
        embedding_matrix,
        faiss_index,
        sentence_encoder,
        lsa_model,
    )


data, combined_embeddings, index, embedder, lsa = load_data_models()

# Text preprocessing function
def clean_text(text):
    """Normalise a piece of text before it is embedded.

    Steps, in order: CRLF pairs and non-breaking spaces become ordinary
    spaces; every character other than Latin/Cyrillic letters, digits,
    ``.,!?;:`` and whitespace is dropped; tokens starting at ``http`` or
    ``www`` are removed; ASCII punctuation is stripped; the result is
    lower-cased. Whitespace runs are not collapsed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased string.
    """
    without_breaks = re.sub(r'\r\n', ' ', text).replace('\xa0', ' ')
    allowed_only = re.sub(r'[^a-zA-Zа-яА-ЯёЁ0-9.,!?;:\s]', '', without_breaks)
    without_urls = re.sub(r'http\S+|www\S+|https\S+', r'', allowed_only)
    punctuation_table = str.maketrans('', '', string.punctuation)
    return without_urls.translate(punctuation_table).lower()

# Find the series that best match the user's query
def search_series(user_query, top_k, description_weight, actors_weight, genre_weight):
    """Search the FAISS index for the series closest to a free-text query.

    The query is cleaned with ``clean_text`` and encoded with the module-level
    ``embedder``. The embedding is repeated three times, each copy scaled by
    one of the weights, and concatenated along axis 1. The result is passed
    through ``lsa.transform``, L2-normalised and handed to ``index.search``.

    Args:
        user_query: Raw query text typed by the user.
        top_k: Maximum number of hits to request from the index.
        description_weight: Scale applied to the first copy of the embedding.
        actors_weight: Scale applied to the second copy of the embedding.
        genre_weight: Scale applied to the third copy of the embedding.

    Returns:
        ``(indices, scores)``: two 1-D arrays of equal length holding the
        row labels returned by the index and their scores (the UI displays
        the scores as cosine similarity). Both are empty when the query
        vector has zero length, e.g. when all three weights are 0. They are
        shorter than ``top_k`` when the index has fewer matches.
    """
    user_query = clean_text(user_query)
    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    weighted_query_embedding = np.concatenate(
        (
            query_embedding * description_weight,
            query_embedding * actors_weight,
            query_embedding * genre_weight,
        ),
        axis=1,
    )
    reduced_embedding = lsa.transform(weighted_query_embedding)

    # A single query row is searched, so the norm is a 1x1 array.
    norm = np.linalg.norm(reduced_embedding, axis=1, keepdims=True)
    if float(norm[0, 0]) == 0.0:
        # Dividing by a zero norm would send NaN to the index; a zero vector
        # carries no direction to search for, so report no hits instead.
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=np.float32)

    # FAISS works on float32 vectors; cast explicitly rather than rely on the
    # dtype produced by the transform above.
    unit_query = (reduced_embedding / norm).astype(np.float32)
    scores, labels = index.search(unit_query, top_k)

    # FAISS fills unused result slots with label -1; drop them so callers
    # never use -1 as a positional row index.
    found = labels[0] >= 0
    return labels[0][found], scores[0][found]

# Seed session_state with the starting weights. The `original_*` entries keep
# the same starting values under separate keys. Keys that already exist are
# left untouched.
for _state_key, _state_default in (
    ('description_weight', 0.7),
    ('actors_weight', 0.15),
    ('genre_weight', 0.15),
    ('original_description_weight', 0.7),
    ('original_actors_weight', 0.15),
    ('original_genre_weight', 0.15),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Restore the weights to their initial values
def reset_weights():
    """Overwrite the three weights in ``st.session_state`` with 0.7, 0.15 and 0.15."""
    initial_values = {
        'description_weight': 0.7,
        'actors_weight': 0.15,
        'genre_weight': 0.15,
    }
    for state_key, initial in initial_values.items():
        st.session_state[state_key] = initial

# Rescale the weights proportionally
def update_weights():
    """Scale the three weights so their sum equals the sum of the originals.

    Reads the ``original_*`` and current weight entries from
    ``st.session_state`` and multiplies each current weight by
    ``sum(original) / sum(current)``. Does nothing when either sum is 0.
    """
    state = st.session_state
    original_total = (
        state['original_description_weight']
        + state['original_actors_weight']
        + state['original_genre_weight']
    )
    current_total = (
        state['description_weight']
        + state['actors_weight']
        + state['genre_weight']
    )

    # Nothing to rescale (and nothing to divide by) when a sum is zero.
    if original_total == 0 or current_total == 0:
        return

    scale = original_total / current_total
    for weight_key in ('description_weight', 'actors_weight', 'genre_weight'):
        state[weight_key] *= scale

# Sidebar sliders for tuning the weights; each one starts from the value
# currently stored in session_state.
description_weight = st.sidebar.slider("Вес описания", 0.0, 1.0, st.session_state['description_weight'], step=0.01)
actors_weight = st.sidebar.slider("Вес актеров", 0.0, 1.0, st.session_state['actors_weight'], step=0.01)
genre_weight = st.sidebar.slider("Вес жанра", 0.0, 1.0, st.session_state['genre_weight'], step=0.01)

# Write the slider values back into session_state
st.session_state['description_weight'] = description_weight
st.session_state['actors_weight'] = actors_weight
st.session_state['genre_weight'] = genre_weight

# Handler for the weight-reset button
# NOTE(review): reset_weights() only rewrites session_state; the local
# description_weight / actors_weight / genre_weight values read from the
# sliders above are not changed by it in this run — confirm this is intended.
if st.sidebar.button("Обновить веса"):
    reset_weights()

# Rescale the session_state weights proportionally
update_weights()

# Page title, query input and result-count selector
st.markdown('<h1 class="custom-title">📽️FIND MY SHOW📽️</h1>', unsafe_allow_html=True)
st.header('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')
top_k = st.slider("Количество результатов", min_value=1, max_value=20, value=5)

# Descriptions longer than this many words are cut and suffixed with '...'.
max_description_words = 50

if st.button('Поиск'):
    start_time = time.time()
    indices, distances = search_series(input_text, top_k, description_weight, actors_weight, genre_weight)
    search_time = time.time() - start_time

    st.write("Результаты поиска:")
    # The timing describes the whole query, so it is reported once instead of
    # being repeated under every result.
    st.write(f'**Время поиска:** {search_time:.3f} секунд')

    for idx, dist in zip(indices, distances):
        # FAISS uses label -1 for an empty result slot; data.iloc[-1] would
        # silently show the last row of the table instead.
        if idx < 0:
            continue
        row = data.iloc[idx]

        # Empty CSV cells are read by pandas as NaN (a float), which has no
        # len()/split(); show them as an empty description.
        raw_description = row['description']
        description = '' if pd.isna(raw_description) else str(raw_description)
        # Decide on truncation by word count, the same unit used for the cut.
        # A local variable is used so the row itself is never modified.
        words = description.split()
        if len(words) > max_description_words:
            description = ' '.join(words[:max_description_words]) + '...'

        st.write("---")
        st.image(row['image_url'], width=400)
        st.write(f"**Название:** **{row['tvshow_title']}**")
        st.write(f"**Жанр:** {row['genre']}")
        st.write(f"**Описание:** {description}")
        st.write(f"**Рейтинг КП :** {round(row['kinopoisk_rating'], 2)}, **Рейтинг IMDb :** {round(row['imdb'], 2)}")
        st.write(f"**Косинусное сходство:** {round(dist, 3)}")
        st.markdown(f"[Читать далее]({row['page_url']})", unsafe_allow_html=True)