whoisida committed on
Commit
64e8b5d
·
verified ·
1 Parent(s): 640fb08

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. book_embeddings6.pth +3 -0
  3. dataset_all.csv +3 -0
  4. requirements.txt +64 -0
  5. stream_5.py +67 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dataset_all.csv filter=lfs diff=lfs merge=lfs -text
book_embeddings6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c63b66f5dfb711ac7f2cb043620a29c5f3ec32bb69e61828caea3098648ef8f9
3
+ size 36412873
dataset_all.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2c7bebcb0c5efe1685f50870fda789ea2572c782e2e661f2045abbbfbbc550
3
+ size 46342847
requirements.txt ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.2.0
2
+ attrs==23.2.0
3
+ blinker==1.7.0
4
+ cachetools==5.3.2
5
+ certifi==2024.2.2
6
+ charset-normalizer==3.3.2
7
+ click==8.1.7
8
+ filelock==3.13.1
9
+ fsspec==2024.2.0
10
+ gitdb==4.0.11
11
+ GitPython==3.1.41
12
+ huggingface-hub==0.20.3
13
+ idna==3.6
14
+ importlib-metadata==7.0.1
15
+ Jinja2==3.1.3
16
+ joblib==1.3.2
17
+ jsonschema==4.21.1
18
+ jsonschema-specifications==2023.12.1
19
+ markdown-it-py==3.0.0
20
+ MarkupSafe==2.1.5
21
+ mdurl==0.1.2
22
+ mpmath==1.3.0
23
+ networkx==3.2.1
24
+ nltk==3.8.1
25
+ numpy==1.26.4
26
+ packaging==23.2
27
+ pandas==2.2.0
28
+ pillow==10.2.0
29
+ protobuf==4.25.2
30
+ pyarrow==15.0.0
31
+ pydeck==0.8.1b0
32
+ Pygments==2.17.2
33
+ python-dateutil==2.8.2
34
+ pytz==2024.1
35
+ PyYAML==6.0.1
36
+ referencing==0.33.0
37
+ regex==2023.12.25
38
+ requests==2.31.0
39
+ rich==13.7.0
40
+ rpds-py==0.17.1
41
+ safetensors==0.4.2
42
+ scikit-learn==1.4.0
43
+ scipy==1.12.0
44
+ sentence-transformers==2.3.1
45
+ sentencepiece==0.1.99
46
+ six==1.16.0
47
+ smmap==5.0.1
48
+ streamlit==1.31.0
49
+ sympy==1.12
50
+ tenacity==8.2.3
51
+ threadpoolctl==3.2.0
52
+ tokenizers==0.15.1
53
+ toml==0.10.2
54
+ toolz==0.12.1
55
+ torch==2.2.0
56
+ tornado==6.4
57
+ tqdm==4.66.1
58
+ transformers==4.37.2
59
+ typing_extensions==4.9.0
60
+ tzdata==2023.4
61
+ tzlocal==5.2
62
+ urllib3==2.2.0
63
+ validators==0.22.0
64
+ zipp==3.17.0
stream_5.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sentence_transformers import SentenceTransformer, util
5
+ from PIL import Image
6
+ import requests
7
+ import torch
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+
10
# Streamlit re-executes this script from top to bottom on every widget
# interaction. The loaders below are wrapped in st.cache_resource so the
# model, the dataset and the embeddings are created once per server process
# and the same objects are reused on later reruns. None of them is mutated
# by this script, so sharing a single instance is safe.
@st.cache_resource
def _load_model():
    """Return the sentence-embedding model used to encode user queries."""
    return SentenceTransformer('cointegrated/rubert-tiny2')


@st.cache_resource
def _load_dataset():
    """Return the book table read from ``dataset_all.csv``."""
    # Relative path (resolved against the working directory, exactly like
    # the embeddings file) instead of an absolute path inside one
    # developer's home directory, which does not exist on other machines.
    return pd.read_csv('dataset_all.csv')


@st.cache_resource
def _load_embeddings():
    """Return the precomputed book embeddings, mapped onto the CPU."""
    # map_location='cpu' lets the file load on hosts without the device the
    # tensor was saved from; downstream code moves it to the CPU anyway.
    # NOTE(review): the change that introduced this script uploaded
    # `book_embeddings6.pth`; confirm that `book_embeddings3.pth` is the
    # intended file and that its rows line up with `dataset_all.csv`.
    return torch.load('book_embeddings3.pth', map_location='cpu')


# Load BERT model
model = _load_model()

# Load dataset
databook = _load_dataset()

# Load saved embeddings
embeddings = _load_embeddings()
18
+
19
+ # Function to get most similar books
20
def get_most_similar_books(user_query, embeddings, top_k=5):
    """Return the indices of the books most similar to a text query.

    The query is encoded with the module-level ``model``, normalized, and
    compared against ``embeddings`` with cosine similarity.

    Args:
        user_query: Text entered by the user.
        embeddings: Torch tensor of precomputed book embeddings. It is used
            as the second matrix of ``cosine_similarity``, so it is
            presumably 2-D with one row per book (TODO confirm against the
            code that produced the file).
        top_k: Maximum number of indices to return. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        A NumPy array with at most ``top_k`` indices into ``embeddings``,
        ordered from the highest to the lowest cosine similarity.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative")

    # Encode user query
    query_embedding = model.encode(user_query, convert_to_tensor=True)
    # unsqueeze(0) adds the batch dimension before normalization. The result
    # gets its own name so the `user_query` argument is not rebound to a
    # tensor.
    query_batch = util.normalize_embeddings(query_embedding.unsqueeze(0))

    # Convert torch tensors to numpy arrays for scikit-learn
    query_np = query_batch.cpu().detach().numpy()
    embeddings_np = embeddings.cpu().detach().numpy()

    # One row of similarities: the single query against every embedding
    similarities = cosine_similarity(query_np, embeddings_np)[0]

    # argsort is ascending, so reverse it to put the best matches first
    ranked = np.argsort(similarities)[::-1]
    return ranked[:top_k]
35
+
36
+
37
def main():
    """Render the book recommendation page.

    Shows a text input and a button. Whenever the text input is non-empty,
    looks up the most similar books with ``get_most_similar_books`` and
    renders each one as a cover image next to its title, author and
    annotation, read from the module-level ``databook`` table.
    """
    # Local import keeps this function self-contained; BytesIO wraps the
    # downloaded bytes so PIL can read them like a file.
    from io import BytesIO

    st.title("Рекомендации книг")

    # Get user query
    user_query = st.text_input("Введите описание книги:")

    # The button's return value is intentionally not consulted: results are
    # shown whenever the query is non-empty, as before.
    st.button('Отправить запрос', type="primary")

    if not user_query:
        return

    # Show recommended books
    st.write("Наиболее подходящие книги для вас:")
    for idx in get_most_similar_books(user_query, embeddings):
        book = databook.iloc[idx]

        # Display book cover image and description side by side
        col1, col2 = st.columns([1, 4])  # Layout ratio 1:4
        with col1:
            if pd.notna(book['image_url']):
                # Best effort: a missing or broken cover must not abort the
                # page, so any failure is reported in place of the image.
                try:
                    # A timeout prevents a stalled image host from hanging
                    # the whole page render.
                    response = requests.get(book['image_url'], timeout=10)
                    # Surface HTTP errors instead of handing an error page
                    # to the image decoder.
                    response.raise_for_status()
                    # response.content is the fully downloaded, decoded
                    # body, unlike the raw socket stream.
                    image = Image.open(BytesIO(response.content))
                    st.image(image, caption='Обложка книги', width=130)
                except Exception as e:
                    st.write("Ошибка при загрузке изображения:", e)
        with col2:
            st.subheader(book['title'])
            st.write(f"Автор: {book['author']}")
            st.write(f"Описание: {book['annotation']}")


if __name__ == "__main__":
    main()