Juctxy committed on
Commit
a822f29
·
verified ·
1 Parent(s): a52b699

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +444 -0
app.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer
4
+
5
+ import gradio as gr
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+
8
+ from deep_translator import GoogleTranslator
9
+
10
+ import os
11
+ import requests
12
+ import zipfile
13
+ from datasets import load_dataset
14
+ import base64
15
+ from PIL import Image
16
+
17
+
# Load the novel catalogue (a CSV hosted on GitHub) and keep only the rows
# that have a summary, because the summary text is what gets embedded.
dataset_url = 'https://raw.githubusercontent.com/juctxy/book-recommendation/main/novel.csv'
dataset = load_dataset('csv', data_files=dataset_url)
df = pd.DataFrame(dataset['train'])
# reset_index(drop=True) renumbers the rows 0..n-1 so that DataFrame labels
# line up with row positions in `desc_embeddings` below.
df = df[df["Summary"].notnull()].reset_index(drop=True)

descriptions = df["Summary"].tolist()
desc_samples = [str(text) for text in descriptions]

model = SentenceTransformer("all-MiniLM-L6-v2")

# One embedding per summary, in the same order as the rows of `df`.
desc_embeddings = model.encode(desc_samples)

ranks = df["Rank"].tolist()
max_rank = max(ranks)

# Paths
zip_url = "https://github.com/juctxy/book-recommendation/raw/main/book_illustrations.zip"  # Path to the ZIP file
zip_path = "book_illustrations.zip"  # Local path to save the ZIP file
image_folder = "book_illustrations"  # Folder to extract images

# Download and unzip the illustrations only when they have not been extracted
# yet; re-downloading the archive on every start is wasted work once the
# folder exists.
if not os.path.exists(image_folder):
    # A timeout prevents start-up from hanging forever on a stalled connection,
    # and raise_for_status() stops an HTTP error page from being saved as the ZIP.
    response = requests.get(zip_url, timeout=60)
    response.raise_for_status()
    with open(zip_path, 'wb') as file:
        file.write(response.content)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(image_folder)
48
+
49
+
def get_local_image(title):
    """Return the stored illustration for ``title`` as a base64 data URI.

    The file name is built from ``title`` by replacing spaces and forward
    slashes with underscores and appending ``.webp``; the file is looked up
    inside ``image_folder``.

    Returns:
        A ``data:image/webp;base64,...`` string, or ``None`` when no such
        file exists.
    """
    safe_name = title.replace(' ', '_').replace('/', '_')
    image_path = os.path.join(image_folder, safe_name + ".webp")

    if not os.path.exists(image_path):
        return None  # If image is missing

    with open(image_path, "rb") as img_file:
        encoded = base64.b64encode(img_file.read()).decode("utf-8")
    return "data:image/webp;base64," + encoded
61
+
62
+
63
+
def calculate_rank_score(rank, max_rank):
    """Return the normalized rank score ``1 - rank / max_rank``.

    A rank equal to ``max_rank`` scores 0; smaller ranks score closer to 1.
    """
    rank_fraction = rank / max_rank
    return 1 - rank_fraction
67
+
68
+
def default_top_books(language="English"):
    """Return the default heading and the rendered cards for the ten rows of
    ``df`` with the smallest ``Rank`` values.

    ``language`` is forwarded unchanged to ``generate_html``.
    """
    heading = "Some popular Novels"
    best_ranked = df.nsmallest(10, 'Rank').index
    cards_html = generate_html(best_ranked, include_defaults=False, language=language)
    return heading, cards_html
72
+
73
+
74
+
def recommend_books(query, selected_categories, language="English"):
    """Recommend novels whose summaries are semantically close to ``query``.

    The query is translated from Vietnamese to English (falling back to the
    raw query if translation fails), embedded with ``model`` and compared by
    cosine similarity against the precomputed summary embeddings. The ten
    most similar books are re-scored as ``0.7 * similarity + 0.3 * rank_score``
    and only those scoring at least 0.4 are kept.

    Args:
        query: Free-text search string. Empty or whitespace-only input yields
            the default top-ranked list.
        selected_categories: Category names to filter by; a book matches when
            any of its comma-separated categories equals one of them
            (case-insensitive, surrounding whitespace ignored). Falsy means
            no filtering.
        language: Display language forwarded to ``generate_html``.

    Returns:
        A ``(heading, html)`` tuple. Every path returns two values so the
        result can always be unpacked into the two UI outputs.
    """
    if not query or not query.strip():
        # Forward the language so the fallback list honours the user's choice.
        return default_top_books(language)

    # Translate query
    try:
        translated_query = GoogleTranslator(source="vi", target="en").translate(query)
    except Exception as e:
        print(f"Translation error: {e}")
        translated_query = None
    if not translated_query:
        translated_query = query  # Fallback to original query

    if selected_categories:
        # Normalize once (lowercase + strip) and use a set for membership tests.
        wanted = {cat.strip().lower() for cat in selected_categories}

        def _has_wanted_category(cell):
            if pd.isna(cell):
                return False
            book_categories = (c.strip().lower() for c in str(cell).split(','))
            return not wanted.isdisjoint(book_categories)

        filtered_df = df[df['Categories'].apply(_has_wanted_category)]
    else:
        filtered_df = df

    if filtered_df.empty:
        # Callers expect (heading, html); an empty html string clears old results.
        return "No books found with the selected categories.", ""

    # Encode query and compute cosine similarities.
    # NOTE: indexing desc_embeddings with DataFrame labels relies on df labels
    # matching embedding row positions, as in the original code.
    query_embedding = model.encode([translated_query])
    row_labels = filtered_df.index.to_numpy()
    similarities = cosine_similarity(query_embedding, desc_embeddings[row_labels])[0]

    # Positions (within the filtered set) of the ten most similar books.
    top_positions = np.argsort(similarities)[::-1][:10]

    highest_rank = df['Rank'].max()  # loop-invariant, computed once

    weighted_results = []
    for pos in top_positions:
        label = int(row_labels[pos])
        rank_score = calculate_rank_score(df.loc[label, 'Rank'], highest_rank)
        final_score = (0.7 * similarities[pos]) + (0.3 * rank_score)
        if final_score >= 0.4:
            weighted_results.append((label, final_score))

    # Sort by final weighted score
    weighted_results.sort(key=lambda item: item[1], reverse=True)
    for idx, score in weighted_results:
        print(f"Book: {df.loc[idx, 'Title']}, Final Score: {score}")
    selected_indices = [idx for idx, _ in weighted_results]

    return "Some novels you may like", generate_html(selected_indices, include_defaults=False, language=language)
128
+
def generate_html(selected_indices, include_defaults, language="English"):
    """Render the selected books as a grid of clickable HTML cards.

    Each card shows the cover, title, author, rating, rank and chapter count;
    clicking a card opens a modal overlay containing the book's summary.
    Books without a local illustration are skipped.

    Args:
        selected_indices: Labels of the rows in ``df`` to render, in order.
        include_defaults: Accepted for interface compatibility; not used here.
        language: When ``"Vietnamese"``, summaries are translated from English
            to Vietnamese; if a translation fails the original text is shown.

    Returns:
        A single HTML string (stylesheet plus the card container).
    """
    # Local import keeps this function self-contained.
    from html import escape

    parts = ["""
    <style>
    .novel-container {
        display: flex;
        flex-wrap: wrap;
        gap: 10px;
        justify-content: center;
        max-width: 1300px; /* Adjust max-width for 5 cards per row */
        margin: 0 auto;
    }
    .novel-card {
        border: 1px solid #000;
        padding: 10px;
        border-radius: 5px;
        background-color: #333;
        color: #fff;
        width: calc(20% - 20px);
        text-align: center;
        cursor: pointer;
    }
    .novel-card h3 {
        font-size: 16px;
        margin-bottom: 5px;
        color: #fff;
    }
    .novel-card p {
        font-size: 12px;
        color: #ccc;
    }
    .novel-card img {
        width: 100%;
        height: auto;
        object-fit: cover;
        border-radius: 5px;
    }
    @media (max-width: 768px) {
        .novel-card {
            width: calc(50% - 10px);
        }
    }
    @media (max-width: 480px) {
        .novel-card {
            width: calc(100% - 10px);
            height: auto;
        }
    }
    </style>
    <div class="novel-container">
    """]

    # Only build a translator when one is actually needed.
    translator = None
    if language == "Vietnamese":
        translator = GoogleTranslator(source="en", target="vi")

    for idx in selected_indices:
        row = df.loc[idx]
        raw_title = str(row["Title"])

        # Check for the image first so no translation is spent on skipped books.
        img_data = get_local_image(raw_title)
        if not img_data:
            continue

        summary_text = str(row["Summary"])
        if translator is not None:
            # Translate the plain text (before any HTML is added) and keep the
            # original summary if the translation service fails.
            try:
                summary_text = translator.translate(summary_text) or summary_text
            except Exception as e:
                print(f"Translation error: {e}")

        # Escape every interpolated value: the data comes from an external CSV
        # and must not be able to break the markup or the inline handler.
        title = escape(raw_title)
        author = escape(str(row["Author"]))
        rating = escape(str(row["Rating"]))
        rank = escape(str(row["Rank"]))
        chapters = escape(str(row["Chapters"]))
        summary = escape(summary_text).replace("\n", "<br>")

        parts.append(f"""
        <div class="novel-card" onclick="(function(){{
            if(document.querySelector('.modal-overlay')) {{
                return;
            }}
            var d = document.getElementById('summary{idx}');
            if(d){{
                var overlay = document.createElement('div');
                overlay.className = 'modal-overlay';
                overlay.style.position = 'fixed';
                overlay.style.top = '0';
                overlay.style.left = '0';
                overlay.style.width = '100%';
                overlay.style.height = '100%';
                overlay.style.backgroundColor = 'rgba(0, 0, 0, 0.7)';
                overlay.style.zIndex = '999';

                overlay.onclick = function(event) {{
                    if (event.target === overlay) {{
                        overlay.parentNode.removeChild(overlay);
                    }}
                }};

                var m = document.createElement('div');
                m.className = 'modal-box';
                m.style.position = 'fixed';
                m.style.top = '50%';
                m.style.left = '50%';
                m.style.transform = 'translate(-50%, -50%)';
                m.style.padding = '20px';
                m.style.backgroundColor = '#333';
                m.style.borderRadius = '8px';
                m.style.maxWidth = '500px';
                m.style.width = '80%';
                m.style.boxShadow = '0 4px 8px rgba(0,0,0,0.2)';
                m.style.overflow = 'auto';
                m.innerHTML = d.innerHTML;
                m.style.color = '#fff';

                var closeButton = document.createElement('button');
                closeButton.innerText = '✕';
                closeButton.style.position = 'absolute';
                closeButton.style.top = '10px';
                closeButton.style.right = '10px';
                closeButton.style.background = 'transparent';
                closeButton.style.border = 'none';
                closeButton.style.fontSize = '20px';
                closeButton.style.cursor = 'pointer';
                closeButton.onclick = function(){{
                    overlay.parentNode.removeChild(overlay);
                }};
                m.appendChild(closeButton);
                overlay.appendChild(m);
                document.body.appendChild(overlay);
            }}
        }})()">
            <img src="{img_data}" alt="{title}">
            <h3 style="font-size:20px; margin-bottom:5px; color:#fff;">{title}</h3>
            <p style="color: white;font-size:16px;"><strong style="color: white;">Author:</strong> {author}<br>
            <strong style="color: white;">Rating:</strong> {rating}<br>
            <strong style="color: white;">Rank:</strong> {rank}<br>
            <strong style="color: white;">Chapters:</strong> {chapters}</p>
            <details id="summary{idx}" style="margin-top:5px; display:none;">
                <summary style="color:#fff;"><strong>Summary</strong></summary>
                <p style="margin-top:5px; color:#ccc;">{summary}</p>
            </details>
        </div>
        """)

    parts.append("</div>")
    return "".join(parts)
267
+
268
+
# Stylesheet for the whole app: black background, white text, and matching
# styles for the custom "gr-title", "gr-row", "gr-checkboxgroup" and
# "gr-button" classes attached to the components below.
_APP_CSS = """
.gradio-container {
    background-color: black !important;
    color: white !important;
}
.gradio-container a {
    color: white !important;
}
/* Target all possible .gr-title containers */
.gradio-container .gr-title,
.gradio-container [class*="svelte-"] .gr-title {
    color: white !important;
    text-align: center !important;
    font-size: 26px !important;
    font-family: 'Source Sans Pro', sans-serif !important;
}

/* Force styles to children elements */
.gradio-container .gr-title h3,
.gradio-container .gr-title span,
.gradio-container .gr-title a {
    color: inherit !important;
    font-size: inherit !important;
    font-family: inherit !important;
    text-decoration: none;
}

/* Specific footer styling */
.gradio-container .gr-title[style*="26px"] {
    font-size: 26px !important;
    margin-top: 20px;
}
/* Fix footer color */
.gradio-container .gr-title .prose,
.gradio-container .gr-title .prose * {
    color: white !important;
}

/* Force underline for links */
.gradio-container .gr-title a {
    text-decoration: underline !important;
}

/* Override Gradio's last-child margin */
.gradio-container .gr-title .prose :last-child {
    margin-bottom: 0 !important;
    color: white !important;
}
.gr-row,
.gr-row * {
    background-color: black !important;
    color: white !important;
    outline: none !important;
    box-shadow: none !important;
}
.gr-checkboxgroup, .gr-checkboxgroup * {
    background-color: black;
    color: white;
}
.gr-checkboxgroup label {
    background-color: black;
    color: white;
}
.gr-checkboxgroup input[type="checkbox"] {
    background-color: black;
    color: white;
    border: 1px solid white;
}
.gr-button {
    background-color: black !important;
    color: white !important;
    border: 1px solid white !important;
    cursor: pointer !important;
}
.gr-button:hover {
    background-color: #222 !important;
}
/* Remove all focus outlines */
input:focus,
textarea:focus,
select:focus,
button:focus {
    outline: none !important;
    box-shadow: none !important;
    border-color: white !important;
}
"""


def _run_search(query, categories, language):
    """Shared handler for submit / click / language change.

    Runs a recommendation for a non-empty query, otherwise shows the default
    top-ranked list. Returns the (heading, html) pair produced by either.
    """
    if query and query.strip():
        return recommend_books(query, categories, language)
    return default_top_books(language)


with gr.Blocks(css=_APP_CSS) as demo:
    title_state = gr.State("Some popular Novel")

    gr.Markdown(
        "### Huy's Brilliant Library: Web Novel Corner",
        elem_classes="gr-title"
    )

    # Build the category choices. Rows whose 'Categories' is not a string
    # become empty lists, which explode() turns into NaN; those and empty
    # names are dropped so that sorted() only ever compares strings.
    split_categories = df['Categories'].apply(lambda x: x.split(',') if isinstance(x, str) else []).explode()
    unique_categories = sorted(
        {cat.strip() for cat in split_categories.dropna() if cat.strip()}
    )

    # Layout: gr.Row is a context manager, so the inputs are created inside it
    # instead of being passed to it as positional arguments.
    with gr.Row(elem_classes="gr-row"):
        # Query input
        query_input = gr.Textbox(
            lines=1,
            placeholder="Enter your book query...",
            label="Query",
            elem_classes="gr-row"
        )

        category_filter = gr.CheckboxGroup(
            choices=unique_categories,
            label="Select Categories",
            elem_classes="gr-checkboxgroup"
        )

        # Language selector
        language_selector = gr.Radio(
            choices=["English", "Vietnamese"],
            label="Select Language",
            value="English",  # Default language
            elem_classes="gr-row"
        )

        # Search button
        recommend_button = gr.Button("Search", elem_classes="gr-button")

    # Markdown title and HTML output for recommendations
    title_markdown = gr.Markdown(elem_id="title", elem_classes="gr-title")
    output_html = gr.HTML()

    # Event handlers: pressing Enter, clicking Search and switching language
    # all re-run the same search with the current inputs.
    search_inputs = [query_input, category_filter, language_selector]
    search_outputs = [title_state, output_html]

    query_input.submit(fn=_run_search, inputs=search_inputs, outputs=search_outputs)
    recommend_button.click(fn=_run_search, inputs=search_inputs, outputs=search_outputs)
    language_selector.change(fn=_run_search, inputs=search_inputs, outputs=search_outputs)

    # Initial load
    demo.load(
        fn=default_top_books,
        inputs=[language_selector],
        outputs=search_outputs
    )

    # Title update: mirror the heading held in title_state into the Markdown.
    title_state.change(
        fn=lambda x: f"""<div class="gr-title">{x}</div>""",
        inputs=[title_state],
        outputs=[title_markdown]
    )

    gr.Markdown(
        """Hope you find some novels you love. Enjoy!<br>
        <a href="https://www.webnovelworld.org/home" target="_blank" style="text-decoration: underline !important;">Check it out here</a>""",
        elem_classes="gr-title"
    )

demo.launch(share=True)