# book_recommend / app.py
# Juctxy's picture
# Upload app.py
# a822f29 verified
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from deep_translator import GoogleTranslator
import os
import requests
import zipfile
from datasets import load_dataset
import base64
from PIL import Image
# Fetch the novel catalogue (a CSV hosted on GitHub) through the `datasets` loader
dataset_url = 'https://raw.githubusercontent.com/juctxy/book-recommendation/main/novel.csv'
dataset = load_dataset('csv', data_files=dataset_url)
df = pd.DataFrame(dataset['train'])

# Discard rows without a summary and renumber the index from 0, so that a
# row's index label is also its position in `desc_embeddings` below.
df = df.dropna(subset=["Summary"]).reset_index(drop=True)

# Embed every summary once at start-up; str() covers non-string cells.
descriptions = df["Summary"].tolist()
desc_samples = list(map(str, descriptions))
model = SentenceTransformer("all-MiniLM-L6-v2")
desc_embeddings = model.encode(desc_samples)

# Largest value found in the "Rank" column
ranks = df["Rank"].tolist()
max_rank = max(ranks)
# Paths
zip_url = "https://github.com/juctxy/book-recommendation/raw/main/book_illustrations.zip"  # Path to the ZIP file
zip_path = "book_illustrations.zip"  # Local path to save the ZIP file
image_folder = "book_illustrations"  # Folder to extract images

# Download and unzip only when the images have not been extracted yet: the
# archive is never re-extracted once the folder exists, so fetching it again
# on every start would be wasted traffic.
if not os.path.exists(image_folder):
    # A timeout keeps start-up from hanging forever on a stalled connection.
    response = requests.get(zip_url, timeout=60)
    # Fail here with a clear HTTP error instead of writing an error page to
    # disk and hitting BadZipFile during extraction.
    response.raise_for_status()
    with open(zip_path, 'wb') as file:
        file.write(response.content)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(image_folder)
# Function to load images from local storage
def get_local_image(title, folder=None):
    """Return the cover image for *title* as a base64 ``data:`` URI.

    The file name is derived from the title by replacing spaces and forward
    slashes with underscores and appending ``.webp``.

    Args:
        title: Book title. Non-string values (for example a NaN coming from a
            blank CSV cell) are converted with ``str()`` instead of raising.
        folder: Directory to look in. Defaults to the module-level
            ``image_folder``.

    Returns:
        A ``data:image/webp;base64,...`` string, or ``None`` when no image
        file exists for the title.
    """
    if folder is None:
        folder = image_folder
    filename = f"{str(title).replace(' ', '_').replace('/', '_')}.webp"
    image_path = os.path.join(folder, filename)
    # EAFP: open directly rather than exists()-then-open, which is racy.
    try:
        with open(image_path, "rb") as img_file:
            encoded = base64.b64encode(img_file.read()).decode("utf-8")
    except FileNotFoundError:
        return None  # If image is missing
    return f"data:image/webp;base64,{encoded}"
# Function to calculate rank score
def calculate_rank_score(rank, max_rank):
    """Convert a rank into a normalized score, ``1 - rank / max_rank``.

    A rank of 0 yields 1.0 and a rank equal to ``max_rank`` yields 0.0, so
    numerically smaller ranks score higher.
    """
    relative_position = rank / max_rank
    return 1 - relative_position
def default_top_books(language="English"):
    """Return the default listing shown when there is no query.

    Picks the 10 rows with the smallest ``Rank`` value and renders them.

    Returns:
        A ``(heading, html)`` tuple.
    """
    best_ranked = df.nsmallest(10, 'Rank')
    cards_html = generate_html(best_ranked.index, include_defaults=False, language=language)
    return "Some popular Novels", cards_html
def recommend_books(query, selected_categories, language="English"):
    """Recommend novels whose summaries best match a free-text query.

    The query is translated from Vietnamese to English, embedded with the
    module-level sentence-transformer ``model`` and compared (cosine
    similarity) with the pre-computed summary embeddings. The 10 most similar
    books are re-scored as ``0.7 * similarity + 0.3 * rank_score`` and only
    those scoring at least 0.4 are kept, best first.

    Args:
        query: Search text. Blank or ``None`` falls back to the default
            top-ranked listing.
        selected_categories: Category names to restrict the search to; a book
            matches when it carries at least one of them. Empty or ``None``
            means no restriction.
        language: Language passed on to ``generate_html`` for the summaries.

    Returns:
        A ``(heading, html)`` tuple in every case, matching the two outputs
        the UI handlers bind to this function.
    """
    if not query or not query.strip():
        # Forward the language so the fallback listing honours the selector.
        return default_top_books(language)

    # Translate query
    try:
        # Guard against a None/empty translation as well as an exception.
        translated_query = GoogleTranslator(source="vi", target="en").translate(query) or query
    except Exception as e:
        print(f"Translation error: {e}")
        translated_query = query  # Fallback to original query

    # Category filtering: case-insensitive, whitespace-tolerant, any-match
    if selected_categories:
        wanted = {cat.strip().lower() for cat in selected_categories}

        def _has_wanted_category(cell):
            if pd.isna(cell):
                return False
            book_categories = (c.strip().lower() for c in str(cell).split(','))
            return not wanted.isdisjoint(book_categories)

        filtered_df = df[df['Categories'].apply(_has_wanted_category)]
    else:
        filtered_df = df

    if filtered_df.empty:
        # Return two values like every other path; a bare string does not
        # fit the (heading, html) outputs the handlers expect.
        return "No books found with the selected categories.", ""

    # Encode query and compute cosine similarities.
    # df was re-indexed 0..n-1 after filtering out missing summaries, so the
    # index labels double as row positions in desc_embeddings.
    row_labels = filtered_df.index.to_numpy()
    query_embedding = model.encode([translated_query])
    similarities = cosine_similarity(query_embedding, desc_embeddings[row_labels])[0]

    # Positions (within the filtered set) of the 10 most similar books
    top_positions = np.argsort(similarities)[::-1][:10]

    highest_rank = df['Rank'].max()  # loop-invariant, computed once
    weighted_results = []
    for pos in top_positions:
        label = int(row_labels[pos])
        rank_score = calculate_rank_score(df.loc[label, 'Rank'], highest_rank)
        final_score = (0.7 * similarities[pos]) + (0.3 * rank_score)
        if final_score >= 0.4:
            weighted_results.append((label, final_score))

    # Sort by final weighted score
    weighted_results.sort(key=lambda x: x[1], reverse=True)
    for idx, score in weighted_results:
        print(f"Book: {df.loc[idx, 'Title']}, Final Score: {score}")

    selected_indices = [idx for idx, _ in weighted_results]
    return "Some novels you may like", generate_html(selected_indices, include_defaults=False, language=language)
def generate_html(selected_indices, include_defaults, language="English"):
    """Render the selected novels as a grid of clickable HTML cards.

    Each card shows the cover, title, author, rating, rank and chapter count.
    Clicking a card opens a modal overlay containing the (hidden) summary.
    Books without a local cover image are skipped.

    Args:
        selected_indices: Index labels of rows in the module-level ``df``.
        include_defaults: Accepted for interface compatibility; not used.
        language: ``"Vietnamese"`` translates each summary from English;
            any other value leaves summaries untouched.

    Returns:
        One HTML string: a ``<style>`` block followed by the card container.
    """
    from html import escape  # local import: no new file-level dependency

    # Only build a translator when it will actually be used.
    translator = GoogleTranslator(source="en", target="vi") if language == "Vietnamese" else None

    # Collect fragments and join once instead of repeated string +=.
    parts = ["""
<style>
.novel-container {
display: flex;
flex-wrap: wrap;
gap: 10px;
justify-content: center;
max-width: 1300px; /* Adjust max-width for 5 cards per row */
margin: 0 auto;
}
.novel-card {
border: 1px solid #000;
padding: 10px;
border-radius: 5px;
background-color: #333;
color: #fff;
width: calc(20% - 20px);
text-align: center;
cursor: pointer;
}
.novel-card h3 {
font-size: 16px;
margin-bottom: 5px;
color: #fff;
}
.novel-card p {
font-size: 12px;
color: #ccc;
}
.novel-card img {
width: 100%;
height: auto;
object-fit: cover;
border-radius: 5px;
}
@media (max-width: 768px) {
.novel-card {
width: calc(50% - 10px);
}
}
@media (max-width: 480px) {
.novel-card {
width: calc(100% - 10px);
height: auto;
}
}
</style>
<div class="novel-container">
"""]

    for idx in selected_indices:
        row = df.loc[idx]

        # Check for the cover first so skipped books cost no translation call.
        img_data = get_local_image(row["Title"])
        if not img_data:
            continue

        summary = str(row["Summary"])
        if translator is not None:
            # Translate the raw text (before any markup is added) and fall
            # back to the English summary if the service fails, so one bad
            # request does not abort the whole page.
            try:
                summary = translator.translate(summary) or summary
            except Exception as e:
                print(f"Translation error: {e}")

        # Escape every catalogue value before placing it in markup; quotes or
        # angle brackets in a title would otherwise break the attribute/tag.
        title = escape(str(row["Title"]))
        author = escape(str(row["Author"]))
        rating = escape(str(row["Rating"]))
        rank = escape(str(row["Rank"]))
        chapters = escape(str(row["Chapters"]))
        # Line breaks become <br> only after escaping, so the tag survives.
        summary = escape(summary).replace("\n", "<br>")

        parts.append(f"""
<div class="novel-card" onclick="(function(){{
if(document.querySelector('.modal-overlay')) {{
return;
}}
var d = document.getElementById('summary{idx}');
if(d){{
var overlay = document.createElement('div');
overlay.className = 'modal-overlay';
overlay.style.position = 'fixed';
overlay.style.top = '0';
overlay.style.left = '0';
overlay.style.width = '100%';
overlay.style.height = '100%';
overlay.style.backgroundColor = 'rgba(0, 0, 0, 0.7)';
overlay.style.zIndex = '999';
overlay.onclick = function(event) {{
if (event.target === overlay) {{
overlay.parentNode.removeChild(overlay);
}}
}};
var m = document.createElement('div');
m.className = 'modal-box';
m.style.position = 'fixed';
m.style.top = '50%';
m.style.left = '50%';
m.style.transform = 'translate(-50%, -50%)';
m.style.padding = '20px';
m.style.backgroundColor = '#333';
m.style.borderRadius = '8px';
m.style.maxWidth = '500px';
m.style.width = '80%';
m.style.boxShadow = '0 4px 8px rgba(0,0,0,0.2)';
m.style.overflow = 'auto';
m.innerHTML = d.innerHTML;
m.style.color = '#fff';
var closeButton = document.createElement('button');
closeButton.innerText = '✕';
closeButton.style.position = 'absolute';
closeButton.style.top = '10px';
closeButton.style.right = '10px';
closeButton.style.background = 'transparent';
closeButton.style.border = 'none';
closeButton.style.fontSize = '20px';
closeButton.style.cursor = 'pointer';
closeButton.onclick = function(){{
overlay.parentNode.removeChild(overlay);
}};
m.appendChild(closeButton);
overlay.appendChild(m);
document.body.appendChild(overlay);
}}
}})()">
<img src="{img_data}" alt="{title}">
<h3 style="font-size:20px; margin-bottom:5px; color:#fff;">{title}</h3>
<p style="color: white;font-size:16px;"><strong style="color: white;">Author:</strong> {author}<br>
<strong style="color: white;">Rating:</strong> {rating}<br>
<strong style="color: white;">Rank:</strong> {rank}<br>
<strong style="color: white;">Chapters:</strong> {chapters}</p>
<details id="summary{idx}" style="margin-top:5px; display:none;">
<summary style="color:#fff;"><strong>Summary</strong></summary>
<p style="margin-top:5px; color:#ccc;">{summary}</p>
</details>
</div>
""")

    parts.append("</div>")
    return "".join(parts)
# Gradio UI: dark-themed page with a query box, category filter, language
# selector and search button, followed by a heading and the result cards.
with gr.Blocks(css="""
.gradio-container {
background-color: black !important;
color: white !important;
}
.gradio-container a {
color: white !important;
}
/* Target all possible .gr-title containers */
.gradio-container .gr-title,
.gradio-container [class*="svelte-"] .gr-title {
color: white !important;
text-align: center !important;
font-size: 26px !important;
font-family: 'Source Sans Pro', sans-serif !important;
}
/* Force styles to children elements */
.gradio-container .gr-title h3,
.gradio-container .gr-title span,
.gradio-container .gr-title a {
color: inherit !important;
font-size: inherit !important;
font-family: inherit !important;
text-decoration: none;
}
/* Specific footer styling */
.gradio-container .gr-title[style*="26px"] {
font-size: 26px !important;
margin-top: 20px;
}
/* Fix footer color */
.gradio-container .gr-title .prose,
.gradio-container .gr-title .prose * {
color: white !important;
}
/* Force underline for links */
.gradio-container .gr-title a {
text-decoration: underline !important;
}
/* Override Gradio's last-child margin */
.gradio-container .gr-title .prose :last-child {
margin-bottom: 0 !important;
color: white !important;
}
.gr-row,
.gr-row * {
background-color: black !important;
color: white !important;
outline: none !important;
box-shadow: none !important;
}
.gr-checkboxgroup, .gr-checkboxgroup * {
background-color: black;
color: white;
}
.gr-checkboxgroup label {
background-color: black;
color: white;
}
.gr-checkboxgroup input[type="checkbox"] {
background-color: black;
color: white;
border: 1px solid white;
}
.gr-button {
background-color: black !important;
color: white !important;
border: 1px solid white !important;
cursor: pointer !important;
}
.gr-button:hover {
background-color: #222 !important;
}
/* Remove all focus outlines */
input:focus,
textarea:focus,
select:focus,
button:focus {
outline: none !important;
box-shadow: none !important;
border-color: white !important;
}
""") as demo:
    # NOTE(review): this initial value differs from the headings the handlers
    # return ("Some popular Novels"); presumably deliberate so the first load
    # counts as a change and triggers the title update below. Confirm before
    # "fixing" the spelling.
    title_state = gr.State("Some popular Novel")
    gr.Markdown(
        "### Huy's Brilliant Library: Web Novel Corner",
        elem_classes="gr-title"
    )

    # Distinct category names for the filter. Built with plain Python so that
    # missing cells and empty fragments (e.g. from a trailing comma) are
    # dropped; sorting a mix of NaN and strings would raise TypeError.
    unique_categories = sorted({
        cat.strip()
        for cell in df['Categories']
        if isinstance(cell, str)
        for cat in cell.split(',')
        if cat.strip()
    })

    # Layout: the inputs are created inside the Row context so they are
    # placed in it. gr.Row takes no components as arguments.
    with gr.Row(elem_classes="gr-row"):
        # Query input
        query_input = gr.Textbox(
            lines=1,
            placeholder="Enter your book query...",
            label="Query",
            elem_classes="gr-row"
        )
        category_filter = gr.CheckboxGroup(
            choices=unique_categories,
            label="Select Categories",
            elem_classes="gr-checkboxgroup"
        )
        # Language selector
        language_selector = gr.Radio(
            choices=["English", "Vietnamese"],
            label="Select Language",
            value="English",  # Default language
            elem_classes="gr-row"
        )
        # Search button
        recommend_button = gr.Button("Search", elem_classes="gr-button")

    # Markdown title and HTML output for recommendations
    title_markdown = gr.Markdown(elem_id="title", elem_classes="gr-title")
    output_html = gr.HTML()

    def _run_search(query, categories, language):
        """Shared handler: recommend for a non-blank query, else show defaults."""
        if query and query.strip():
            return recommend_books(query, categories, language)
        return default_top_books(language)

    # Event handlers: Enter in the textbox, the Search button and the language
    # toggle all run the same search with the same inputs and outputs.
    search_binding = dict(
        fn=_run_search,
        inputs=[query_input, category_filter, language_selector],
        outputs=[title_state, output_html],
    )
    query_input.submit(**search_binding)
    recommend_button.click(**search_binding)
    language_selector.change(**search_binding)

    # Initial load
    demo.load(
        fn=default_top_books,
        inputs=[language_selector],
        outputs=[title_state, output_html]
    )

    # Title update
    title_state.change(
        fn=lambda x: f"""<div class="gr-title">{x}</div>""",
        inputs=[title_state],
        outputs=[title_markdown]
    )

    gr.Markdown(
        """Hope you find some novels you love. Enjoy!<br>
<a href="https://www.webnovelworld.org/home" target="_blank" style="text-decoration: underline !important;">Check it out here</a>""",
        elem_classes="gr-title"
    )

demo.launch(share=True)