File size: 2,879 Bytes
94d3746
 
 
 
 
 
75596a7
94d3746
75596a7
 
 
 
 
94d3746
75596a7
 
 
94d3746
 
75596a7
94d3746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75596a7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import torch
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentence_transformers.util import cos_sim
import gradio as gr

# --- FIX: Dynamically select the device ---
# Probe accelerators in priority order (Apple MPS first, then CUDA);
# fall back to plain CPU when neither backend reports availability.
_candidates = [
    ("mps", torch.backends.mps.is_available, "MPS device is available. Using M1/M2 GPU!"),
    ("cuda", torch.cuda.is_available, "CUDA device is available. Using NVIDIA GPU!"),
]
for _dev_name, _probe, _banner in _candidates:
    if _probe():
        device = torch.device(_dev_name)
        print(_banner)
        break
else:
    device = torch.device("cpu")
    print("No GPU available. Falling back to CPU.")


# --- 1. Load Model and Generate Embeddings ---
model_name = 'all-MiniLM-L6-v2'
# Place the encoder on the device selected above (MPS / CUDA / CPU).
model = SentenceTransformer(model_name, device=device)
print("Loading dataset...")

# Goodreads book/genre dataset; the 'train' split holds the catalogue.
ds = load_dataset("pszemraj/goodreads-bookgenres", "default")
df = ds['train'].to_pandas()
# --- FIX: Print columns to find the correct name ---
print("Available columns:", df.columns.tolist())

# Drop rows with missing descriptions to avoid errors during encoding.
# NOTE: pandas keeps the original index labels after dropna, but every
# downstream lookup is positional (iloc), so row order stays aligned
# with the embeddings tensor built below.
df.dropna(subset=['Description'], inplace=True)
print("Dataset loaded and cleaned. Head of DataFrame:")
print(df.head())
# --- 2. Generate Book Embeddings ---
print("Generating book embeddings...")
# Encode all descriptions at once for efficiency; row i of the resulting
# tensor corresponds to positional row i of df.
book_descriptions = df['Description'].tolist()
book_embeddings = model.encode(book_descriptions, convert_to_tensor=True, show_progress_bar=True)
print("Embeddings generated.")

# --- 3. Define Recommendation Function ---

def recommend_books(query, top_k=5):
    """Return the ``top_k`` books most semantically similar to *query*.

    Args:
        query: Free-text description of the desired book (topic, genre, title).
        top_k: Number of recommendations to return; clamped to the corpus size.

    Returns:
        pandas.DataFrame with the 'Book' and 'Description' columns of the
        best-matching rows, ordered from most to least similar.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Cosine similarity between the query and every book; shape (1, n_books).
    cosine_scores = cos_sim(query_embedding, book_embeddings)
    # Clamp k so a request larger than the corpus cannot make topk raise.
    k = min(int(top_k), cosine_scores.shape[1])
    # reshape(-1) keeps the index tensor 1-D even when k == 1; the original
    # squeeze() produced a 0-d tensor there, whose .tolist() is a bare int
    # and made df.iloc return a Series instead of a DataFrame.
    top_k_indices = torch.topk(cosine_scores.reshape(-1), k=k).indices
    # --- FIX: Move the tensor to the CPU before using it with pandas ---
    # iloc is positional, matching the positional order of book_embeddings.
    recommended_books = df.iloc[top_k_indices.cpu().tolist()]
    return recommended_books[['Book', 'Description']]


# --- 4. Define Gradio Interface and Launch ---
print("Launching Gradio interface...")
# Build the widgets up front, then wire them into the Interface.
query_input = gr.Textbox(lines=2, placeholder="Enter a book topic, genre, or title...")
results_output = gr.Dataframe(headers=["Book", "Description"])
example_queries = [
    "A thrilling detective story",
    "A heartwarming novel about friendship",
    "Science fiction about space travel",
]
gr_interface = gr.Interface(
    fn=recommend_books,
    inputs=query_input,
    outputs=results_output,
    title="Book Recommendation System",
    description="Get book recommendations based on your query.",
    examples=example_queries,
)

# share=True exposes a public tunnel URL in addition to the local server.
gr_interface.launch(share=True)