Elevi7 committed on
Commit
e1979ec
·
verified ·
1 Parent(s): 609546c

Update app.py

Browse files

Final working version with motivational quote dataset

Files changed (1) hide show
  1. app.py +25 -13
app.py CHANGED
@@ -2,47 +2,59 @@ import gradio as gr
2
  from datasets import load_dataset
3
  from sentence_transformers import SentenceTransformer, util
4
  import torch
 
 
 
 
 
5
 
6
# Fetch the "train" split of the extended motivational-quotes dataset.
dataset = load_dataset("asuender/motivational-quotes", "quotes_extended", split="train")

# Walk the rows once, collecting the three parallel lists the app works with.
# The code treats "tags" as one comma-separated string; an empty/falsy value
# becomes an empty tag list.
quotes = []
authors = []
tags_list = []
for row in dataset:
    quotes.append(row["quote"])
    authors.append(row["author"])
    row_tags = row["tags"]
    tags_list.append(row_tags.split(", ") if row_tags else [])

# Sorted, de-duplicated tag names across every row.
from itertools import chain
all_tags = sorted(set(chain.from_iterable(tags_list)))

# Embed every quote once at start-up so each request only has to embed the
# user's input.
model = SentenceTransformer("all-MiniLM-L6-v2")
quote_embeddings = model.encode(quotes, convert_to_tensor=True)
23
 
24
# Recommendation function
def recommend_quote(mood_input, selected_tag):
    """Return up to three quotes tagged ``selected_tag`` ranked against ``mood_input``.

    Quotes are first restricted to those whose tag list contains
    ``selected_tag``; the survivors are scored with
    ``util.pytorch_cos_sim`` against the embedding of ``mood_input`` and the
    highest-scoring ones (at most three) are formatted as text.

    Args:
        mood_input: Text passed to ``model.encode`` to build the query embedding.
        selected_tag: Tag a quote must carry to be considered.

    Returns:
        A string of ``"quote"`` / ``— author`` entries separated by blank
        lines, or an apology message when no quote carries the tag.
    """
    # Keep (index, quote, author) for every quote carrying the requested tag.
    hits = [
        (idx, quote, author)
        for idx, (quote, author, tags) in enumerate(zip(quotes, authors, tags_list))
        if selected_tag in tags
    ]
    if not hits:
        return "😕 Sorry, no quotes found for that category."

    # Split the triples back into three parallel lists.
    hit_indices, hit_quotes, hit_authors = map(list, zip(*hits))
    candidate_embeddings = quote_embeddings[hit_indices]

    query_embedding = model.encode(mood_input, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, candidate_embeddings)
    # Never ask topk for more entries than there are candidates.
    best = torch.topk(scores, k=min(3, len(hit_quotes)))

    entries = [
        f"\"{hit_quotes[pos]}\"\n— {hit_authors[pos]}\n\n"
        for pos in best.indices[0]
    ]
    return "".join(entries).strip()
44
 
45
- # Gradio app
46
  iface = gr.Interface(
47
  fn=recommend_quote,
48
  inputs=[
 
2
  from datasets import load_dataset
3
  from sentence_transformers import SentenceTransformer, util
4
  import torch
5
+ from itertools import chain
6
+
7
# Load the dataset from Hugging Face and materialise it as a list of rows so
# it can be traversed several times below.
raw_dataset = load_dataset("asuender/motivational-quotes", "quotes_extended", split="train")
dataset = list(raw_dataset)

# Print a sample row to debug the schema (optional – remove later if working).
# Guarded so an empty split does not raise IndexError on dataset[0].
if dataset:
    print(dataset[0])


def _parse_tags(raw_tags):
    """Split one row's comma-separated ``tags`` string into clean tag names.

    Splitting on a bare comma and stripping each piece tolerates "a,b",
    "a, b" and "a,  b" alike, and drops empty fragments (e.g. from a trailing
    comma) so they never show up as a blank dropdown entry.

    Args:
        raw_tags: The row's ``tags`` value; may be missing (``None``) or empty.

    Returns:
        A list of non-empty, whitespace-trimmed tag names (possibly empty).
    """
    if not raw_tags:
        return []
    return [piece.strip() for piece in raw_tags.split(",") if piece.strip()]


# Extract quotes, authors, and safely handle missing tags.
quotes = [item["quote"] for item in dataset]
authors = [item["author"] for item in dataset]
tags_list = [_parse_tags(item.get("tags")) for item in dataset]

# Get unique tags for the dropdown.
all_tags = sorted(set(chain.from_iterable(tags_list)))

# Load the sentence-transformer model and embed every quote once at start-up,
# so each request only has to embed the user's input.
model = SentenceTransformer("all-MiniLM-L6-v2")
quote_embeddings = model.encode(quotes, convert_to_tensor=True)
32
 
33
# Define quote recommendation function
def recommend_quote(mood_input, selected_tag):
    """Return up to three quotes tagged ``selected_tag`` ranked against ``mood_input``.

    Quotes are first restricted to those whose tag list contains
    ``selected_tag``; the survivors are scored with
    ``util.pytorch_cos_sim`` against the embedding of ``mood_input`` and the
    highest-scoring ones (at most three) are formatted as text.

    Args:
        mood_input: Text passed to ``model.encode`` to build the query embedding.
        selected_tag: Tag a quote must carry to be considered.

    Returns:
        A string of ``"quote"`` / ``— author`` entries separated by blank
        lines, or an apology message when no quote carries the tag.
    """
    # Filter quotes by selected tag, remembering each quote's original index.
    hits = [
        (idx, quote, author)
        for idx, (quote, author, tags) in enumerate(zip(quotes, authors, tags_list))
        if selected_tag in tags
    ]
    if not hits:
        return "😔 Sorry, no quotes found for that category."

    # Split the triples back into three parallel lists.
    hit_indices, hit_quotes, hit_authors = map(list, zip(*hits))
    candidate_embeddings = quote_embeddings[hit_indices]

    # Encode user input and calculate similarity against the candidates only.
    query_embedding = model.encode(mood_input, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, candidate_embeddings)
    # Never ask topk for more entries than there are candidates.
    best = torch.topk(scores, k=min(3, len(hit_quotes)))

    # Format result: one quote/author entry per selected candidate.
    entries = [
        f"\"{hit_quotes[pos]}\"\n— {hit_authors[pos]}\n\n"
        for pos in best.indices[0]
    ]
    return "".join(entries).strip()
56
 
57
+ # Gradio interface
58
  iface = gr.Interface(
59
  fn=recommend_quote,
60
  inputs=[