rijdev committed on
Commit
90cb33d
·
verified ·
1 Parent(s): 9e9eddc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -17
app.py CHANGED
@@ -2,30 +2,25 @@ import gradio as gr
2
  import pandas as pd
3
  from datasets import load_dataset
4
 
5
- # 1. Load MovieLens 100K from the Hub
6
- ml = load_dataset("movielens", "100k") # train + test splits
7
- df = pd.concat([
8
- ml["train"].to_pandas(),
9
- ml["test"].to_pandas()
10
- ], ignore_index=True)
11
-
12
- # 2. Extract year and prepare genres
13
  df["year"] = pd.to_datetime(df["timestamp"], unit="s").dt.year
14
- # movieId → title/genres mapping is in the "movies" config
15
- movies = load_dataset("movielens", "100k", split="train") \
16
- .to_pandas()[["movieId","title","genres"]].drop_duplicates()
17
- df = df.merge(movies, on="movieId", how="left")
18
 
19
- # 3. Deduplicate for metadata
20
- metadata = df[["title","genres","year"]].drop_duplicates()
21
 
22
  def recommend_by_genre_year(genre, year, top_k=5):
23
  mask_genre = metadata["genres"].str.lower().str.contains(genre.lower())
24
  mask_year = metadata["year"] >= year
25
  candidates = metadata[mask_genre & mask_year]
 
26
  if candidates.empty:
27
  return f"No {genre.title()} movies found from {year} onward."
28
- picks = candidates.sample(min(top_k, len(candidates)))
 
29
  return "\n".join(f"• {row.title} ({row.year})" for _, row in picks.iterrows())
30
 
31
  iface = gr.Interface(
@@ -38,8 +33,8 @@ iface = gr.Interface(
38
  outputs="text",
39
  title="🎬 Online MovieLens Recommender",
40
  description="""
41
- Pulls MovieLens 100K live from Hugging Face Datasets—no local files needed.
42
- Filters by genre substring and release year (inferred from timestamp).
43
  """
44
  )
45
 
 
2
  import pandas as pd
3
  from datasets import load_dataset
4
 
5
+ # 1) Load the community MovieLens 100K (includes title, genres, timestamp)
6
+ movies_raw = load_dataset("bstds/movielens", split="train") # :contentReference[oaicite:1]{index=1}
7
+
8
+ # 2) Convert to pandas and extract year
9
+ df = movies_raw.to_pandas()
 
 
 
10
  df["year"] = pd.to_datetime(df["timestamp"], unit="s").dt.year
 
 
 
 
11
 
12
+ # 3) Deduplicate metadata
13
+ metadata = df[["title", "genres", "year"]].drop_duplicates()
14
 
15
  def recommend_by_genre_year(genre, year, top_k=5):
16
  mask_genre = metadata["genres"].str.lower().str.contains(genre.lower())
17
  mask_year = metadata["year"] >= year
18
  candidates = metadata[mask_genre & mask_year]
19
+
20
  if candidates.empty:
21
  return f"No {genre.title()} movies found from {year} onward."
22
+
23
+ picks = candidates.sample(n=min(top_k, len(candidates)))
24
  return "\n".join(f"• {row.title} ({row.year})" for _, row in picks.iterrows())
25
 
26
  iface = gr.Interface(
 
33
  outputs="text",
34
  title="🎬 Online MovieLens Recommender",
35
  description="""
36
+ Uses the community MovieLens-100K dataset (via `bstds/movielens`) to filter by genre
37
+ and year (inferred from timestamp). No local files needed.
38
  """
39
  )
40