rijdev committed on
Commit
82fe775
·
verified ·
1 Parent(s): 950524e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -15
app.py CHANGED
@@ -2,40 +2,60 @@ import gradio as gr
2
  import pandas as pd
3
  from datasets import load_dataset
4
 
5
- # 1) Load the community MovieLens 100K (small split)
6
- movies_raw = load_dataset("bstds/movielens", "small", split="train")
 
 
 
 
 
 
 
 
 
7
 
8
- # 2) Convert to pandas and extract year
9
- df = movies_raw.to_pandas()
10
- df["year"] = pd.to_datetime(df["timestamp"], unit="s").dt.year
11
 
12
- # 3) Deduplicate metadata
13
- metadata = df[["title", "genres", "year"]].drop_duplicates()
 
 
 
 
 
 
 
 
 
14
 
15
- def recommend_by_genre_year(genre, year, top_k=5):
 
16
  mask_genre = metadata["genres"].str.lower().str.contains(genre.lower())
17
- mask_year = metadata["year"] >= year
18
  candidates = metadata[mask_genre & mask_year]
19
 
20
  if candidates.empty:
21
  return f"No {genre.title()} movies found from {year} onward."
22
 
23
  picks = candidates.sample(n=min(top_k, len(candidates)))
24
- return "\n".join(f"• {row.title} ({row.year})" for _, row in picks.iterrows())
 
25
 
26
  iface = gr.Interface(
27
  fn=recommend_by_genre_year,
28
  inputs=[
29
  gr.Textbox(label="Genre", placeholder="e.g. Action, Romance"),
30
- gr.Number(label="Release Year (≥)", value=2010),
31
- gr.Slider(1, 10, step=1, label="Number of Recommendations", value=5)
32
  ],
33
  outputs="text",
34
  title="🎬 Genre & Year-Based Movie Recommender",
35
  description="""
36
- Uses the community MovieLens-100K dataset (via `bstds/movielens` small split)
37
- to filter by genre and year (inferred from timestamp).
38
- """
39
  )
40
 
41
  if __name__ == "__main__":
 
2
  import pandas as pd
3
  from datasets import load_dataset
4
 
5
+ # 1) Load movie metadata and ratings from GroupLens URLs
6
+ movies = load_dataset(
7
+ "csv",
8
+ data_files="https://files.grouplens.org/datasets/movielens/ml-latest-small/movies.csv",
9
+ split="train",
10
+ )
11
+ ratings = load_dataset(
12
+ "csv",
13
+ data_files="https://files.grouplens.org/datasets/movielens/ml-latest-small/ratings.csv",
14
+ split="train",
15
+ )
16
 
17
+ # 2) Convert to pandas
18
+ movies_df = movies.to_pandas()
19
+ ratings_df = ratings.to_pandas()
20
 
21
+ # 3) Infer a release year per movie by taking the earliest rating timestamp
22
+ # (a rough proxy: assumes users start rating soon after release, so any film released before the ratings data begins is assigned a later year)
23
+ ratings_df["year"] = pd.to_datetime(ratings_df["timestamp"], unit="s").dt.year
24
+ first_year = ratings_df.groupby("movieId")["year"].min().reset_index()
25
+
26
+ # 4) Merge metadata + inferred year, drop duplicates
27
+ metadata = (
28
+ movies_df.merge(first_year, on="movieId", how="inner")
29
+ .rename(columns={"year": "release_year"})
30
+ .drop_duplicates(subset=["movieId"])
31
+ )
32
 
33
+ def recommend_by_genre_year(genre: str, year: int, top_k: int = 5) -> str:
34
+ # filter by genre substring (case-insensitive) and release_year ≥ year
35
  mask_genre = metadata["genres"].str.lower().str.contains(genre.lower())
36
+ mask_year = metadata["release_year"] >= year
37
  candidates = metadata[mask_genre & mask_year]
38
 
39
  if candidates.empty:
40
  return f"No {genre.title()} movies found from {year} onward."
41
 
42
  picks = candidates.sample(n=min(top_k, len(candidates)))
43
+ return "\n".join(f"• {row.title} ({int(row.release_year)})"
44
+ for _, row in picks.iterrows())
45
 
46
  iface = gr.Interface(
47
  fn=recommend_by_genre_year,
48
  inputs=[
49
  gr.Textbox(label="Genre", placeholder="e.g. Action, Romance"),
50
+ gr.Number(label="Release Year (≥)", value=2010, precision=0),
51
+ gr.Slider(1, 10, step=1, label="Number of Recommendations", value=5),
52
  ],
53
  outputs="text",
54
  title="🎬 Genre & Year-Based Movie Recommender",
55
  description="""
56
+ Pulls MovieLens “ml-latest-small” metadata & ratings live from GroupLens
57
+ to filter by genre and release year (inferred). No local files needed.
58
+ """,
59
  )
60
 
61
  if __name__ == "__main__":