rijdev committed on
Commit
4475715
·
verified ·
1 Parent(s): ecf1596

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import pandas as pd
3
  from datasets import load_dataset
@@ -8,29 +9,33 @@ ds = load_dataset("bstds/movielens", "small", split="train")
8
  # 2) Convert to pandas
9
  df = ds.to_pandas()
10
 
11
- # 3) Extract release year from title, e.g. "Movie Title (1999)" 1999
 
12
  df["release_year"] = (
13
  df["title"]
14
- .str.extract(r"\((\d{4})\)") # capture 4-digit year
15
- .astype(pd.Int64Dtype()) # allow missing values
16
  )
17
 
18
  # 4) Deduplicate metadata
19
  metadata = df[["title", "genres", "release_year"]].drop_duplicates()
20
 
 
21
  def recommend_by_genre_year(genre: str, year: int, top_k: int = 5) -> str:
22
- # filter genre (case-insensitive substring)
23
- mask_genre = metadata["genres"].str.lower().str.contains(genre.lower())
24
- # filter release year ≥ input year (drop rows missing year)
25
- mask_year = metadata["release_year"].fillna(0) >= year
26
 
27
  candidates = metadata[mask_genre & mask_year]
28
  if candidates.empty:
29
  return f"No '{genre.title()}' movies found from {year} onward."
30
 
31
  picks = candidates.sample(n=min(top_k, len(candidates)))
32
- return "\n".join(f"• {row.title} ({int(row.release_year) if pd.notna(row.release_year) else 'Year N/A'})"
33
- for _, row in picks.iterrows())
 
 
34
 
35
  iface = gr.Interface(
36
  fn=recommend_by_genre_year,
@@ -43,9 +48,10 @@ iface = gr.Interface(
43
  title="🎬 Genre & Year-Based Movie Recommender",
44
  description="""
45
  Loads MovieLens metadata (small split) from the Hub, extracts release years from titles,
46
- and filters by genre substring & year. No local files needed.
47
  """,
48
  )
49
 
50
  if __name__ == "__main__":
51
  iface.launch()
 
 
1
+ ```python
2
  import gradio as gr
3
  import pandas as pd
4
  from datasets import load_dataset
 
9
  # 2) Convert to pandas
10
  df = ds.to_pandas()
11
 
# 3) Normalize genres to one "|"-joined string per row and extract the
#    release year from titles shaped like "Movie Title (1999)".
def _genres_to_text(value) -> str:
    """Return a genres cell as a single '|'-joined string."""
    if isinstance(value, str):
        return value
    # Sequence-like cells (list, tuple, array) are joined. NOTE(review):
    # list-valued dataset columns may arrive as NumPy arrays after
    # to_pandas() rather than Python lists — confirm against the dataset.
    if hasattr(value, "__iter__"):
        return "|".join(map(str, value))
    # Scalars (e.g. a missing value) are stringified, as before.
    return str(value)


df["genres"] = df["genres"].apply(_genres_to_text)

# The regex captures the first parenthesised 4-digit number in the title.
# to_numeric(errors="coerce") turns the captured text into numbers and any
# miss into NaN; the nullable Int64 dtype then keeps the years integral while
# still allowing missing values. This replaces astype(..., errors='ignore'),
# which could silently leave the column as strings and break the numeric
# year comparison later on.
df["release_year"] = pd.to_numeric(
    df["title"].str.extract(r"\((\d{4})\)", expand=False),
    errors="coerce",
).astype(pd.Int64Dtype())
19
 
20
  # 4) Deduplicate metadata
21
  metadata = df[["title", "genres", "release_year"]].drop_duplicates()
22
 
23
+
def recommend_by_genre_year(genre: str, year: int, top_k: int = 5) -> str:
    """Return up to ``top_k`` randomly sampled movies for a genre and year.

    Args:
        genre: Text matched literally and case-insensitively as a substring
            of each row's ``genres`` value.
        year: Earliest release year to include. Rows without a release year
            are treated as year 0, so they only match when ``year <= 0``.
        top_k: Maximum number of titles to return.

    Returns:
        One bullet line per picked movie joined by newlines, or a
        "No ... movies found" message when nothing matches.
    """
    # NOTE(review): the Interface inputs are not visible here; numeric UI
    # widgets may deliver floats, and DataFrame.sample needs an integer n.
    year = int(year)
    top_k = int(top_k)

    # regex=False: the genre comes from the user, so treat it as plain text.
    # As a regex, input such as "c++" raises re.error and "." matches anything.
    mask_genre = (
        metadata["genres"]
        .str.lower()
        .str.contains(genre.lower(), regex=False, na=False)
    )
    # Missing years are filled with 0 so the comparison yields no NA values.
    mask_year = metadata["release_year"].fillna(0) >= year

    candidates = metadata[mask_genre & mask_year]
    if candidates.empty:
        return f"No '{genre.title()}' movies found from {year} onward."

    picks = candidates.sample(n=min(top_k, len(candidates)))
    return "\n".join(
        f"• {title} ({int(released) if pd.notna(released) else 'Year N/A'})"
        for title, released in zip(picks["title"], picks["release_year"])
    )
39
 
40
  iface = gr.Interface(
41
  fn=recommend_by_genre_year,
 
48
  title="🎬 Genre & Year-Based Movie Recommender",
49
  description="""
50
  Loads MovieLens metadata (small split) from the Hub, extracts release years from titles,
51
+ normalizes genres, and filters by genre substring & year. No local files needed.
52
  """,
53
  )
54
 
55
  if __name__ == "__main__":
56
  iface.launch()
57
+ ```