rijdev committed on
Commit
9c1ef16
·
verified ·
1 Parent(s): 799a6fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -1,15 +1,12 @@
1
-
2
  import gradio as gr
3
  import pandas as pd
4
- from datasets import load_dataset
5
-
6
- # 1) Load MovieLens metadata (small split, ~1.5K movies)
7
- ds = load_dataset("bstds/movielens", "small", split="train")
8
 
9
- # 2) Convert to pandas
10
- df = ds.to_pandas()
 
11
 
12
- # 3) Normalize genres (list -> string) and extract release year from title
13
  df["genres"] = df["genres"].apply(lambda g: "|".join(g) if isinstance(g, list) else str(g))
14
  df["release_year"] = (
15
  df["title"]
@@ -17,14 +14,12 @@ df["release_year"] = (
17
  .astype(pd.Int64Dtype(), errors='ignore')
18
  )
19
 
20
- # 4) Deduplicate metadata
21
  metadata = df[["title", "genres", "release_year"]].drop_duplicates()
22
 
23
-
24
  def recommend_by_genre_year(genre: str, year: int, top_k: int = 5) -> str:
25
- # filter by genre substring (case-insensitive)
26
  mask_genre = metadata["genres"].str.lower().str.contains(genre.lower(), na=False)
27
- # filter release year ≥ input year (treat NaN as 0)
28
  mask_year = metadata["release_year"].fillna(0) >= year
29
 
30
  candidates = metadata[mask_genre & mask_year]
@@ -37,6 +32,7 @@ def recommend_by_genre_year(genre: str, year: int, top_k: int = 5) -> str:
37
  for _, row in picks.iterrows()
38
  )
39
 
 
40
  iface = gr.Interface(
41
  fn=recommend_by_genre_year,
42
  inputs=[
@@ -47,11 +43,10 @@ iface = gr.Interface(
47
  outputs="text",
48
  title="🎬 Genre & Year-Based Movie Recommender",
49
  description="""
50
- Loads MovieLens metadata (small split) from the Hub, extracts release years from titles,
51
- normalizes genres, and filters by genre substring & year. No local files needed.
52
  """,
53
  )
54
 
55
  if __name__ == "__main__":
56
  iface.launch()
57
-
 
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import os
 
 
 
4
 
5
+ # 1) Load movies.csv from extracted ml-32m dataset
6
+ csv_path = os.path.join("ml-32m", "movies.csv") # Adjust path if needed
7
+ df = pd.read_csv(csv_path)
8
 
9
+ # 2) Normalize genres (if needed) and extract release year from title
10
  df["genres"] = df["genres"].apply(lambda g: "|".join(g) if isinstance(g, list) else str(g))
11
  df["release_year"] = (
12
  df["title"]
 
14
  .astype(pd.Int64Dtype(), errors='ignore')
15
  )
16
 
17
+ # 3) Deduplicate metadata
18
  metadata = df[["title", "genres", "release_year"]].drop_duplicates()
19
 
20
+ # 4) Recommendation function
21
  def recommend_by_genre_year(genre: str, year: int, top_k: int = 5) -> str:
 
22
  mask_genre = metadata["genres"].str.lower().str.contains(genre.lower(), na=False)
 
23
  mask_year = metadata["release_year"].fillna(0) >= year
24
 
25
  candidates = metadata[mask_genre & mask_year]
 
32
  for _, row in picks.iterrows()
33
  )
34
 
35
+ # 5) Gradio interface
36
  iface = gr.Interface(
37
  fn=recommend_by_genre_year,
38
  inputs=[
 
43
  outputs="text",
44
  title="🎬 Genre & Year-Based Movie Recommender",
45
  description="""
46
+ Loads local MovieLens metadata (ml-32m), extracts release years from titles,
47
+ normalizes genres, and filters by genre substring & year. No internet required.
48
  """,
49
  )
50
 
51
  if __name__ == "__main__":
52
  iface.launch()