"""Gradio app that recommends random MovieLens titles by genre and year range.

At import time the script reads ``movies.csv``, derives a ``release_year``
column from the ``(YYYY)`` suffix in each title, and builds the list of
genres offered in the UI dropdown.
"""

import os

import gradio as gr
import pandas as pd

# 1) Load movies.csv from extracted ml-32m dataset
csv_path = os.path.join("movies.csv")  # Adjust path if needed
df = pd.read_csv(csv_path)

# 2) Normalize genres and extract release year from title
df["genres"] = df["genres"].apply(
    lambda g: "|".join(g) if isinstance(g, list) else str(g)
)
# ``to_numeric(errors="coerce")`` turns titles without a "(YYYY)" group into
# missing values instead of silently leaving the column as strings, so the
# numeric range comparison in the recommender always works.
df["release_year"] = pd.to_numeric(
    df["title"].str.extract(r"\((\d{4})\)")[0],
    errors="coerce",
).astype("Int64")

# 3) Deduplicate metadata
metadata = df[["title", "genres", "release_year"]].drop_duplicates()

# 4) Extract unique genres
all_genres = set(df["genres"].str.split("|").explode())
genre_list = sorted(all_genres)


# 5) Recommendation function with year range and genre check
def recommend_by_genre_and_year_range(
    genre: str, start_year: int, end_year: int, top_k: int = 5
) -> str:
    """Return a bulleted list of random movies matching a genre and year range.

    Args:
        genre: Genre to look for; matched as a case-insensitive literal
            substring of each movie's ``genres`` string.
        start_year: First release year of the range (inclusive).
        end_year: Last release year of the range (inclusive). If it is
            smaller than ``start_year`` the two bounds are swapped.
        top_k: Maximum number of movies to return.

    Returns:
        One "• Title (Year)" line per sampled movie, or a human-readable
        message when the input is invalid or nothing matches.
    """
    if not genre:
        return "⚠️ Please select a genre."

    # UI number fields may deliver floats or None (empty field); coerce once
    # here so the comparisons and ``sample`` below only ever see ints.
    try:
        first_year = int(start_year)
        last_year = int(end_year)
        limit = int(top_k)
    except (TypeError, ValueError):
        return (
            "⚠️ Please enter valid numbers for the year range "
            "and the number of recommendations."
        )

    if limit < 1:
        return "⚠️ Number of recommendations must be at least 1."
    if first_year > last_year:
        # A reversed range could never match anything; treat it as a typo.
        first_year, last_year = last_year, first_year

    # regex=False: genre names such as "(no genres listed)" contain regex
    # metacharacters and must be matched literally.
    mask_genre = (
        metadata["genres"]
        .str.lower()
        .str.contains(genre.lower(), regex=False, na=False)
    )
    # Movies without a parsed year are compared as year 0.
    year_col = metadata["release_year"].fillna(0)
    mask_year = (year_col >= first_year) & (year_col <= last_year)

    candidates = metadata[mask_genre & mask_year]
    if candidates.empty:
        return (
            f"No '{genre.title()}' movies found "
            f"between {first_year} and {last_year}."
        )

    picks = candidates.sample(n=min(limit, len(candidates)))
    return "\n".join(
        f"• {title} ({int(year) if pd.notna(year) else 'Year N/A'})"
        for title, year in zip(picks["title"], picks["release_year"])
    )


# 6) Gradio interface
iface = gr.Interface(
    fn=recommend_by_genre_and_year_range,
    inputs=[
        gr.Dropdown(choices=genre_list, label="Select Genre", value=None),
        gr.Number(label="Start Year", value=1990, precision=0),
        gr.Number(label="End Year", value=1995, precision=0),
        # precision=0 makes the component hand the function an int.
        gr.Number(label="Number of Recommendations", value=5, precision=0),
    ],
    outputs="text",
    title="🎬 Movie Recommender by Genre & Year Range",
    description=(
        " Loads local MovieLens metadata (ml-32m), extracts release years "
        "from titles, normalizes genres, and filters by genre and a custom "
        "year range. \n"
    ),
)

if __name__ == "__main__":
    iface.launch()