chrisaldikaraharja committed on
Commit
53ad791
·
verified ·
1 Parent(s): 784c768

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -42
app.py CHANGED
@@ -1,46 +1,82 @@
 
1
  import streamlit as st
 
 
 
2
  import kagglehub
3
- import os
4
- import random
5
- from PIL import Image
6
 
7
  # Step 1: Download the latest version of the dataset and get the path
8
# Random avatar picker: download the cartoon image set, list every PNG in it,
# and let the user display and download one image chosen at random.

# dataset_download returns the local directory holding the dataset files.
path = kagglehub.dataset_download("imreallyjohn/cartoonset10k")
# Walk the directory kagglehub reports instead of a hard-coded cache location,
# which only exists for one particular user name and dataset version.
images_folder = path

# Display the dataset path
st.write("Path to dataset files:", images_folder)

# Step 2: Recursively find all image files in the dataset directory
image_files = [
    os.path.join(root, name)  # Full path to each image
    for root, _, names in os.walk(images_folder)
    for name in names
    if name.endswith('.png')
]

# Get the count of images found
num_images = len(image_files)
st.write(f"Total images loaded: {num_images}")

if not image_files:
    # Nothing to choose from, so report it rather than offering the button.
    st.error("No available images to display. Please check the folder path and contents.")
elif st.button("Generate Random Avatar"):
    # Step 3: Select a random available image
    random_image_path = random.choice(image_files)
    image_name = os.path.basename(random_image_path)

    # Load and display the selected image
    selected_image = Image.open(random_image_path)
    st.image(selected_image, caption=f"Displayed image: {image_name}")
    st.write(f"Displayed image: {image_name}")

    # Step 4: Add a download button for the selected image
    with open(random_image_path, "rb") as file:
        st.download_button(
            label="Download Avatar",
            data=file,
            file_name=image_name,
            mime="image/png",
        )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
  import streamlit as st
3
+ from surprise import Dataset, Reader, SVD
4
+ from surprise.model_selection import train_test_split
5
+ from collections import defaultdict
6
  import kagglehub
 
 
 
7
 
8
  # Step 1: Download the latest version of the dataset and get the path
9
# kagglehub exposes `dataset_download` (there is no `download_dataset`); it
# returns the local directory that holds the downloaded dataset files.
path = kagglehub.dataset_download("ashpalsingh1525/imdb-movies-dataset")

# Step 2: Define the dataset folder path
# Use the directory kagglehub reports rather than a hard-coded cache location,
# which breaks when the home directory or the dataset version differs.
dataset_folder = path

# Step 3: Define the CSV file path (Update if the filename is different)
dataset_path = f"{dataset_folder}/imdb_movies.csv"

# Load the dataset
df = pd.read_csv(dataset_path)

# Ensure all categorical columns are strings
categorical_columns = ['genre', 'orig_title', 'orig_lang', 'country', 'crew']

for col in categorical_columns:
    df[col] = df[col].astype(str)  # Convert to string explicitly

# Check unique values in genre column (to ensure it's not numerical)
if df['genre'].str.isnumeric().all():
    print("Warning: Genre column is numeric. Mapping needed.")
    genre_mapping = {i: f"Genre_{i}" for i in df['genre'].unique()}
    df['genre'] = df['genre'].map(genre_mapping)

# Prepare dataset for Surprise
reader = Reader(rating_scale=(df['score'].min(), df['score'].max()))
# Surprise reads the three columns as (user id, item id, rating), in that
# order. Predictions are requested with iid=<movie title>, so the title must
# be the item column; otherwise no requested title matches a trained item and
# every movie receives the same estimate.
# NOTE(review): no per-user rating column is used here, so `orig_lang` stands
# in as the user id -- confirm this is the intended grouping.
data = Dataset.load_from_df(df[['orig_lang', 'orig_title', 'score']], reader)

# Train collaborative filtering model
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)
model = SVD(n_factors=50, random_state=42)
model.fit(trainset)
40
+
41
+ # Function to get movie recommendations
42
def get_recommendations(selected_movies, genre):
    """Return up to five titles of the given genre, best predicted score first.

    Args:
        selected_movies: Titles the user already picked; they are left out of
            the result. When empty, a single prompt message is returned.
        genre: Value matched exactly against the ``genre`` column of ``df``.

    Returns:
        A list of at most five titles, or a one-element list holding the
        prompt message when ``selected_movies`` is empty.
    """
    if not selected_movies:
        return ["Please select at least one movie."]

    # Restrict the candidates to rows of the requested genre.
    genre_rows = df[df['genre'] == genre]

    # Estimated score per distinct title, kept in first-seen order.
    predicted = {
        title: model.predict(uid='user', iid=title).est
        for title in genre_rows['orig_title'].unique()
    }

    # The sort is stable, so titles with equal estimates keep dataset order.
    ranked = sorted(predicted, key=predicted.get, reverse=True)

    # Drop what the user already chose, then keep the top five.
    return [title for title in ranked if title not in selected_movies][:5]
64
+
65
+ # Streamlit UI
66
st.title("🎬 Movie Recommendation System")

# Offer each distinct genre once, in sorted order.
genre_options = sorted(df['genre'].unique().tolist())
selected_genre = st.selectbox("Select a Genre", genre_options)

# Only titles belonging to the chosen genre can be selected.
genre_mask = df['genre'] == selected_genre
movies_in_genre = df[genre_mask]['orig_title'].unique().tolist()

selected_movies = st.multiselect(
    "Select Up to 3 Movies",
    movies_in_genre,
    max_selections=3,
)

# Recommendations are computed only when the button is pressed.
if st.button("Get Recommendations"):
    recommendations = get_recommendations(selected_movies, selected_genre)
    st.subheader("Recommended Movies:")
    for movie in recommendations:
        st.write(f"- {movie}")