Modify file name and data file path

#2
by OhST - opened
Files changed (1) hide show
  1. Anime_RecSys.py โ†’ app.py +204 -203
Anime_RecSys.py โ†’ app.py RENAMED
@@ -1,203 +1,204 @@
1
- import pandas as pd
2
- import numpy as np
3
- import requests
4
- from bs4 import BeautifulSoup
5
- from googletrans import Translator
6
- import tensorflow as tf
7
- import gradio as gr
8
- class AnimeRecommender:
9
- def __init__(self, rating_path, anime_path, synopsis_path, model_path):
10
- self.rating_df = pd.read_csv(rating_path)
11
- self.df_anime = pd.read_csv(anime_path, low_memory=True)
12
- self.sypnopsis_df = pd.read_csv(synopsis_path, usecols=["MAL_ID", "Name", "Genres", "sypnopsis"])
13
- self.model = tf.keras.models.load_model(model_path)
14
- self.translator = Translator()
15
-
16
- self._preprocess_data()
17
-
18
- def _preprocess_data(self):
19
- # User and anime ID encoding
20
- user_ids = self.rating_df["user_id"].unique().tolist()
21
- user2user_encoded = {x: i for i, x in enumerate(user_ids)}
22
- anime_ids = self.rating_df["anime_id"].unique().tolist()
23
- anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
24
-
25
- self.rating_df["user"] = self.rating_df["user_id"].map(user2user_encoded)
26
- self.rating_df["anime"] = self.rating_df["anime_id"].map(anime2anime_encoded)
27
-
28
- self.n_users = len(user2user_encoded)
29
- self.n_animes = len(anime2anime_encoded)
30
-
31
- self.anime2anime_encoded = anime2anime_encoded
32
- self.anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
33
-
34
- # Normalize anime weights
35
- self.anime_weights = self._extract_weights('anime_embedding')
36
-
37
- # Fix anime names
38
- self.df_anime['anime_id'] = self.df_anime['MAL_ID']
39
- self.df_anime["eng_version"] = self.df_anime['English name']
40
- self.df_anime['eng_version'] = self.df_anime.anime_id.apply(self._get_anime_name)
41
-
42
- self.df_anime.sort_values(by=['Score'], inplace=True, ascending=False, kind='quicksort', na_position='last')
43
- self.df_anime = self.df_anime[["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]]
44
-
45
- def _extract_weights(self, name):
46
- weight_layer = self.model.get_layer(name)
47
- weights = weight_layer.get_weights()[0]
48
- weights = weights / np.linalg.norm(weights, axis=1).reshape((-1, 1))
49
- return weights
50
-
51
- def _get_anime_name(self, anime_id):
52
- try:
53
- name = self.df_anime[self.df_anime.anime_id == anime_id].eng_version.values[0]
54
- if name is np.nan:
55
- name = self.df_anime[self.df_anime.anime_id == anime_id].Name.values[0]
56
- except:
57
- name = 'Unknown'
58
- return name
59
-
60
- def get_anime_frame(self, anime):
61
- if isinstance(anime, int):
62
- return self.df_anime[self.df_anime.anime_id == anime]
63
- if isinstance(anime, str):
64
- return self.df_anime[self.df_anime.eng_version == anime]
65
-
66
- def get_sypnopsis(self, anime):
67
- if isinstance(anime, int):
68
- return self.sypnopsis_df[self.sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
69
- if isinstance(anime, str):
70
- return self.sypnopsis_df[self.sypnopsis_df.Name == anime].sypnopsis.values[0]
71
-
72
- def find_similar_animes_combined(self, anime_names, n=3, return_dist=False, neg=False):
73
- try:
74
- encoded_indices = []
75
- input_anime_ids = []
76
- for name in anime_names:
77
- index = self.get_anime_frame(name).anime_id.values[0]
78
- input_anime_ids.append(index)
79
- encoded_index = self.anime2anime_encoded.get(index)
80
- encoded_indices.append(encoded_index)
81
-
82
- combined_weights = np.mean(self.anime_weights[encoded_indices], axis=0)
83
- combined_weights = combined_weights / np.linalg.norm(combined_weights)
84
-
85
- dists = np.dot(self.anime_weights, combined_weights)
86
- sorted_dists = np.argsort(dists)
87
- n = n + len(input_anime_ids)
88
-
89
- if neg:
90
- closest = sorted_dists[:n]
91
- else:
92
- closest = sorted_dists[-n:]
93
-
94
- if return_dist:
95
- return dists, closest
96
-
97
- rindex = self.df_anime
98
- SimilarityArr = []
99
- for close in closest:
100
- decoded_id = self.anime_encoded2anime.get(close)
101
- if decoded_id in input_anime_ids:
102
- continue
103
- sypnopsis = self.get_sypnopsis(decoded_id)
104
- anime_frame = self.get_anime_frame(decoded_id)
105
- anime_name = anime_frame.eng_version.values[0]
106
- genre = anime_frame.Genres.values[0]
107
- similarity = dists[close]
108
- SimilarityArr.append({"anime_id": decoded_id, "name": anime_name, "similarity": similarity, "genre": genre, 'sypnopsis': sypnopsis})
109
-
110
- Frame = pd.DataFrame(SimilarityArr).sort_values(by="similarity", ascending=False)
111
- return Frame.drop(index=0)
112
- except Exception as e:
113
- print('{}!, Not Found in Anime list'.format(anime_names))
114
- print(str(e))
115
- return pd.DataFrame()
116
-
117
- def get_anime_url(self, name):
118
- anime = self.df_anime[self.df_anime['eng_version'] == name]
119
- if not anime.empty:
120
- mal_id = anime['anime_id'].values[0]
121
- anime_name = anime['eng_version'].values[0].replace(' ', '_').replace(':', '_').replace('!', '_')
122
- return f"https://myanimelist.net/anime/{mal_id}/{anime_name}"
123
- else:
124
- print(f"{name}์— ํ•ด๋‹นํ•˜๋Š” ์• ๋‹ˆ๋ฉ”์ด์…˜์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
125
- return None
126
-
127
- def extract_image_url(self, url):
128
- try:
129
- response = requests.get(url)
130
- response.raise_for_status()
131
- except requests.RequestException as e:
132
- print(f"ํŽ˜์ด์ง€๋ฅผ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {e}")
133
- return None
134
-
135
- soup = BeautifulSoup(response.text, 'html.parser')
136
- image_tag = soup.find('img', {'data-src': True})
137
-
138
- if image_tag:
139
- return image_tag['data-src']
140
- else:
141
- print("์ด๋ฏธ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
142
- return None
143
-
144
- def NCF_Recommendation(self, a, b, c):
145
- anime_list = [a, b, c]
146
- anime_result = self.find_similar_animes_combined(anime_list, n=3)
147
-
148
- result1 = anime_result.loc[3, 'name']
149
- result2 = anime_result.loc[2, 'name']
150
- result3 = anime_result.loc[1, 'name']
151
-
152
- explain1 = anime_result.loc[3, 'sypnopsis']
153
- explain2 = anime_result.loc[2, 'sypnopsis']
154
- explain3 = anime_result.loc[1, 'sypnopsis']
155
-
156
- url1 = self.get_anime_url(result1)
157
- url2 = self.get_anime_url(result2)
158
- url3 = self.get_anime_url(result3)
159
-
160
- image1 = self.extract_image_url(url1)
161
- image2 = self.extract_image_url(url2)
162
- image3 = self.extract_image_url(url3)
163
-
164
- return result1, explain1, image1, result2, explain2, image2, result3, explain3, image3
165
-
166
- # ํŒŒ์ผ ๊ฒฝ๋กœ ์„ค์ •
167
- rating_path = 'C:/Users\PC\PycharmProjects/DNA_Anime_RecSys/Anime_RecSys/data/rating_complete.csv'
168
- anime_path = 'C:/Users\PC\PycharmProjects/DNA_Anime_RecSys/Anime_RecSys/data/anime.csv'
169
- synopsis_path = 'C:/Users\PC\PycharmProjects/DNA_Anime_RecSys/Anime_RecSys/data/anime_with_synopsis.csv'
170
- model_path = 'C:/Users\PC\PycharmProjects/DNA_Anime_RecSys/Anime_RecSys/data/anime_model.h5'
171
-
172
- # ๊ฐ์ฒด ์ƒ์„ฑ
173
- recommender = AnimeRecommender(rating_path, anime_path, synopsis_path, model_path)
174
-
175
- with gr.Blocks() as app:
176
- with gr.Row():
177
- a = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ์ฒซ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
178
- b = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ๋‘ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
179
- c = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ์„ธ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
180
-
181
- with gr.Row():
182
- with gr.Column():
183
- img1 = gr.Image(label="1๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
184
- output1 = gr.Textbox(label="1๏ธโƒฃ ์ฒซ๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
185
- output2 = gr.Textbox(label="์ฒซ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
186
- with gr.Column():
187
- img2 = gr.Image(label="2๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
188
- output3 = gr.Textbox(label="2๏ธโƒฃ ๋‘๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
189
- output4 = gr.Textbox(label="๋‘ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
190
- with gr.Column():
191
- img3 = gr.Image(label="3๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
192
- output5 = gr.Textbox(label="3๏ธโƒฃ ์„ธ๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
193
- output6 = gr.Textbox(label="์„ธ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
194
-
195
- btn = gr.Button("์ถ”์ฒœ์„ ๋ฐ›์•„๋ด…์‹œ๋‹ค!")
196
-
197
- btn.click(
198
- fn=recommender.NCF_Recommendation,
199
- inputs=[a, b, c],
200
- outputs=[output1, output2, img1, output3, output4, img2, output5, output6, img3]
201
- )
202
-
203
- app.launch(share=True)
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from googletrans import Translator
6
+ import tensorflow as tf
7
+ import gradio as gr
8
+ class AnimeRecommender:
9
+ def __init__(self, rating_path, anime_path, synopsis_path, model_path):
10
+ self.rating_df = pd.read_csv(rating_path)
11
+ self.df_anime = pd.read_csv(anime_path, low_memory=True)
12
+ self.sypnopsis_df = pd.read_csv(synopsis_path, usecols=["MAL_ID", "Name", "Genres", "sypnopsis"])
13
+ self.model = tf.keras.models.load_model(model_path)
14
+ self.translator = Translator()
15
+
16
+ self._preprocess_data()
17
+
18
+ def _preprocess_data(self):
19
+ # User and anime ID encoding
20
+ user_ids = self.rating_df["user_id"].unique().tolist()
21
+ user2user_encoded = {x: i for i, x in enumerate(user_ids)}
22
+ anime_ids = self.rating_df["anime_id"].unique().tolist()
23
+ anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
24
+
25
+ self.rating_df["user"] = self.rating_df["user_id"].map(user2user_encoded)
26
+ self.rating_df["anime"] = self.rating_df["anime_id"].map(anime2anime_encoded)
27
+
28
+ self.n_users = len(user2user_encoded)
29
+ self.n_animes = len(anime2anime_encoded)
30
+
31
+ self.anime2anime_encoded = anime2anime_encoded
32
+ self.anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
33
+
34
+ # Normalize anime weights
35
+ self.anime_weights = self._extract_weights('anime_embedding')
36
+
37
+ # Fix anime names
38
+ self.df_anime['anime_id'] = self.df_anime['MAL_ID']
39
+ self.df_anime["eng_version"] = self.df_anime['English name']
40
+ self.df_anime['eng_version'] = self.df_anime.anime_id.apply(self._get_anime_name)
41
+
42
+ self.df_anime.sort_values(by=['Score'], inplace=True, ascending=False, kind='quicksort', na_position='last')
43
+ self.df_anime = self.df_anime[["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]]
44
+
45
+ def _extract_weights(self, name):
46
+ weight_layer = self.model.get_layer(name)
47
+ weights = weight_layer.get_weights()[0]
48
+ weights = weights / np.linalg.norm(weights, axis=1).reshape((-1, 1))
49
+ return weights
50
+
51
+ def _get_anime_name(self, anime_id):
52
+ try:
53
+ name = self.df_anime[self.df_anime.anime_id == anime_id].eng_version.values[0]
54
+ if name is np.nan:
55
+ name = self.df_anime[self.df_anime.anime_id == anime_id].Name.values[0]
56
+ except:
57
+ name = 'Unknown'
58
+ return name
59
+
60
+ def get_anime_frame(self, anime):
61
+ if isinstance(anime, int):
62
+ return self.df_anime[self.df_anime.anime_id == anime]
63
+ if isinstance(anime, str):
64
+ return self.df_anime[self.df_anime.eng_version == anime]
65
+
66
+ def get_sypnopsis(self, anime):
67
+ if isinstance(anime, int):
68
+ return self.sypnopsis_df[self.sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
69
+ if isinstance(anime, str):
70
+ return self.sypnopsis_df[self.sypnopsis_df.Name == anime].sypnopsis.values[0]
71
+
72
+ def find_similar_animes_combined(self, anime_names, n=3, return_dist=False, neg=False):
73
+ try:
74
+ encoded_indices = []
75
+ input_anime_ids = []
76
+ for name in anime_names:
77
+ index = self.get_anime_frame(name).anime_id.values[0]
78
+ input_anime_ids.append(index)
79
+ encoded_index = self.anime2anime_encoded.get(index)
80
+ encoded_indices.append(encoded_index)
81
+
82
+ combined_weights = np.mean(self.anime_weights[encoded_indices], axis=0)
83
+ combined_weights = combined_weights / np.linalg.norm(combined_weights)
84
+
85
+ dists = np.dot(self.anime_weights, combined_weights)
86
+ sorted_dists = np.argsort(dists)
87
+ n = n + len(input_anime_ids)
88
+
89
+ if neg:
90
+ closest = sorted_dists[:n]
91
+ else:
92
+ closest = sorted_dists[-n:]
93
+
94
+ if return_dist:
95
+ return dists, closest
96
+
97
+ rindex = self.df_anime
98
+ SimilarityArr = []
99
+ for close in closest:
100
+ decoded_id = self.anime_encoded2anime.get(close)
101
+ if decoded_id in input_anime_ids:
102
+ continue
103
+ sypnopsis = self.get_sypnopsis(decoded_id)
104
+ anime_frame = self.get_anime_frame(decoded_id)
105
+ anime_name = anime_frame.eng_version.values[0]
106
+ genre = anime_frame.Genres.values[0]
107
+ similarity = dists[close]
108
+ SimilarityArr.append({"anime_id": decoded_id, "name": anime_name, "similarity": similarity, "genre": genre, 'sypnopsis': sypnopsis})
109
+
110
+ Frame = pd.DataFrame(SimilarityArr).sort_values(by="similarity", ascending=False)
111
+ return Frame.drop(index=0)
112
+ except Exception as e:
113
+ print('{}!, Not Found in Anime list'.format(anime_names))
114
+ print(str(e))
115
+ return pd.DataFrame()
116
+
117
+ def get_anime_url(self, name):
118
+ anime = self.df_anime[self.df_anime['eng_version'] == name]
119
+ if not anime.empty:
120
+ mal_id = anime['anime_id'].values[0]
121
+ anime_name = anime['eng_version'].values[0].replace(' ', '_').replace(':', '_').replace('!', '_')
122
+ return f"https://myanimelist.net/anime/{mal_id}/{anime_name}"
123
+ else:
124
+ print(f"{name}์— ํ•ด๋‹นํ•˜๋Š” ์• ๋‹ˆ๋ฉ”์ด์…˜์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
125
+ return None
126
+
127
+ def extract_image_url(self, url):
128
+ try:
129
+ response = requests.get(url)
130
+ response.raise_for_status()
131
+ except requests.RequestException as e:
132
+ print(f"ํŽ˜์ด์ง€๋ฅผ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {e}")
133
+ return None
134
+
135
+ soup = BeautifulSoup(response.text, 'html.parser')
136
+ image_tag = soup.find('img', {'data-src': True})
137
+
138
+ if image_tag:
139
+ return image_tag['data-src']
140
+ else:
141
+ print("์ด๋ฏธ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
142
+ return None
143
+
144
+ def NCF_Recommendation(self, a, b, c):
145
+ anime_list = [a, b, c]
146
+ anime_result = self.find_similar_animes_combined(anime_list, n=3)
147
+
148
+ result1 = anime_result.loc[3, 'name']
149
+ result2 = anime_result.loc[2, 'name']
150
+ result3 = anime_result.loc[1, 'name']
151
+
152
+ explain1 = anime_result.loc[3, 'sypnopsis']
153
+ explain2 = anime_result.loc[2, 'sypnopsis']
154
+ explain3 = anime_result.loc[1, 'sypnopsis']
155
+
156
+ url1 = self.get_anime_url(result1)
157
+ url2 = self.get_anime_url(result2)
158
+ url3 = self.get_anime_url(result3)
159
+
160
+ image1 = self.extract_image_url(url1)
161
+ image2 = self.extract_image_url(url2)
162
+ image3 = self.extract_image_url(url3)
163
+
164
+ return result1, explain1, image1, result2, explain2, image2, result3, explain3, image3
165
+
166
+ # ํŒŒ์ผ ๊ฒฝ๋กœ ์„ค์ •
167
+ rating_path = 'https://huggingface.co/spaces/OhST/Anime_RecSys/data/rating_complete.csv'
168
+ anime_path = 'https://huggingface.co/spaces/OhST/Anime_RecSys/data/anime.csv'
169
+ synopsis_path = 'https://huggingface.co/spaces/OhST/Anime_RecSys/data/anime_with_synopsis.csv'
170
+ model_path = 'https://huggingface.co/spaces/OhST/Anime_RecSys/data/anime_model.h5'
171
+
172
+
173
+ # ๊ฐ์ฒด ์ƒ์„ฑ
174
+ recommender = AnimeRecommender(rating_path, anime_path, synopsis_path, model_path)
175
+
176
+ with gr.Blocks() as app:
177
+ with gr.Row():
178
+ a = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ์ฒซ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
179
+ b = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ๋‘ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
180
+ c = gr.Textbox(label="๋„ˆ์˜ ์ตœ์•  ์• ๋‹ˆ ์„ธ ๋ฒˆ์งธ๋ฅผ ์ž‘์„ฑํ•ด๋ด!")
181
+
182
+ with gr.Row():
183
+ with gr.Column():
184
+ img1 = gr.Image(label="1๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
185
+ output1 = gr.Textbox(label="1๏ธโƒฃ ์ฒซ๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
186
+ output2 = gr.Textbox(label="์ฒซ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
187
+ with gr.Column():
188
+ img2 = gr.Image(label="2๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
189
+ output3 = gr.Textbox(label="2๏ธโƒฃ ๋‘๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
190
+ output4 = gr.Textbox(label="๋‘ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
191
+ with gr.Column():
192
+ img3 = gr.Image(label="3๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ")
193
+ output5 = gr.Textbox(label="3๏ธโƒฃ ์„ธ๋ฒˆ์งธ ์• ๋‹ˆ ์ถ”์ฒœ!")
194
+ output6 = gr.Textbox(label="์„ธ ๋ฒˆ์งธ ์• ๋‹ˆ ์„ค๋ช…", interactive=False)
195
+
196
+ btn = gr.Button("์ถ”์ฒœ์„ ๋ฐ›์•„๋ด…์‹œ๋‹ค!")
197
+
198
+ btn.click(
199
+ fn=recommender.NCF_Recommendation,
200
+ inputs=[a, b, c],
201
+ outputs=[output1, output2, img1, output3, output4, img2, output5, output6, img3]
202
+ )
203
+
204
+ app.launch(share=True)