exalth committed on
Commit
c455c9d
·
1 Parent(s): c07bde5

Initial Commit

Browse files
Dataset/data_Processed.csv ADDED
The diff for this file is too large to render. See raw diff
 
Dataset/userHistory.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ userID,namaWisata
2
+ 1,Hill of Gibeon
3
+ 1,Bukit Indah Sitalmak Talmak Sihotang
4
+ 1,Bukit Senyum
5
+ 1,Bukit Beta Tuk-tuk
6
+ 2,Tao Silalahi
7
+ 2,Pantai Silalahi
8
+ 2,Objek Wisata Pantai Paris
9
+ 2,Prapat bahari
10
+ 2,Wisata Alam Fishing-Camp Siarubung
11
+ 2,Aek Batu Sipolha
Evaluasi User to Item/evaluasiKFOLDLOG.ipynb ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "a95ae49e",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ " user_id avg_hit_rate\n",
14
+ "0 1 0.75\n",
15
+ "1 2 0.25\n",
16
+ "\n",
17
+ "Rata-rata Hit Rate Top-5 dengan 4 lipatan: 0.50\n"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "# %%\n",
23
+ "import pandas as pd\n",
24
+ "import numpy as np\n",
25
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
26
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
27
+ "from sklearn.model_selection import KFold\n",
28
+ "\n",
29
+ "# %%\n",
30
+ "# Load data\n",
31
+ "df_wisata = pd.read_csv(\"../Dataset/data_Processed.csv\")\n",
32
+ "df_history = pd.read_csv(\"../Dataset/userHistory.csv\")\n",
33
+ "\n",
34
+ "# TF-IDF untuk semua tempat wisata\n",
35
+ "tfidf = TfidfVectorizer()\n",
36
+ "tfidf_matrix = tfidf.fit_transform(df_wisata['tags_joined'])\n",
37
+ "\n",
38
+ "# %%\n",
39
+ "def hit_rate_fold(train_visits, test_visits, top_n=5):\n",
40
+ " # Ambil indeks train dari df_wisata\n",
41
+ " visited_indices = df_wisata[df_wisata['title'].isin(train_visits)].index\n",
42
+ " if len(visited_indices) == 0:\n",
43
+ " return None # Profil tidak bisa dibentuk\n",
44
+ "\n",
45
+ " # Bangun profil user\n",
46
+ " user_profile_matrix = tfidf_matrix[visited_indices].mean(axis=0)\n",
47
+ " user_profile = np.asarray(user_profile_matrix).reshape(1, -1)\n",
48
+ "\n",
49
+ " # Hitung similarity\n",
50
+ " similarities = cosine_similarity(user_profile, tfidf_matrix).flatten()\n",
51
+ " df_wisata['similarity'] = similarities\n",
52
+ "\n",
53
+ " # Buang wisata yang ada di train\n",
54
+ " rekomendasi = df_wisata[~df_wisata['title'].isin(train_visits)]\n",
55
+ " rekomendasi = rekomendasi.sort_values(by='similarity', ascending=False)\n",
56
+ "\n",
57
+ " # Ambil Top-N rekomendasi\n",
58
+ " top_rekomendasi = rekomendasi['title'].head(top_n).tolist()\n",
59
+ "\n",
60
+ " # Hit Rate: berapa dari test_visits yang muncul di Top-N\n",
61
+ " hits = len(set(top_rekomendasi) & set(test_visits))\n",
62
+ " hit_rate = hits / len(test_visits)\n",
63
+ " return hit_rate\n",
64
+ "\n",
65
+ "# %%\n",
66
+ "def evaluate_user_kfold(user_id, top_n=5, k=5):\n",
67
+ " user_visits = df_history[df_history['userID'] == user_id]['namaWisata'].tolist()\n",
68
+ " \n",
69
+ " if len(user_visits) < k:\n",
70
+ " return None # Data terlalu sedikit untuk KFold\n",
71
+ "\n",
72
+ " kf = KFold(n_splits=k, shuffle=True, random_state=42)\n",
73
+ " fold_hit_rates = []\n",
74
+ "\n",
75
+ " for train_index, test_index in kf.split(user_visits):\n",
76
+ " train_visits = [user_visits[i] for i in train_index]\n",
77
+ " test_visits = [user_visits[i] for i in test_index]\n",
78
+ "\n",
79
+ " hr = hit_rate_fold(train_visits, test_visits, top_n=top_n)\n",
80
+ " if hr is not None:\n",
81
+ " fold_hit_rates.append(hr)\n",
82
+ "\n",
83
+ " if fold_hit_rates:\n",
84
+ " return {\n",
85
+ " 'user_id': user_id,\n",
86
+ " 'avg_hit_rate': np.mean(fold_hit_rates),\n",
87
+ " 'folds': k,\n",
88
+ " 'fold_hit_rates': fold_hit_rates\n",
89
+ " }\n",
90
+ " else:\n",
91
+ " return None\n",
92
+ "\n",
93
+ "# %%\n",
94
+ "# Evaluasi semua user secara dinamis\n",
95
+ "user_ids = df_history['userID'].unique()\n",
96
+ "all_results = []\n",
97
+ "\n",
98
+ "for uid in user_ids:\n",
99
+ " result = evaluate_user_kfold(uid, top_n=5, k=4)\n",
100
+ " if result:\n",
101
+ " all_results.append(result)\n",
102
+ "\n",
103
+ "# Hasil evaluasi ke DataFrame\n",
104
+ "df_eval = pd.DataFrame(all_results)\n",
105
+ "print(df_eval[['user_id', 'avg_hit_rate']])\n",
106
+ "\n",
107
+ "# Rata-rata keseluruhan\n",
108
+ "average_hit_rate_all = df_eval['avg_hit_rate'].mean()\n",
109
+ "print(f\"\\nRata-rata Hit Rate Top-5 dengan 4 lipatan: {average_hit_rate_all:.2f}\")\n",
110
+ "\n",
111
+ "# Simpan ke file jika perlu\n",
112
+ "# df_eval.to_csv(\"hasil_evaluasi_hit_rate_kfold.csv\", index=False)\n"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": null,
118
+ "id": "274fd69c",
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": []
122
+ }
123
+ ],
124
+ "metadata": {
125
+ "kernelspec": {
126
+ "display_name": "Python 3",
127
+ "language": "python",
128
+ "name": "python3"
129
+ },
130
+ "language_info": {
131
+ "codemirror_mode": {
132
+ "name": "ipython",
133
+ "version": 3
134
+ },
135
+ "file_extension": ".py",
136
+ "mimetype": "text/x-python",
137
+ "name": "python",
138
+ "nbconvert_exporter": "python",
139
+ "pygments_lexer": "ipython3",
140
+ "version": "3.12.6"
141
+ }
142
+ },
143
+ "nbformat": 4,
144
+ "nbformat_minor": 5
145
+ }
Evaluasi User to Item/evaluasiLOG.ipynb ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "a19b25b9",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ " user_id hit_rate\n",
14
+ "0 1 1.000000\n",
15
+ "1 2 0.333333\n",
16
+ "\n",
17
+ "Average Hit Rate@5: 0.67\n"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "# %%\n",
23
+ "import pandas as pd\n",
24
+ "import numpy as np\n",
25
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
26
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
27
+ "from sklearn.model_selection import train_test_split\n",
28
+ "\n",
29
+ "# %%\n",
30
+ "# Load data\n",
31
+ "df_wisata = pd.read_csv(\"../Dataset/data_Processed.csv\")\n",
32
+ "df_history = pd.read_csv(\"../Dataset/userHistory.csv\")\n",
33
+ "\n",
34
+ "# TF-IDF\n",
35
+ "tfidf = TfidfVectorizer()\n",
36
+ "tfidf_matrix = tfidf.fit_transform(df_wisata['tags_joined'])\n",
37
+ "\n",
38
+ "# %%\n",
39
+ "def evaluate_user_hit(user_id, top_n=5):\n",
40
+ " user_visits = df_history[df_history['userID'] == user_id]['namaWisata'].tolist()\n",
41
+ " \n",
42
+ " if len(user_visits) < 2:\n",
43
+ " return None # Skip user dengan data terlalu sedikit\n",
44
+ "\n",
45
+ " train_visits, test_visits = train_test_split(user_visits, test_size=0.5, random_state=42)\n",
46
+ "\n",
47
+ " visited_indices = df_wisata[df_wisata['title'].isin(train_visits)].index\n",
48
+ " if visited_indices.empty:\n",
49
+ " return None\n",
50
+ "\n",
51
+ " user_profile_matrix = tfidf_matrix[visited_indices].mean(axis=0)\n",
52
+ " user_profile = np.asarray(user_profile_matrix).reshape(1, -1)\n",
53
+ "\n",
54
+ " similarities = cosine_similarity(user_profile, tfidf_matrix).flatten()\n",
55
+ " df_wisata['similarity'] = similarities\n",
56
+ "\n",
57
+ " rekomendasi = df_wisata[~df_wisata['title'].isin(train_visits)]\n",
58
+ " rekomendasi = rekomendasi.sort_values(by='similarity', ascending=False)\n",
59
+ "\n",
60
+ " top_rekomendasi = rekomendasi['title'].head(top_n).tolist()\n",
61
+ " hits = len(set(top_rekomendasi) & set(test_visits))\n",
62
+ " hit_rate = hits / len(test_visits)\n",
63
+ "\n",
64
+ " return {\n",
65
+ " 'user_id': user_id,\n",
66
+ " 'hit_rate': hit_rate,\n",
67
+ " 'hits': hits,\n",
68
+ " 'test_size': len(test_visits),\n",
69
+ " 'top_recommendations': top_rekomendasi,\n",
70
+ " 'test_visits': test_visits\n",
71
+ " }\n",
72
+ "\n",
73
+ "# %%\n",
74
+ "# Loop ke semua user\n",
75
+ "user_ids = df_history['userID'].unique()\n",
76
+ "results = []\n",
77
+ "\n",
78
+ "for uid in user_ids:\n",
79
+ " result = evaluate_user_hit(uid, top_n=5)\n",
80
+ " if result:\n",
81
+ " results.append(result)\n",
82
+ "\n",
83
+ "# Buat DataFrame dari hasil evaluasi\n",
84
+ "df_eval = pd.DataFrame(results)\n",
85
+ "\n",
86
+ "# Hitung rata-rata Hit Rate semua user\n",
87
+ "average_hit_rate = df_eval['hit_rate'].mean()\n",
88
+ "print(df_eval[['user_id', 'hit_rate']])\n",
89
+ "print(f\"\\nAverage Hit Rate@5: {average_hit_rate:.2f}\")\n",
90
+ "\n",
91
+ "# Simpan hasil jika perlu\n",
92
+ "# df_eval.to_csv(\"hasil_evaluasi_hit_rate.csv\", index=False)\n"
93
+ ]
94
+ }
95
+ ],
96
+ "metadata": {
97
+ "kernelspec": {
98
+ "display_name": "Python 3",
99
+ "language": "python",
100
+ "name": "python3"
101
+ },
102
+ "language_info": {
103
+ "codemirror_mode": {
104
+ "name": "ipython",
105
+ "version": 3
106
+ },
107
+ "file_extension": ".py",
108
+ "mimetype": "text/x-python",
109
+ "name": "python",
110
+ "nbconvert_exporter": "python",
111
+ "pygments_lexer": "ipython3",
112
+ "version": "3.12.6"
113
+ }
114
+ },
115
+ "nbformat": 4,
116
+ "nbformat_minor": 5
117
+ }
Kode Kotor/rekomendasiLOG.ipynb ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "6dc4cd6f",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "df_wisata = pd.read_csv(\"../Dataset/data_Processed.csv\")\n",
12
+ "df_history = pd.read_csv(\"../Dataset/userHistory.csv\")"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "id": "c3d4d826",
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
23
+ "\n",
24
+ "tfidf = TfidfVectorizer()\n",
25
+ "tfidf_matrix = tfidf.fit_transform(df_wisata['tags_joined'])\n"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 3,
31
+ "id": "787049ea",
32
+ "metadata": {},
33
+ "outputs": [
34
+ {
35
+ "name": "stdout",
36
+ "output_type": "stream",
37
+ "text": [
38
+ "TfidfVectorizer()\n"
39
+ ]
40
+ }
41
+ ],
42
+ "source": [
43
+ "print(tfidf)"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "id": "a4a2301c",
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "# import pickle\n",
54
+ "# from scipy.sparse import save_npz\n",
55
+ "\n",
56
+ "# with open(\"tfidf_vectorizer.pkl\", \"wb\") as f:\n",
57
+ "# pickle.dump(tfidf, f)\n",
58
+ "\n",
59
+ "# save_npz(\"tfidf_matrix.npz\", tfidf_matrix)\n"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": 5,
65
+ "id": "ef9e4114",
66
+ "metadata": {},
67
+ "outputs": [
68
+ {
69
+ "data": {
70
+ "text/html": [
71
+ "<div>\n",
72
+ "<style scoped>\n",
73
+ " .dataframe tbody tr th:only-of-type {\n",
74
+ " vertical-align: middle;\n",
75
+ " }\n",
76
+ "\n",
77
+ " .dataframe tbody tr th {\n",
78
+ " vertical-align: top;\n",
79
+ " }\n",
80
+ "\n",
81
+ " .dataframe thead th {\n",
82
+ " text-align: right;\n",
83
+ " }\n",
84
+ "</style>\n",
85
+ "<table border=\"1\" class=\"dataframe\">\n",
86
+ " <thead>\n",
87
+ " <tr style=\"text-align: right;\">\n",
88
+ " <th></th>\n",
89
+ " <th>userID</th>\n",
90
+ " <th>namaWisata</th>\n",
91
+ " </tr>\n",
92
+ " </thead>\n",
93
+ " <tbody>\n",
94
+ " <tr>\n",
95
+ " <th>0</th>\n",
96
+ " <td>1</td>\n",
97
+ " <td>Hill of Gibeon</td>\n",
98
+ " </tr>\n",
99
+ " <tr>\n",
100
+ " <th>1</th>\n",
101
+ " <td>1</td>\n",
102
+ " <td>Bukit Indah Sitalmak Talmak Sihotang</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>2</th>\n",
106
+ " <td>1</td>\n",
107
+ " <td>Bukit Senyum</td>\n",
108
+ " </tr>\n",
109
+ " <tr>\n",
110
+ " <th>3</th>\n",
111
+ " <td>1</td>\n",
112
+ " <td>Bukit Beta Tuk-tuk</td>\n",
113
+ " </tr>\n",
114
+ " <tr>\n",
115
+ " <th>4</th>\n",
116
+ " <td>2</td>\n",
117
+ " <td>Tao Silalahi</td>\n",
118
+ " </tr>\n",
119
+ " </tbody>\n",
120
+ "</table>\n",
121
+ "</div>"
122
+ ],
123
+ "text/plain": [
124
+ " userID namaWisata\n",
125
+ "0 1 Hill of Gibeon\n",
126
+ "1 1 Bukit Indah Sitalmak Talmak Sihotang\n",
127
+ "2 1 Bukit Senyum\n",
128
+ "3 1 Bukit Beta Tuk-tuk\n",
129
+ "4 2 Tao Silalahi"
130
+ ]
131
+ },
132
+ "execution_count": 5,
133
+ "metadata": {},
134
+ "output_type": "execute_result"
135
+ }
136
+ ],
137
+ "source": [
138
+ "user_id = 2\n",
139
+ "user_history = df_history[df_history['userID'] == user_id]['namaWisata'].tolist()\n",
140
+ "# df_wisata.head()\n",
141
+ "df_history.head()"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 6,
147
+ "id": "4f78911d",
148
+ "metadata": {},
149
+ "outputs": [
150
+ {
151
+ "data": {
152
+ "text/html": [
153
+ "<div>\n",
154
+ "<style scoped>\n",
155
+ " .dataframe tbody tr th:only-of-type {\n",
156
+ " vertical-align: middle;\n",
157
+ " }\n",
158
+ "\n",
159
+ " .dataframe tbody tr th {\n",
160
+ " vertical-align: top;\n",
161
+ " }\n",
162
+ "\n",
163
+ " .dataframe thead th {\n",
164
+ " text-align: right;\n",
165
+ " }\n",
166
+ "</style>\n",
167
+ "<table border=\"1\" class=\"dataframe\">\n",
168
+ " <thead>\n",
169
+ " <tr style=\"text-align: right;\">\n",
170
+ " <th></th>\n",
171
+ " <th>title</th>\n",
172
+ " <th>link</th>\n",
173
+ " <th>image_url</th>\n",
174
+ " <th>rating</th>\n",
175
+ " <th>reviews</th>\n",
176
+ " <th>address</th>\n",
177
+ " <th>opening_hours</th>\n",
178
+ " <th>latitude</th>\n",
179
+ " <th>longitude</th>\n",
180
+ " <th>kategori</th>\n",
181
+ " <th>aktivitas</th>\n",
182
+ " <th>deskripsi</th>\n",
183
+ " <th>kecamatan</th>\n",
184
+ " <th>biaya_masuk</th>\n",
185
+ " <th>biaya_parkir_motor</th>\n",
186
+ " <th>biaya_parkir_mobil</th>\n",
187
+ " <th>tags_joined</th>\n",
188
+ " </tr>\n",
189
+ " </thead>\n",
190
+ " <tbody>\n",
191
+ " <tr>\n",
192
+ " <th>25</th>\n",
193
+ " <td>Objek Wisata Pantai Paris</td>\n",
194
+ " <td>https://www.google.com/maps/place/Objek+Wisata...</td>\n",
195
+ " <td>https://drive.google.com/drive/folders/170vaB7...</td>\n",
196
+ " <td>4.3</td>\n",
197
+ " <td>897</td>\n",
198
+ " <td>Tigaras, Kec. Dolok Pardamean, Kabupaten Simal...</td>\n",
199
+ " <td>Open 24 hours</td>\n",
200
+ " <td>2.80154</td>\n",
201
+ " <td>98.780056</td>\n",
202
+ " <td>Bahari</td>\n",
203
+ " <td>Berenang, Perahu, Banana Boat, Sepeda Air, San...</td>\n",
204
+ " <td>Objek Wisata Pantai Paris di Tigaras, Dolok Pa...</td>\n",
205
+ " <td>Dolok Pardamean</td>\n",
206
+ " <td>40000</td>\n",
207
+ " <td>0</td>\n",
208
+ " <td>0</td>\n",
209
+ " <td>berenang perahu banana boat sepeda air santai ...</td>\n",
210
+ " </tr>\n",
211
+ " </tbody>\n",
212
+ "</table>\n",
213
+ "</div>"
214
+ ],
215
+ "text/plain": [
216
+ " title \\\n",
217
+ "25 Objek Wisata Pantai Paris \n",
218
+ "\n",
219
+ " link \\\n",
220
+ "25 https://www.google.com/maps/place/Objek+Wisata... \n",
221
+ "\n",
222
+ " image_url rating reviews \\\n",
223
+ "25 https://drive.google.com/drive/folders/170vaB7... 4.3 897 \n",
224
+ "\n",
225
+ " address opening_hours \\\n",
226
+ "25 Tigaras, Kec. Dolok Pardamean, Kabupaten Simal... Open 24 hours \n",
227
+ "\n",
228
+ " latitude longitude kategori \\\n",
229
+ "25 2.80154 98.780056 Bahari \n",
230
+ "\n",
231
+ " aktivitas \\\n",
232
+ "25 Berenang, Perahu, Banana Boat, Sepeda Air, San... \n",
233
+ "\n",
234
+ " deskripsi kecamatan \\\n",
235
+ "25 Objek Wisata Pantai Paris di Tigaras, Dolok Pa... Dolok Pardamean \n",
236
+ "\n",
237
+ " biaya_masuk biaya_parkir_motor biaya_parkir_mobil \\\n",
238
+ "25 40000 0 0 \n",
239
+ "\n",
240
+ " tags_joined \n",
241
+ "25 berenang perahu banana boat sepeda air santai ... "
242
+ ]
243
+ },
244
+ "execution_count": 6,
245
+ "metadata": {},
246
+ "output_type": "execute_result"
247
+ }
248
+ ],
249
+ "source": [
250
+ "visited_wisata = df_wisata[df_wisata['title'].isin(user_history)]\n",
251
+ "visited_wisata.head(1)"
252
+ ]
253
+ },
254
+ {
255
+ "cell_type": "code",
256
+ "execution_count": 7,
257
+ "id": "7a4002e8",
258
+ "metadata": {},
259
+ "outputs": [],
260
+ "source": [
261
+ "import numpy as np\n",
262
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
263
+ "visited_indices = df_wisata[df_wisata['title'].isin(user_history)].index\n",
264
+ "user_profile_matrix = tfidf_matrix[visited_indices].mean(axis=0)\n",
265
+ "user_profile = np.asarray(user_profile_matrix).reshape(1, -1)"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 8,
271
+ "id": "f8a7c783",
272
+ "metadata": {},
273
+ "outputs": [
274
+ {
275
+ "name": "stdout",
276
+ "output_type": "stream",
277
+ "text": [
278
+ "(1, 921)\n"
279
+ ]
280
+ }
281
+ ],
282
+ "source": [
283
+ "print(user_profile.shape)"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "code",
288
+ "execution_count": 9,
289
+ "id": "563e3e22",
290
+ "metadata": {},
291
+ "outputs": [],
292
+ "source": [
293
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
294
+ "\n",
295
+ "similarities = cosine_similarity(user_profile, tfidf_matrix)\n",
296
+ "#print(similarities)"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 10,
302
+ "id": "b62c07e4",
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "data": {
307
+ "text/html": [
308
+ "<div>\n",
309
+ "<style scoped>\n",
310
+ " .dataframe tbody tr th:only-of-type {\n",
311
+ " vertical-align: middle;\n",
312
+ " }\n",
313
+ "\n",
314
+ " .dataframe tbody tr th {\n",
315
+ " vertical-align: top;\n",
316
+ " }\n",
317
+ "\n",
318
+ " .dataframe thead th {\n",
319
+ " text-align: right;\n",
320
+ " }\n",
321
+ "</style>\n",
322
+ "<table border=\"1\" class=\"dataframe\">\n",
323
+ " <thead>\n",
324
+ " <tr style=\"text-align: right;\">\n",
325
+ " <th></th>\n",
326
+ " <th>title</th>\n",
327
+ " <th>link</th>\n",
328
+ " <th>image_url</th>\n",
329
+ " <th>rating</th>\n",
330
+ " <th>reviews</th>\n",
331
+ " <th>address</th>\n",
332
+ " <th>opening_hours</th>\n",
333
+ " <th>latitude</th>\n",
334
+ " <th>longitude</th>\n",
335
+ " <th>kategori</th>\n",
336
+ " <th>aktivitas</th>\n",
337
+ " <th>deskripsi</th>\n",
338
+ " <th>kecamatan</th>\n",
339
+ " <th>biaya_masuk</th>\n",
340
+ " <th>biaya_parkir_motor</th>\n",
341
+ " <th>biaya_parkir_mobil</th>\n",
342
+ " <th>tags_joined</th>\n",
343
+ " <th>similarity</th>\n",
344
+ " </tr>\n",
345
+ " </thead>\n",
346
+ " <tbody>\n",
347
+ " <tr>\n",
348
+ " <th>0</th>\n",
349
+ " <td>Hill of Gibeon</td>\n",
350
+ " <td>https://www.google.com/maps/place/Hill+of+Gibe...</td>\n",
351
+ " <td>https://drive.google.com/drive/folders/1AVbEcO...</td>\n",
352
+ " <td>4.5</td>\n",
353
+ " <td>457</td>\n",
354
+ " <td>Kabupaten Toba Samosir, 21174, Sionggang Utara...</td>\n",
355
+ " <td>Open 24 hours</td>\n",
356
+ " <td>2.590898</td>\n",
357
+ " <td>98.9978849865071</td>\n",
358
+ " <td>Alam</td>\n",
359
+ " <td>Aktivitas Air, Berenang, Pemandangan, Santai, ...</td>\n",
360
+ " <td>Terletak di Kabupaten Toba, Sumatera Utara, Bu...</td>\n",
361
+ " <td>Lumban Julu</td>\n",
362
+ " <td>10000</td>\n",
363
+ " <td>2000</td>\n",
364
+ " <td>5000</td>\n",
365
+ " <td>aktivitas air berenang pemandangan santai foto...</td>\n",
366
+ " <td>0.278854</td>\n",
367
+ " </tr>\n",
368
+ " </tbody>\n",
369
+ "</table>\n",
370
+ "</div>"
371
+ ],
372
+ "text/plain": [
373
+ " title link \\\n",
374
+ "0 Hill of Gibeon https://www.google.com/maps/place/Hill+of+Gibe... \n",
375
+ "\n",
376
+ " image_url rating reviews \\\n",
377
+ "0 https://drive.google.com/drive/folders/1AVbEcO... 4.5 457 \n",
378
+ "\n",
379
+ " address opening_hours latitude \\\n",
380
+ "0 Kabupaten Toba Samosir, 21174, Sionggang Utara... Open 24 hours 2.590898 \n",
381
+ "\n",
382
+ " longitude kategori \\\n",
383
+ "0 98.9978849865071 Alam \n",
384
+ "\n",
385
+ " aktivitas \\\n",
386
+ "0 Aktivitas Air, Berenang, Pemandangan, Santai, ... \n",
387
+ "\n",
388
+ " deskripsi kecamatan \\\n",
389
+ "0 Terletak di Kabupaten Toba, Sumatera Utara, Bu... Lumban Julu \n",
390
+ "\n",
391
+ " biaya_masuk biaya_parkir_motor biaya_parkir_mobil \\\n",
392
+ "0 10000 2000 5000 \n",
393
+ "\n",
394
+ " tags_joined similarity \n",
395
+ "0 aktivitas air berenang pemandangan santai foto... 0.278854 "
396
+ ]
397
+ },
398
+ "execution_count": 10,
399
+ "metadata": {},
400
+ "output_type": "execute_result"
401
+ }
402
+ ],
403
+ "source": [
404
+ "\n",
405
+ "similarities = similarities.flatten()\n",
406
+ "#ngubah array multidimensi jadi 1 array\n",
407
+ "df_wisata['similarity'] = similarities\n",
408
+ "df_wisata.head(1)\n",
409
+ "\n"
410
+ ]
411
+ },
412
+ {
413
+ "cell_type": "code",
414
+ "execution_count": 11,
415
+ "id": "b73020b9",
416
+ "metadata": {},
417
+ "outputs": [
418
+ {
419
+ "name": "stdout",
420
+ "output_type": "stream",
421
+ "text": [
422
+ " title similarity\n",
423
+ "13 Pantai Indah Situngkir (PIS) 0.519932\n",
424
+ "15 Pantai Kenangan 0.493438\n",
425
+ "3 Pantai Ikan Mas Tandarabun 0.485319\n",
426
+ "20 Pantai Kasih 0.482357\n",
427
+ "16 pantai pasir putih 0.470786\n"
428
+ ]
429
+ }
430
+ ],
431
+ "source": [
432
+ "# Hapus yang sudah dikunjungi\n",
433
+ "rekomendasi = df_wisata[~df_wisata['title'].isin(user_history)]\n",
434
+ "rekomendasi = rekomendasi.sort_values(by='similarity', ascending=False)\n",
435
+ "\n",
436
+ "# Tampilkan Top-N\n",
437
+ "top_n = 5\n",
438
+ "print(rekomendasi[['title', 'similarity']].head(top_n))"
439
+ ]
440
+ }
441
+ ],
442
+ "metadata": {
443
+ "kernelspec": {
444
+ "display_name": "Python 3",
445
+ "language": "python",
446
+ "name": "python3"
447
+ },
448
+ "language_info": {
449
+ "codemirror_mode": {
450
+ "name": "ipython",
451
+ "version": 3
452
+ },
453
+ "file_extension": ".py",
454
+ "mimetype": "text/x-python",
455
+ "name": "python",
456
+ "nbconvert_exporter": "python",
457
+ "pygments_lexer": "ipython3",
458
+ "version": "3.12.6"
459
+ }
460
+ },
461
+ "nbformat": 4,
462
+ "nbformat_minor": 5
463
+ }
Rekomendasi Item to Item/rekomendasi.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rekomendasi_app_api.py
2
+ from flask import Flask, request, jsonify
3
+ import pandas as pd
4
+ import pickle
5
+ import os
6
+
7
# Flask application object; the recommendation route is registered on it.
app = Flask(__name__)

# === Load data and model ===
# Every path is resolved relative to this file so the service can be started
# from any working directory.
base_dir = os.path.dirname(os.path.abspath(__file__))
_dataset_csv = os.path.join(base_dir, "..", "Dataset", "data_Processed.csv")
_similarity_pkl = os.path.join(base_dir, "similarity_matrix.pkl")

# Catalogue of places; rekomendasi_tempat addresses its rows by position.
df = pd.read_csv(_dataset_csv)

# Pickled similarity matrix; rekomendasi_tempat indexes it with the same row
# position it uses for ``df``.
# NOTE(review): pickle must only ever be loaded from a trusted file — this one
# ships next to the module.
with open(_similarity_pkl, "rb") as f:
    similarity_matrix = pickle.load(f)
16
+
17
+ # === Fungsi rekomendasi ===
18
def rekomendasi_tempat(tempat_id, top_n=5):
    """Return the places most similar to the place at row ``tempat_id``.

    Args:
        tempat_id: Zero-based row position of the query place in ``df``; the
            same value selects the row of ``similarity_matrix`` to rank.
        top_n: Maximum number of recommendations to return. Non-positive
            values yield an empty recommendation list.

    Returns:
        ``None`` when ``tempat_id`` is outside ``df``. Otherwise a dict with
        keys ``"tempat_id"``, ``"input_title"`` and ``"rekomendasi"``; the
        latter is a list of dicts (``title``, ``index``, ``kategori``,
        ``similarity`` rounded to 3 decimals) ordered by descending
        similarity.
    """
    if tempat_id < 0 or tempat_id >= len(df):
        return None

    # Exclude the query place by index rather than by dropping the first
    # sorted entry: when another place ties with it for the top score the
    # query is not guaranteed to sort first, and it would otherwise be
    # recommended to itself while a real neighbour is discarded.
    candidates = [
        (idx, score)
        for idx, score in enumerate(similarity_matrix[tempat_id])
        if idx != tempat_id
    ]
    # list.sort is stable, so equal scores keep their catalogue order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    rekomendasi = []
    for idx, score in candidates[:max(top_n, 0)]:
        row = df.iloc[idx]
        rekomendasi.append({
            'title': row['title'],
            'index': idx,
            'kategori': row['kategori'],
            # Plain float keeps the value JSON-serialisable whatever the
            # dtype of the stored matrix is.
            'similarity': round(float(score), 3),
        })

    return {
        "tempat_id": tempat_id,
        "rekomendasi": rekomendasi,
        "input_title": df.iloc[tempat_id]['title'],
    }
39
+
40
+ # === Endpoint Flask ===
41
@app.route("/recommenditi", methods=["GET"])
def rekomendasi_api():
    """Serve ``GET /recommenditi?tempat_id=<int>``.

    Responds with 400 when ``tempat_id`` is absent from the query string
    (``request.args.get(..., type=int)`` yields ``None``), with 404 when
    ``rekomendasi_tempat`` returns ``None`` for the id, and otherwise with
    the recommendation payload as JSON.
    """
    place_id = request.args.get("tempat_id", type=int)
    if place_id is None:
        error_body = {"error": "Parameter 'tempat_id' diperlukan."}
        return jsonify(error_body), 400

    result = rekomendasi_tempat(place_id)
    if result is not None:
        return jsonify(result)

    not_found_body = {"message": f"Tempat '{place_id}' tidak ditemukan."}
    return jsonify(not_found_body), 404
54
+
55
+ # === Run ===
56
if __name__ == "__main__":
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, so debug mode is opt-in (FLASK_DEBUG=1) instead of
    # being hard-coded on.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
Rekomendasi Item to Item/similarity_matrix.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc5a936cae12829abf36556df1e2b72e0f649a1dfd99eab7cd1f84023dc27bb
3
+ size 148130
Rekomendasi User to Item (CBF)/rekomendasi_deploy.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ from scipy.sparse import load_npz
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import os
8
+
9
# === Initialise Flask ===
app = Flask(__name__)

# === Load the model and data once, when the application starts ===
# Paths are anchored at this file so the working directory does not matter.
base_dir = os.path.dirname(os.path.abspath(__file__))
_dataset_dir = os.path.join(base_dir, "..", "Dataset")
_vectorizer_pkl = os.path.join(base_dir, "tfidf_vectorizer.pkl")
_matrix_npz = os.path.join(base_dir, "tfidf_matrix.npz")

# Pickled vectorizer object.
# NOTE(review): pickle must only ever be loaded from a trusted file — this one
# ships next to the module.
with open(_vectorizer_pkl, "rb") as f:
    tfidf = pickle.load(f)

# Sparse matrix whose rows recommend_for_user selects using row positions of
# df_wisata.
tfidf_matrix = load_npz(_matrix_npz)

# Place catalogue (column 'title') and per-user visit history
# (columns 'userID', 'namaWisata'), as read by recommend_for_user.
df_wisata = pd.read_csv(os.path.join(_dataset_dir, "data_Processed.csv"))
df_history = pd.read_csv(os.path.join(_dataset_dir, "userHistory.csv"))
21
+
22
+ # === Fungsi Rekomendasi ===
23
def recommend_for_user(user_id, top_n=5):
    """Recommend unvisited places whose TF-IDF vectors match the user's history.

    The user profile is the mean TF-IDF row of every catalogue place whose
    ``title`` appears in the user's ``namaWisata`` history; all places are
    then ranked by cosine similarity to that profile.

    Args:
        user_id: Value matched against ``df_history['userID']``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of ``{"title": ..., "similarity": ...}`` dicts ordered by
        descending similarity, excluding places already in the history.
        An empty list when none of the user's visits is found in the
        catalogue (including users with no history at all).
    """
    user_history = df_history.loc[
        df_history['userID'] == user_id, 'namaWisata'
    ].tolist()

    visited_mask = df_wisata['title'].isin(user_history).to_numpy()
    if not visited_mask.any():
        return []

    # Select matrix rows by *position*. Index labels only coincide with
    # positions for a default RangeIndex, so using them directly would pick
    # the wrong rows if df_wisata were ever filtered or re-indexed.
    visited_positions = np.flatnonzero(visited_mask)
    user_profile = np.asarray(
        tfidf_matrix[visited_positions].mean(axis=0)
    ).reshape(1, -1)
    similarities = cosine_similarity(user_profile, tfidf_matrix).flatten()

    # Build only the two columns that are returned instead of deep-copying
    # the whole catalogue on every request.
    candidates = pd.DataFrame({
        'title': df_wisata['title'].to_numpy(),
        'similarity': similarities,
    })
    candidates = candidates[~visited_mask]
    candidates = candidates.sort_values(by='similarity', ascending=False)

    return candidates.head(top_n).to_dict(orient="records")
40
+
41
+ # === Endpoint API ===
42
@app.route("/recommenduti", methods=["GET"])
def recommend():
    """Serve ``GET /recommenduti?user_id=<int>``.

    Responds with 400 when ``user_id`` is absent from the query string
    (``request.args.get(..., type=int)`` yields ``None``), with 404 when
    ``recommend_for_user`` returns nothing for the user, and otherwise with
    the user id and its recommendations as JSON.
    """
    uid = request.args.get("user_id", type=int)
    if uid is None:
        error_body = {"error": "Parameter user_id diperlukan."}
        return jsonify(error_body), 400

    recommendations = recommend_for_user(uid)
    if recommendations:
        return jsonify({"user_id": uid, "rekomendasi": recommendations})

    not_found_body = {"message": f"Tidak ada data history untuk user ID {uid}."}
    return jsonify(not_found_body), 404
55
+
56
+ # === Run Server ===
57
if __name__ == "__main__":
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, so a deploy script must not enable it
    # unconditionally; opt in with FLASK_DEBUG=1 for local development.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
Rekomendasi User to Item (CBF)/tfidf_matrix.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faedb0b4226402f3a7ca92b0a889fd056870b9749bfd38104e5a48f387aa9a66
3
+ size 37767
Rekomendasi User to Item (CBF)/tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff55d00fd0f9f1db48e35f1b7f09d677f5c9174bbdb06da49440f4fee87ae947
3
+ size 18864