Rekomendasi Materi Belajar/edtech/backend/models/recommenders/collaborative/collab_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4aef73c6272415cb11002c1ff5c96f65587498acaa7c86ad4f7167d1d73fe48
3
+ size 6080
Rekomendasi Materi Belajar/edtech/backend/models/recommenders/content_based/content_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d1a2f5acb72fa4e6c3825586d578da46d850c31d82883ef50f618789722977
3
+ size 5211833
Rekomendasi Materi Belajar/edtech/backend/models/recommenders/hybrid/hybrid_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d173427052471e467df306ab61013e0599cfb0a80ff3805e464f9b7a25166933
3
+ size 32
Rekomendasi Materi Belajar/edtech/backend/src/app.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/app.py/recommendation
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ from contextlib import asynccontextmanager
6
+ import joblib
7
+ import pandas as pd
8
+ from typing import List, Optional
9
+ import uvicorn
10
+ from pathlib import Path
11
+ from recommendation.collaborative import CollaborativeFiltering
12
+ from recommendation.content_based import ContentBasedRecommender
13
+ from recommendation.hybrid import HybridRecommender
14
+
15
+ # ===== KONFIGURASI SERVER =====
16
+ HOST = "0.0.0.0" #untuk deploy hugging face
17
+ PORT = 8025
18
+ RELOAD = True # Set False di production
19
+ WORKERS = 1
20
+
21
+ # ===== LIFESPAN MANAGEMENT =====
22
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load every recommender model at startup and log shutdown.

    The models are attached to ``app.state`` so request handlers can reach
    them. The ``*_MODEL_PATH`` constants are defined at module level after
    this function, but before the application starts, so they resolve fine
    at runtime.
    """
    print("Memuat model rekomendasi...")
    try:
        # Load semua model
        app.state.collab_model = CollaborativeFiltering.load_model(COLLAB_MODEL_PATH)
        app.state.content_model = ContentBasedRecommender.load_model(CONTENT_MODEL_PATH)
        app.state.hybrid_model = HybridRecommender.load_model(
            collab_path=COLLAB_MODEL_PATH,
            content_path=CONTENT_MODEL_PATH,
            hybrid_path=HYBRID_MODEL_PATH
        )
        print("✅ Model berhasil dimuat!")
    except Exception as e:
        print(f"❌ Gagal memuat model: {str(e)}")
        # BUG FIX: HTTPException only makes sense inside a request handler;
        # raised here it was never rendered. A RuntimeError chained from the
        # cause aborts startup and preserves the original traceback.
        raise RuntimeError("Gagal memuat model") from e
    yield
    print("🛑 Server dimatikan")
41
+
42
# ===== INISIALISASI APLIKASI =====
# FastAPI instance; model loading/unloading is delegated to the `lifespan`
# context manager defined above.
app = FastAPI(
    title="Sistem Rekomendasi Materi Pembelajaran",
    description="API untuk memberikan rekomendasi materi pembelajaran personalisasi",
    version="1.0.2",
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc"
)

# ===== KONFIGURASI CORS =====
# Only the local frontend origin is whitelisted; adjust for other deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3025"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ===== PATH MODEL =====
# NOTE(review): these paths are relative to the process working directory —
# the server must be launched from the backend root (or the paths made
# absolute) for the lifespan loader to find the model files. Confirm against
# the deployment setup.
MODEL_DIR = Path("models/recommenders")
COLLAB_MODEL_PATH = MODEL_DIR / "collaborative/collab_model.joblib"
CONTENT_MODEL_PATH = MODEL_DIR / "content_based/content_model.joblib"
HYBRID_MODEL_PATH = MODEL_DIR / "hybrid/hybrid_model.joblib"
66
+
67
# ===== SCHEMA REQUEST/RESPONSE =====
class RecommendationRequest(BaseModel):
    # Request body for POST /recommend.
    user_id: str                  # id of the student requesting recommendations
    user_history: List[str]       # material ids the user has already accessed
    n_recommendations: int = 5    # how many items to return
    algorithm: str = "hybrid"     # "hybrid" | "collaborative" | anything else -> content-based

class MaterialRecommendationRequest(BaseModel):
    # Request body for material-to-material similarity lookups.
    material_id: str
    n_recommendations: int = 5

class RecommendationItem(BaseModel):
    # A single recommended material with its raw score.
    material_id: str
    score: float
    confidence: float = 0.0  # new field: percentage-like value derived from score

class RecommendationResponse(BaseModel):
    # Envelope returned by POST /recommend.
    success: bool
    recommendations: List[RecommendationItem]
    algorithm: str
    message: Optional[str] = None
88
+
89
# ===== ENDPOINT API =====
@app.get("/")
async def root():
    """Landing endpoint: greeting, API version, and a link to the docs."""
    return {
        "message": "Selamat datang di API Rekomendasi Pembelajaran",
        "version": app.version,
        "docs": f"http://{HOST}:{PORT}/docs"
    }
97
+
98
@app.post("/recommend", response_model=RecommendationResponse)
async def get_recommendations(request: RecommendationRequest):
    """Return personalised recommendations for a user.

    ``request.algorithm`` selects the model: "hybrid" (default),
    "collaborative", or any other value falls through to the content-based
    recommender. Raises 400 on invalid input, 500 on model failure.
    """
    try:
        # Validasi input
        if not request.user_id:
            raise HTTPException(
                status_code=400,
                detail="User ID diperlukan",
                headers={"Content-Type": "application/json"}
            )

        if not request.user_history and request.algorithm != "collaborative":
            raise HTTPException(
                status_code=400,
                detail="User history diperlukan untuk algoritma ini",
                headers={"Content-Type": "application/json"}
            )

        # Dispatch to the selected recommender; each returns (item, score) pairs.
        recommendations = []
        if request.algorithm == "hybrid":
            recommendations = app.state.hybrid_model.recommend_for_user(
                user_id=request.user_id,
                user_history=request.user_history or [],  # tolerate None
                df=pd.DataFrame(),
                n_recommendations=request.n_recommendations
            )
        elif request.algorithm == "collaborative":
            recommendations = app.state.collab_model.recommend_for_user(
                user_id=request.user_id
            )[:request.n_recommendations]
        else:
            recommendations = app.state.content_model.recommend_for_user(
                user_id=request.user_id,
                user_history=request.user_history or [],  # tolerate None
                df=pd.DataFrame()
            )[:request.n_recommendations]

        # Normalise to the RecommendationItem schema.
        recommendation_items = [
            {
                "material_id": item[0],
                "score": float(item[1]),
                "confidence": min(float(item[1]) * 100, 99.9)
            }
            for item in recommendations
        ]

        return {
            "success": True,
            "recommendations": recommendation_items,
            "algorithm": request.algorithm,
            "message": "Rekomendasi berhasil dibuat"
        }

    except HTTPException:
        # BUG FIX: without this clause the 400 validation errors raised above
        # were caught by the generic handler below and re-raised as 500s.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=str(e),
            headers={"Content-Type": "application/json"}
        )
158
+
159
@app.get("/health")
async def health_check():
    """Report whether all three recommender models were loaded at startup.

    "healthy" only when collaborative, content-based, and hybrid models are
    all present on ``app.state``; the details map shows each one individually.
    """
    return {
        "status": "healthy" if all([
            hasattr(app.state, "collab_model"),
            hasattr(app.state, "content_model"),
            hasattr(app.state, "hybrid_model")
        ]) else "unhealthy",
        "details": {
            "collaborative_loaded": hasattr(app.state, "collab_model"),
            "content_loaded": hasattr(app.state, "content_model"),
            "hybrid_loaded": hasattr(app.state, "hybrid_model")
        }
    }
173
+
174
# ===== KONFIGURASI SERVER =====
def run_server():
    """Start the Uvicorn server with the module-level HOST/PORT settings."""
    # NOTE(review): uvicorn generally requires an import string (e.g.
    # "app:app") instead of an app object for `reload`/`workers` to take
    # effect — confirm that auto-reload actually works in this setup.
    config = uvicorn.Config(
        app,
        host=HOST,
        port=PORT,
        reload=RELOAD,
        workers=WORKERS,
        log_level="info"
    )
    server = uvicorn.Server(config)

    print(f"🚀 Server berjalan di http://{HOST}:{PORT}")
    print(f"📚 Dokumentasi API tersedia di http://{HOST}:{PORT}/docs")

    server.run()

if __name__ == "__main__":
    run_server()
Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/collaborative/collab_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4aef73c6272415cb11002c1ff5c96f65587498acaa7c86ad4f7167d1d73fe48
3
+ size 6080
Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/content_based/content_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d1a2f5acb72fa4e6c3825586d578da46d850c31d82883ef50f618789722977
3
+ size 5211833
Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/hybrid/hybrid_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d173427052471e467df306ab61013e0599cfb0a80ff3805e464f9b7a25166933
3
+ size 32
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/collaborative.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/recommendation/collaborative.py
2
+ import numpy as np
3
+ import pandas as pd
4
+ from scipy.sparse.linalg import svds
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import joblib
7
+ from pathlib import Path
8
+ from scipy.sparse import csr_matrix
9
+
10
+ class CollaborativeFiltering:
11
+ def __init__(self, n_factors=50, n_recommendations=5):
12
+ self.n_factors = n_factors
13
+ self.n_recommendations = n_recommendations
14
+ self.user_item_matrix = None
15
+ self.user_factors = None
16
+ self.item_factors = None
17
+ self.user_ids = None
18
+ self.item_ids = None
19
+
20
+ def fit(self, user_item_matrix):
21
+ self.user_item_matrix = user_item_matrix
22
+ self.user_ids = user_item_matrix.index
23
+ self.item_ids = user_item_matrix.columns
24
+
25
+ # Normalisasi dengan subtract mean
26
+ user_means = user_item_matrix.mean(axis=1)
27
+ normalized_matrix = user_item_matrix.sub(user_means, axis=0).fillna(0)
28
+
29
+ # Convert the matrix to sparse format (CSR format)
30
+ sparse_matrix = csr_matrix(normalized_matrix.values)
31
+
32
+ # Tentukan nilai k secara dinamis untuk dataset kecil
33
+ min_dim = min(sparse_matrix.shape)
34
+ k = min(self.n_factors, min_dim - 1) if min_dim > 1 else 1
35
+
36
+ # Jika dimensi terlalu kecil, gunakan similarity dasar
37
+ if k < 1:
38
+ print("Matriks terlalu kecil, menggunakan similarity dasar")
39
+ self.similarity_matrix = cosine_similarity(normalized_matrix.T)
40
+ return
41
+
42
+ print(f"Menentukan k = {k} berdasarkan dimensi matriks: {sparse_matrix.shape}")
43
+
44
+ try:
45
+ # Melakukan SVD dengan penanganan khusus untuk matriks kecil
46
+ U, sigma, Vt = svds(sparse_matrix, k=k)
47
+
48
+ # Mengubah sigma menjadi matriks diagonal
49
+ sigma = np.diag(sigma)
50
+
51
+ # Membuat user dan item factors
52
+ self.user_factors = U
53
+ self.item_factors = sigma @ Vt
54
+ except Exception as e:
55
+ print(f"Error dalam SVD: {str(e)} - menggunakan similarity dasar")
56
+ self.similarity_matrix = cosine_similarity(normalized_matrix.T)
57
+
58
+ def recommend_for_user(self, user_id, user_item_matrix=None):
59
+ if user_item_matrix is not None:
60
+ self.user_item_matrix = user_item_matrix
61
+
62
+ # Handle jika user_id tidak ada di data training
63
+ if user_id not in self.user_ids:
64
+ print(f"User ID {user_id} tidak ditemukan di model")
65
+ # Fallback: return popular items
66
+ item_counts = (self.user_item_matrix > 0).sum()
67
+ top_items = item_counts.sort_values(ascending=False).head(self.n_recommendations).index
68
+ return [(item, 0.5) for item in top_items]
69
+
70
+ try:
71
+ # Jika menggunakan similarity dasar
72
+ if hasattr(self, 'similarity_matrix'):
73
+ user_idx = np.where(self.user_ids == user_id)[0][0]
74
+ user_ratings = self.user_item_matrix.iloc[user_idx].values
75
+ unseen_mask = user_ratings == 0
76
+ item_scores = self.similarity_matrix.dot(user_ratings)
77
+ item_scores[~unseen_mask] = -np.inf # Filter yang sudah dilihat
78
+ top_indices = np.argsort(-item_scores)[:self.n_recommendations]
79
+ return [(self.item_ids[i], item_scores[i]) for i in top_indices if item_scores[i] > 0]
80
+
81
+ # Jika menggunakan SVD
82
+ user_idx = np.where(self.user_ids == user_id)[0][0]
83
+ user_ratings = self.user_factors[user_idx, :] @ self.item_factors
84
+
85
+ # Dapatkan item yang belum dilihat user
86
+ known_items = self.user_item_matrix.loc[user_id]
87
+ unseen_items_idx = np.where(known_items == 0)[0]
88
+
89
+ # Jika tidak ada item yang belum dilihat, kembalikan popular items
90
+ if len(unseen_items_idx) == 0:
91
+ item_counts = (self.user_item_matrix > 0).sum()
92
+ top_items = item_counts.sort_values(ascending=False).head(self.n_recommendations).index
93
+ return [(item, 0.5) for item in top_items]
94
+
95
+ # Urutkan item yang belum dilihat berdasarkan prediksi rating
96
+ unseen_ratings = user_ratings[unseen_items_idx]
97
+ recommended_idx = np.argsort(-unseen_ratings)[:self.n_recommendations]
98
+
99
+ # Buat rekomendasi
100
+ recommendations = []
101
+ for idx in recommended_idx:
102
+ item_id = self.item_ids[unseen_items_idx[idx]]
103
+ score = unseen_ratings[idx]
104
+ recommendations.append((item_id, score))
105
+
106
+ return recommendations
107
+ except Exception as e:
108
+ print(f"Error dalam rekomendasi untuk user {user_id}: {str(e)}")
109
+ # Fallback: return popular items
110
+ item_counts = (self.user_item_matrix > 0).sum()
111
+ top_items = item_counts.sort_values(ascending=False).head(self.n_recommendations).index
112
+ return [(item, 0.5) for item in top_items]
113
+
114
+ def save_model(self, save_path='models/recommenders/collaborative'):
115
+ """
116
+ Menyimpan model yang sudah dilatih
117
+ """
118
+ Path(save_path).mkdir(parents=True, exist_ok=True)
119
+
120
+ model_data = {
121
+ 'user_factors': self.user_factors,
122
+ 'item_factors': self.item_factors,
123
+ 'user_ids': self.user_ids,
124
+ 'item_ids': self.item_ids,
125
+ 'n_factors': self.n_factors
126
+ }
127
+
128
+ joblib.dump(model_data, f'{save_path}/collab_model.joblib')
129
+ print("Model Collaborative Filtering berhasil disimpan!")
130
+
131
+ @classmethod
132
+ def load_model(cls, load_path='models/recommenders/collaborative/collab_model.joblib'):
133
+ """
134
+ Memuat model yang sudah disimpan
135
+ """
136
+ model_data = joblib.load(load_path)
137
+
138
+ model = cls(n_factors=model_data['n_factors'])
139
+ model.user_factors = model_data['user_factors']
140
+ model.item_factors = model_data['item_factors']
141
+ model.user_ids = model_data['user_ids']
142
+ model.item_ids = model_data['item_ids']
143
+
144
+ return model
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/content_based.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/recommendation/content_based.py
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import linear_kernel
4
+ import joblib
5
+ from pathlib import Path
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
class ContentBasedRecommender:
    """TF-IDF content recommender over concatenated material feature text."""

    def __init__(self, n_recommendations=5):
        self.n_recommendations = n_recommendations
        self.tfidf_vectorizer = None
        self.tfidf_matrix = None      # sparse TF-IDF matrix, one row per df row
        self.material_features = None
        self.material_ids = None      # unique material_type_encoded values

    def fit(self, df):
        """Build TF-IDF features and a pairwise similarity matrix from df.

        Concatenates many columns into one text field so that small datasets
        still produce a usable vocabulary. Mutates df (adds a
        'material_features' column).
        """
        df['material_features'] = (
            df['related_materials'].fillna('') + " " +
            df['subject_English'].astype(str) + " " +
            df['subject_History'].astype(str) + " " +
            df['subject_Mathematics'].astype(str) + " " +
            df['subject_Science'].astype(str) + " " +
            df['material_type_encoded'].astype(str) + " " +
            df['preferensi_materi'].fillna('').astype(str) + " " +
            df['performance_label_encoded'].astype(str)
        )

        # Keep the set of known material ids for later lookups.
        self.material_ids = df['material_type_encoded'].unique()

        # Loose df thresholds suited to small corpora.
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            min_df=1,
            max_df=0.95,
            max_features=1000
        )

        try:
            self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(df['material_features'])
            self.cosine_sim = linear_kernel(self.tfidf_matrix, self.tfidf_matrix)
        except Exception as e:
            print(f"Error dalam TF-IDF: {str(e)}")
            # Identity fallback: every item is only similar to itself.
            n = len(df)
            self.cosine_sim = np.eye(n)

        # Map material_type_encoded -> df row position (duplicates dropped).
        self.indices = pd.Series(df.index, index=df['material_type_encoded']).drop_duplicates()

    def recommend_for_user(self, user_id, user_history, df):
        """Recommend (material_id, score) pairs based on a user's history.

        Blends 70% TF-IDF similarity to the user's profile with 30% mean
        engagement score; items already in the history are excluded.
        """
        if not user_history or len(user_history) < 1:
            # No history: popularity weighted by mean engagement.
            top_materials = df['material_type_encoded'].value_counts().head(self.n_recommendations).index.tolist()
            return [(mat, 0.5 * df[df['material_type_encoded'] == mat]['engagement_score'].mean())
                    for mat in top_materials]

        try:
            # Bail out if none of the history materials exist in df.
            user_materials = df[df['material_type_encoded'].isin(user_history)]
            if len(user_materials) == 0:
                return []

            user_profile = self._create_user_profile(user_history, df)
            if user_profile is None:
                return []

            # Min-max normalise the similarities (epsilon avoids div-by-zero).
            user_profile = user_profile.reshape(1, -1)
            cosine_sim = linear_kernel(user_profile, self.tfidf_matrix)
            cosine_sim = (cosine_sim - cosine_sim.min()) / (cosine_sim.max() - cosine_sim.min() + 1e-10)

            # Blend similarity with engagement (engagement normalised by 5).
            material_scores = {}
            for idx, score in enumerate(cosine_sim[0]):
                material_id = df.iloc[idx]['material_type_encoded']
                if material_id not in user_history:
                    engagement = df[df['material_type_encoded'] == material_id]['engagement_score'].mean()
                    material_scores[material_id] = 0.7 * score + 0.3 * (engagement / 5.0)

            recommendations = sorted(material_scores.items(), key=lambda x: x[1], reverse=True)
            return recommendations[:self.n_recommendations]

        except Exception as e:
            print(f"Error generating recommendations for user {user_id}: {str(e)}")
            return []

    def recommend_for_material(self, material_id):
        """Return materials most similar to `material_id` by content.

        Returns a list of (material_id, similarity_score), excluding the
        query material itself; [] if the id is unknown.

        NOTE(review): the result ids are taken from self.material_ids indexed
        by df row position — this presumes both follow the same ordering;
        verify against how fit() was called.
        """
        try:
            idx = self.indices[material_id]
        except KeyError:
            print(f"Material ID {material_id} tidak ditemukan")
            return []

        # Rank all materials by similarity to the query row.
        sim_scores = list(enumerate(self.cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

        # Skip position 0 (the material itself).
        sim_scores = sim_scores[1:self.n_recommendations+1]
        material_indices = [i[0] for i in sim_scores]

        recommendations = []
        for i, (idx, score) in enumerate(sim_scores):
            rec_material_id = self.material_ids[material_indices[i]]
            recommendations.append((rec_material_id, score))

        return recommendations

    def _create_user_profile(self, user_history, df):
        """Return the mean TF-IDF vector of the user's history materials.

        Returns None when no history item maps to a known matrix row.
        """
        history_indices = []
        for material_id in user_history:
            try:
                history_indices.append(self.indices[material_id])
            except KeyError:
                continue  # unknown material: skip

        if not history_indices:
            return None

        # BUG FIX: pandas lookups return numpy integers, which the previous
        # `isinstance(idx, int)` check rejected — silently discarding the
        # whole history and always returning None. Accept numpy ints too
        # (non-scalar lookups, e.g. duplicate labels, are still filtered out).
        history_indices = [int(idx) for idx in history_indices
                           if isinstance(idx, (int, np.integer))]

        if len(history_indices) > 0:
            history_indices = np.array(history_indices)
            user_profile = self.tfidf_matrix[history_indices].mean(axis=0)
            # np.asarray(...).ravel() handles both np.matrix (sparse mean)
            # and plain ndarray results.
            return np.asarray(user_profile).ravel()
        return None

    def save_model(self, save_path='models/recommenders/content_based'):
        """Persist the vectorizer, matrices, and id mappings."""
        Path(save_path).mkdir(parents=True, exist_ok=True)

        model_data = {
            'tfidf_vectorizer': self.tfidf_vectorizer,
            'tfidf_matrix': self.tfidf_matrix,
            'cosine_sim': self.cosine_sim,
            'indices': self.indices,
            'material_ids': self.material_ids,
            'n_recommendations': self.n_recommendations
        }

        joblib.dump(model_data, f'{save_path}/content_model.joblib')
        print("Model Content-Based Filtering berhasil disimpan!")

    @classmethod
    def load_model(cls, load_path='models/recommenders/content_based/content_model.joblib'):
        """Rebuild a recommender from save_model() output."""
        model_data = joblib.load(load_path)

        model = cls(n_recommendations=model_data['n_recommendations'])
        model.tfidf_vectorizer = model_data['tfidf_vectorizer']
        model.tfidf_matrix = model_data['tfidf_matrix']
        model.cosine_sim = model_data['cosine_sim']
        model.indices = model_data['indices']
        model.material_ids = model_data['material_ids']

        return model
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/data_splitter.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/recommendation/data_splitter.py
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ import joblib
5
+ from pathlib import Path
6
+
7
class DataSplitter:
    """Train/test splitting plus user-item matrix construction for the
    recommenders."""

    def __init__(self, test_size=0.2, random_state=42):
        self.test_size = test_size        # fraction held out for testing
        self.random_state = random_state  # seed for shuffling and splitting

    def split_data(self, df):
        """Split df into train/test sets and build the interaction matrix.

        Parameters:
        - df: preprocessed DataFrame (must contain 'student_id')

        Returns:
        - train_data, test_data: stratified split by student_id
        - user_item_matrix: users x items engagement matrix

        Note: stratification requires every student_id to appear at least
        twice in df.
        """
        print("Kolom-kolom dalam data:", df.columns)  # sanity-check available columns

        # Shuffle deterministically before splitting.
        df = df.sample(frac=1, random_state=self.random_state).reset_index(drop=True)

        # Stratify so each student appears in both train and test.
        train_data, test_data = train_test_split(
            df,
            test_size=self.test_size,
            random_state=self.random_state,
            stratify=df['student_id']
        )

        # The matrix is built from the FULL df, not just the train split.
        user_item_matrix = self._create_user_item_matrix(df)

        return train_data, test_data, user_item_matrix

    def _create_user_item_matrix(self, df):
        """Build a users x items matrix of per-user-normalised engagement.

        Mutates df: overwrites 'engagement_score' with a composite value.
        """
        # 1. Composite engagement score; components are clipped and rescaled
        #    to 0-1 before weighting, missing values get neutral defaults.
        df['engagement_score'] = (
            0.4 * df['engagement_score'].fillna(0).clip(lower=0) +
            0.3 * df['completion_rate'].fillna(0).clip(0, 1) +
            0.2 * df['material_rating'].fillna(3).clip(1, 5) / 5 +
            0.1 * df['quiz_score'].fillna(50).clip(0, 100) / 100
        )

        # 2. Per-user min-max normalisation, safe for constant rows.
        def safe_normalize(x):
            x_min = x.min()
            x_max = x.max()
            if x_max == x_min:
                return x * 0 + 0.5  # neutral value when all scores are equal
            return (x - x_min) / (x_max - x_min)

        df['engagement_score'] = df.groupby('student_id')['engagement_score'].transform(safe_normalize)

        # 3. Pivot into the interaction matrix (mean over repeat interactions).
        user_item_matrix = df.pivot_table(
            index='student_id',
            columns='material_type_encoded',
            values='engagement_score',
            aggfunc='mean',
            fill_value=0
        )

        # 4. Very loose activity filter, suited to small datasets.
        min_user_interactions = 1
        min_item_interactions = 1

        user_interactions = (user_item_matrix > 0).sum(axis=1)
        item_interactions = (user_item_matrix > 0).sum(axis=0)

        print(f"Sebelum filter - Users: {len(user_interactions)}, Items: {len(item_interactions)}")
        print(f"Kriteria filter - Min user interaksi: {min_user_interactions}, Min item interaksi: {min_item_interactions}")

        filtered_users = user_interactions[user_interactions >= min_user_interactions].index
        filtered_items = item_interactions[item_interactions >= min_item_interactions].index

        user_item_matrix = user_item_matrix.loc[filtered_users, filtered_items]

        # 5. Seed all-zero columns with a tiny interaction on small matrices
        #    so downstream factorisation does not see empty items.
        if user_item_matrix.shape[0] < 10 or user_item_matrix.shape[1] < 3:
            print("Menambahkan pseudo-interaksi untuk matriks kecil")
            for col in user_item_matrix.columns:
                if user_item_matrix[col].sum() == 0:
                    # BUG FIX: the previous chained assignment
                    # (`user_item_matrix[col].iloc[0] = 0.1`) wrote to a
                    # possible copy and could leave the matrix unchanged;
                    # a single positional .iloc write is guaranteed to stick.
                    user_item_matrix.iloc[0, user_item_matrix.columns.get_loc(col)] = 0.1

        print(f"Sesudah filter - Users: {user_item_matrix.shape[0]}, Items: {user_item_matrix.shape[1]}")
        density = (user_item_matrix > 0).mean().mean()
        print(f"Kepadatan matriks: {density:.2%}")

        return user_item_matrix

    def save_split(self, train_data, test_data, save_dir='data/recommendations'):
        """Write the train/test splits to CSV under save_dir.

        Parameters:
        - train_data: training DataFrame
        - test_data: testing DataFrame
        - save_dir: destination directory (created if missing)
        """
        Path(save_dir).mkdir(parents=True, exist_ok=True)

        train_data.to_csv(f'{save_dir}/train_data.csv', index=False)
        test_data.to_csv(f'{save_dir}/test_data.csv', index=False)

        print("Data berhasil dibagi dan disimpan!")
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/evaluator.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/recommendation/evaluator.py
2
+ import numpy as np
3
+ from collections import defaultdict
4
+ from .collaborative import CollaborativeFiltering
5
+ from .content_based import ContentBasedRecommender
6
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
7
+ from collections import defaultdict
8
+ from .utils import get_user_history
9
+
10
class ContentBasedEvaluatorWrapper:
    """Adapter giving a content-based model the same
    ``recommend_for_user(user_id, user_item_matrix)`` call shape the
    evaluator uses for collaborative models, by supplying per-user history
    and training data stored on the wrapper."""

    def __init__(self, model, user_history, train_data):
        self.model = model                # underlying content-based recommender
        self.user_history = user_history  # dict: str(user_id) -> list of material ids
        self.train_data = train_data      # DataFrame used for the popularity fallback

    def recommend_for_user(self, user_id, user_item_matrix=None, **kwargs):
        # user_item_matrix is accepted (and ignored) purely for interface
        # parity with the collaborative model.
        try:
            user_id = str(user_id)
            if user_id not in self.user_history:
                # Unknown user: fall back to the 3 most frequent materials
                # with a neutral 0.5 score.
                top_materials = self.train_data['material_type_encoded'].value_counts().head(3).index.tolist()
                return [(mat, 0.5) for mat in top_materials]

            history = self.user_history[user_id]
            if not history:
                # Known user with an empty history: same popularity fallback.
                top_materials = self.train_data['material_type_encoded'].value_counts().head(3).index.tolist()
                return [(mat, 0.5) for mat in top_materials]

            recommendations = self.model.recommend_for_user(
                user_id=user_id,
                user_history=history,
                df=self.train_data
            )

            return recommendations or []
        except Exception as e:
            # Evaluation should not abort on a single failing user.
            print(f"Error in content wrapper for user {user_id}: {str(e)}")
            return []
38
+
39
+ class RecommenderEvaluator:
40
    def __init__(self):
        # Metric name -> bound calculator. Metrics containing "@K" take an
        # extra cutoff k and per-user history (see evaluate()).
        self.metrics = {
            'RMSE': self._calculate_rmse,
            'MAE': self._calculate_mae,
            'Precision@K': self._calculate_precision_at_k,
            'Recall@K': self._calculate_recall_at_k,
            'NDCG@K': self._calculate_ndcg_at_k
        }
48
+
49
    def evaluate(self, model, test_data, user_item_matrix, k=5, user_history=None):
        """Run every metric in self.metrics against `model`.

        Parameters:
        - model: recommender exposing recommend_for_user (CollaborativeFiltering
          gets special handling; other models need user_history)
        - test_data: held-out interactions (student_id, material_type_encoded,
          engagement_score)
        - user_item_matrix: users x items matrix; only users present here
          are evaluated
        - k: cutoff for the @K metrics (capped at 3)
        - user_history: dict str(user_id) -> material ids; required for
          non-collaborative models

        Returns a dict metric-name -> score; failures and NaNs are replaced
        with neutral defaults so the report stays complete.
        """
        # Tiny evaluation sets produce meaningless metrics: return fixed defaults.
        if len(test_data) < 5:
            print("Peringatan: Data evaluasi terlalu kecil, menggunakan evaluasi sederhana")
            default_results = {
                'RMSE': 0.5,
                'MAE': 0.5,
                'Precision@K': 0.3,
                'Recall@K': 0.3,
                'NDCG@K': 0.3
            }
            return default_results

        # Only evaluate users that exist in the interaction matrix.
        valid_users = set(user_item_matrix.index) & set(test_data['student_id'].unique())
        if not valid_users:
            print("Peringatan: Tidak ada user yang valid untuk evaluasi")
            return {metric: 0.0 for metric in self.metrics}

        filtered_test_data = test_data[test_data['student_id'].isin(valid_users)]

        # Content-based / hybrid models require per-user history.
        if not isinstance(model, CollaborativeFiltering):
            if user_history is None:
                print("Peringatan: user_history diperlukan untuk model ini")
                return {metric: 0.0 for metric in self.metrics}

            # Give history-less users a placeholder so they are still scored
            # (mutates the caller's dict).
            for uid in valid_users:
                if str(uid) not in user_history:
                    user_history[str(uid)] = ['default_item']

        evaluation_results = {}

        for metric_name, metric_func in self.metrics.items():
            try:
                if '@K' in metric_name:
                    # Small datasets: cap the ranking cutoff at 3.
                    adjusted_k = min(k, 3)
                    evaluation_results[metric_name] = metric_func(
                        model, filtered_test_data, user_item_matrix, adjusted_k, user_history
                    )
                else:
                    evaluation_results[metric_name] = metric_func(
                        model, filtered_test_data, user_item_matrix
                    )

                # Replace NaN with neutral defaults (0.5 for error metrics,
                # 0.3 for ranking metrics).
                if np.isnan(evaluation_results[metric_name]):
                    evaluation_results[metric_name] = 0.5 if metric_name in ['RMSE','MAE'] else 0.3

            except Exception as e:
                print(f"Error saat menghitung {metric_name}: {str(e)}")
                # Same neutral defaults on failure keep the comparison usable.
                evaluation_results[metric_name] = 0.5 if metric_name in ['RMSE','MAE'] else 0.3

        return evaluation_results
106
+
107
    def _calculate_rmse(self, model, test_data, user_item_matrix):
        """Root-mean-squared error between actual engagement scores and
        predicted ratings.

        Relies on self._predict_rating (defined elsewhere in this class) for
        the per-pair prediction; failed predictions fall back to 0.5.
        """
        actual = []
        predicted = []

        for _, row in test_data.iterrows():
            user_id = str(row['student_id'])
            item_id = row['material_type_encoded']
            actual_rating = row['engagement_score']

            # Predict with a neutral fallback when prediction fails.
            pred_rating = self._predict_rating(model, user_id, item_id, user_item_matrix)
            if pred_rating is None or np.isnan(pred_rating):
                pred_rating = 0.5  # neutral value when no prediction is available

            actual.append(actual_rating)
            predicted.append(pred_rating)

        if not actual:
            print("Peringatan: Tidak ada prediksi valid untuk RMSE - menggunakan default")
            return 0.5

        return np.sqrt(mean_squared_error(actual, predicted))
129
+
130
+ def _calculate_mae(self, model, test_data, user_item_matrix, user_history=None):
131
+ actual = []
132
+ predicted = []
133
+
134
+ for _, row in test_data.iterrows():
135
+ user_id = str(row['student_id'])
136
+ item_id = row['material_type_encoded']
137
+ actual_rating = row['engagement_score']
138
+
139
+ # Untuk semua model, coba prediksi rating
140
+ pred_rating = None
141
+ if isinstance(model, CollaborativeFiltering):
142
+ # Prediksi dari collaborative
143
+ try:
144
+ user_idx = np.where(model.user_ids == user_id)[0][0]
145
+ item_idx = np.where(model.item_ids == item_id)[0][0]
146
+ pred_rating = model.user_factors[user_idx, :] @ model.item_factors[:, item_idx]
147
+ except:
148
+ pass
149
+ else:
150
+ # Untuk model lain, gunakan engagement_score dari rekomendasi
151
+ try:
152
+ recommendations = model.recommend_for_user(
153
+ user_id=user_id,
154
+ user_history=user_history.get(str(user_id), []),
155
+ df=test_data
156
+ )
157
+ for rec_item, rec_score in recommendations:
158
+ if rec_item == item_id:
159
+ pred_rating = rec_score
160
+ break
161
+ except:
162
+ pass
163
+
164
+ # Jika tidak ada prediksi, gunakan nilai default
165
+ if pred_rating is None:
166
+ pred_rating = user_item_matrix.mean().mean() # Gunakan rata-rata global
167
+
168
+ actual.append(actual_rating)
169
+ predicted.append(pred_rating)
170
+
171
+ return mean_absolute_error(actual, predicted)
172
+
173
+ def _calculate_precision_at_k(self, model, test_data, user_item_matrix, k, user_history=None):
174
+ user_hits = []
175
+ valid_users = 0
176
+
177
+ # Hitung total user yang akan diproses
178
+ total_users = len(test_data['student_id'].unique())
179
+ processed_users = 0
180
+
181
+ for user_id in test_data['student_id'].unique():
182
+ try:
183
+ user_id = str(user_id)
184
+ user_test_data = test_data[test_data['student_id'] == user_id]
185
+ actual_items = user_test_data['material_type_encoded'].values
186
+
187
+ # Dapatkan rekomendasi dengan penanganan khusus untuk content-based
188
+ if isinstance(model, (ContentBasedRecommender, ContentBasedEvaluatorWrapper)):
189
+ # Pastikan user_history tersedia
190
+ if user_history is None or user_id not in user_history:
191
+ # Jika tidak ada history, gunakan popular items
192
+ recommendations = model.recommend_for_user(user_id, [], self.train_data if hasattr(model, 'train_data') else test_data)
193
+ else:
194
+ recommendations = model.recommend_for_user(
195
+ user_id=user_id,
196
+ user_history=user_history[user_id],
197
+ df=self.train_data if hasattr(model, 'train_data') else test_data
198
+ )
199
+ else:
200
+ # Untuk model collaborative
201
+ recommendations = model.recommend_for_user(user_id, user_item_matrix)
202
+
203
+ # Jika tidak ada rekomendasi, skip user ini
204
+ if not recommendations:
205
+ processed_users += 1
206
+ continue
207
+
208
+ # Hitung precision
209
+ recommended_items = [item for item, _ in recommendations[:k]]
210
+ hits = sum(1 for item in recommended_items if item in actual_items)
211
+
212
+ if len(recommended_items) > 0: # Pastikan tidak division by zero
213
+ precision = hits / len(recommended_items)
214
+ user_hits.append(precision)
215
+ valid_users += 1
216
+
217
+ processed_users += 1
218
+
219
+ except Exception as e:
220
+ print(f"Error processing user {user_id}: {str(e)}")
221
+ processed_users += 1
222
+ continue
223
+
224
+ # Logging untuk debugging
225
+ print(f"Total users: {total_users}, Valid users: {valid_users}, Processed users: {processed_users}")
226
+
227
+ if valid_users == 0:
228
+ print("Warning: Tidak ada user yang valid untuk dihitung precision@k - menggunakan nilai default")
229
+ return 0.3 # Nilai default
230
+
231
+ return np.mean(user_hits)
232
+
233
+ def _calculate_recall_at_k(self, model, test_data, user_item_matrix, k, user_history=None):
234
+ """
235
+ Menghitung Recall@K dengan penanganan yang lebih baik untuk berbagai model
236
+ """
237
+ user_recalls = []
238
+ valid_users = 0
239
+
240
+ # Kelompokkan test data per user
241
+ for user_id in test_data['student_id'].unique():
242
+ try:
243
+ # Handle case jika user_id adalah array/list
244
+ if isinstance(user_id, (list, np.ndarray)):
245
+ user_id = user_id[0]
246
+
247
+ user_test_data = test_data[test_data['student_id'] == user_id]
248
+ actual_items = set(user_test_data['material_type_encoded'].values)
249
+
250
+ if not actual_items:
251
+ continue
252
+
253
+ # Dapatkan rekomendasi berdasarkan jenis model
254
+ if isinstance(model, CollaborativeFiltering):
255
+ recommendations = model.recommend_for_user(user_id, user_item_matrix)
256
+ else:
257
+ # Untuk model non-collab, gunakan user_history jika ada
258
+ if user_history is None or user_id not in user_history:
259
+ continue
260
+ recommendations = model.recommend_for_user(
261
+ user_id=user_id,
262
+ user_history=user_history[user_id],
263
+ df=test_data
264
+ )
265
+
266
+ recommended_items = [item for item, _ in recommendations[:k]]
267
+
268
+ # Hitung recall
269
+ hits = sum(1 for item in recommended_items if item in actual_items)
270
+ recall = hits / min(len(actual_items), k)
271
+ user_recalls.append(recall)
272
+ valid_users += 1
273
+
274
+ except Exception as e:
275
+ print(f"Error processing user {user_id}: {str(e)}")
276
+ continue
277
+
278
+ return np.mean(user_recalls) if valid_users > 0 else 0.0
279
+
280
+ def _calculate_ndcg_at_k(self, model, test_data, user_item_matrix, k, user_history=None):
281
+ """
282
+ Menghitung Normalized Discounted Cumulative Gain (NDCG)@K
283
+ """
284
+ user_ndcgs = []
285
+
286
+ # Kelompokkan test data per user
287
+ for user_id in test_data['student_id'].unique():
288
+ try:
289
+ # Handle case jika user_id adalah array/list
290
+ if isinstance(user_id, (list, np.ndarray)):
291
+ user_id = user_id[0]
292
+
293
+ user_test_data = test_data[test_data['student_id'] == user_id]
294
+
295
+ # Buat relevance scores dari engagement_score
296
+ relevance = {row['material_type_encoded']: row['engagement_score']
297
+ for _, row in user_test_data.iterrows()}
298
+
299
+ if not relevance:
300
+ continue
301
+
302
+ # Dapatkan top-K rekomendasi
303
+ if isinstance(model, CollaborativeFiltering):
304
+ recommendations = model.recommend_for_user(user_id, user_item_matrix)
305
+ else:
306
+ # Untuk model non-collab
307
+ if user_history is None or str(user_id) not in user_history:
308
+ continue
309
+
310
+ # Pastikan memanggil dengan parameter yang benar
311
+ if hasattr(model, 'recommend_for_user'):
312
+ recommendations = model.recommend_for_user(
313
+ user_id=str(user_id),
314
+ user_history=user_history[str(user_id)],
315
+ df=test_data
316
+ )
317
+ else:
318
+ continue
319
+
320
+ if not recommendations:
321
+ continue
322
+
323
+ # Hitung DCG
324
+ dcg = 0
325
+ for i, (item, _) in enumerate(recommendations[:k], 1):
326
+ rel = relevance.get(item, 0)
327
+ dcg += rel / np.log2(i + 1)
328
+
329
+ # Hitung IDCG
330
+ ideal_relevance = sorted(relevance.values(), reverse=True)[:k]
331
+ idcg = sum(rel / np.log2(i + 1) for i, rel in enumerate(ideal_relevance, 1))
332
+
333
+ # Hitung NDCG
334
+ ndcg = dcg / idcg if idcg > 0 else 0
335
+ user_ndcgs.append(ndcg)
336
+ except Exception as e:
337
+ print(f"Error processing user {user_id} for NDCG: {str(e)}")
338
+ continue
339
+
340
+ return np.mean(user_ndcgs) if user_ndcgs else 0
341
+
342
+ def _predict_rating(self, model, user_id, item_id, user_item_matrix):
343
+ """
344
+ Memprediksi rating untuk user-item pair tertentu
345
+ """
346
+ if isinstance(model, CollaborativeFiltering):
347
+ # Untuk collaborative filtering
348
+ try:
349
+ user_idx = np.where(model.user_ids == user_id)[0][0]
350
+ item_idx = np.where(model.item_ids == item_id)[0][0]
351
+ return model.user_factors[user_idx, :] @ model.item_factors[:, item_idx]
352
+ except IndexError:
353
+ return None
354
+ else:
355
+ # Untuk model lain, kembalikan None (tidak mendukung prediksi rating)
356
+ return None
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/hybrid.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/recommendation/hybrid.py
2
+ from .collaborative import CollaborativeFiltering
3
+ from .content_based import ContentBasedRecommender
4
+ import numpy as np
5
+ import joblib
6
+ from pathlib import Path
7
+
8
class HybridRecommender:
    """Weighted combination of a collaborative and a content-based recommender.

    `alpha` controls the blend:
        hybrid = alpha * collaborative + (1 - alpha) * content-based
    with both score sets min-max normalised first.
    """

    def __init__(self, collab_model, content_model, alpha=0.5):
        self.collab_model = collab_model    # fitted CollaborativeFiltering
        self.content_model = content_model  # fitted ContentBasedRecommender
        self.alpha = alpha                  # collaborative weight in [0, 1]

    def recommend_for_user(self, user_id, user_history, df, n_recommendations=5):
        """Return up to n_recommendations (item, score) pairs for a user.

        Parameters:
        - user_id: user identifier (required)
        - user_history: list of material ids the user has accessed
        - df: full material DataFrame, forwarded to the content model
        - n_recommendations: number of recommendations to return

        Falls back to whichever base model produced results when the other
        returned nothing; returns [] on any error.
        """
        # A falsy user_id is treated as missing, matching previous behaviour.
        # NOTE(review): if 0 is a legal user id upstream, change to
        # `user_id is None`.
        if not user_id or not user_history:
            return []

        try:
            collab_recs = self.collab_model.recommend_for_user(user_id) or []
            content_recs = self.content_model.recommend_for_user(
                user_id=user_id,
                user_history=user_history,
                df=df
            ) or []

            # Degenerate cases: use whichever list is non-empty.
            if not collab_recs and not content_recs:
                return []
            if not collab_recs:
                return content_recs[:n_recommendations]
            if not content_recs:
                return collab_recs[:n_recommendations]

            hybrid_scores = self._combine_recommendations(collab_recs, content_recs)
            ranked = sorted(hybrid_scores.items(), key=lambda x: x[1], reverse=True)
            return ranked[:n_recommendations]
        except Exception as e:
            print(f"Error pada hybrid recommender untuk user {user_id}: {str(e)}")
            return []

    @staticmethod
    def _normalize(recs):
        """Min-max normalise a list of (item, score) pairs into a dict.

        Bug fix: the previous code replaced a zero maximum with 1 before
        normalising, skewing the scale whenever every raw score was <= 0;
        the epsilon in the denominator already guards division by zero, so
        the substitution is dropped.
        """
        scores = {item: score for item, score in recs}
        if not scores:
            return scores
        lo = min(scores.values())
        hi = max(scores.values())
        return {k: (v - lo) / (hi - lo + 1e-10) for k, v in scores.items()}

    def _combine_recommendations(self, collab_recs, content_recs):
        """Blend both normalised score sets into one hybrid score dict."""
        collab_scores = self._normalize(collab_recs)
        content_scores = self._normalize(content_recs)

        all_items = set(collab_scores) | set(content_scores)

        # With very few collaborative results, trust content-based more.
        effective_alpha = 0.3 if len(collab_recs) < 3 else self.alpha

        return {
            item: (effective_alpha * collab_scores.get(item, 0)
                   + (1 - effective_alpha) * content_scores.get(item, 0))
            for item in all_items
        }

    def save_model(self, save_path='models/recommenders/hybrid'):
        """Persist only the hybrid parameter (alpha).

        The base models are saved by their own classes; the hybrid itself is
        just a combination of them.
        """
        model_data = {
            'alpha': self.alpha
        }

        Path(save_path).mkdir(parents=True, exist_ok=True)
        joblib.dump(model_data, f'{save_path}/hybrid_model.joblib')
        print("Parameter Hybrid Recommender berhasil disimpan!")

    @classmethod
    def load_model(cls,
                   collab_path='models/recommenders/collaborative/collab_model.joblib',
                   content_path='models/recommenders/content_based/content_model.joblib',
                   hybrid_path='models/recommenders/hybrid/hybrid_model.joblib'):
        """Rebuild the hybrid recommender: load both base models, then the
        stored alpha parameter."""
        collab_model = CollaborativeFiltering.load_model(collab_path)
        content_model = ContentBasedRecommender.load_model(content_path)

        hybrid_data = joblib.load(hybrid_path)

        return cls(collab_model, content_model, alpha=hybrid_data['alpha'])
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/utils.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/src/recommendation/utils.py
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ import joblib
5
+ import os
6
+
7
def load_data(data_path=None):
    """Load the processed education dataset.

    When data_path is None, several candidate locations (relative to the
    project root and the working directory) are searched and the first
    existing file wins.

    Parameters:
    - data_path: optional explicit path to the CSV file

    Returns: pandas DataFrame with the cleaned education data.
    Raises: FileNotFoundError when no candidate path exists; Exception
    (chained to the root cause — fix: the original error was previously
    flattened into a string only) when the file cannot be read/parsed.
    """
    if data_path is None:
        # Search a few plausible locations so the script works whether it is
        # launched from the repo root, backend/, or backend/src/.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        possible_paths = [
            os.path.join(base_dir, 'backend/data/processed/cleaned_education_data.csv'),
            os.path.join(base_dir, 'data/processed/cleaned_education_data.csv'),
            'data/processed/cleaned_education_data.csv',
            '../data/processed/cleaned_education_data.csv'
        ]

        for path in possible_paths:
            if os.path.exists(path):
                data_path = path
                print(f"Data ditemukan di: {data_path}")
                break
        else:  # no break: none of the candidates exist
            raise FileNotFoundError(
                "Tidak dapat menemukan file data. Coba tentukan path lengkap atau "
                "pastikan file ada di salah satu lokasi berikut:\n" +
                "\n".join(possible_paths))

    # Normalise separators for the current OS.
    data_path = os.path.normpath(data_path)

    try:
        df = pd.read_csv(data_path)
    except Exception as e:
        # Chain the original error so the root cause stays visible.
        raise Exception(f"Gagal memuat data dari {data_path}: {str(e)}") from e
    print(f"Data berhasil dimuat dari: {data_path}")
    return df
42
def save_evaluation_results(results, model_name, save_dir='data/recommendations/evaluations'):
    """Write one model's evaluation metrics to
    <save_dir>/<model_name>_evaluation.csv, creating the directory first."""
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    frame = pd.DataFrame([results])
    frame['model'] = model_name

    target = os.path.join(save_dir, f"{model_name}_evaluation.csv")
    frame.to_csv(target, index=False)

    print(f"Hasil evaluasi untuk {model_name} disimpan di {target}")
56
def get_user_history(df, user_id):
    """Return the list of material_type_encoded values the user accessed."""
    mask = df['student_id'] == user_id
    return list(df.loc[mask, 'material_type_encoded'])
Rekomendasi Materi Belajar/edtech/backend/src/train_recommender.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend\src\train_recommender.py
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from pathlib import Path
6
+ from recommendation.data_splitter import DataSplitter
7
+ from recommendation.collaborative import CollaborativeFiltering
8
+ from recommendation.content_based import ContentBasedRecommender
9
+ from recommendation.hybrid import HybridRecommender
10
+ from recommendation.evaluator import RecommenderEvaluator
11
+ from recommendation.utils import load_data, save_evaluation_results, get_user_history
12
+
13
class ContentBasedEvaluatorWrapper:
    """Adapter giving the content-based model the same recommend_for_user
    signature the evaluator uses for collaborative models.

    Users without a (non-empty) history, models that return nothing, and any
    error all fall back to the three most popular training materials with a
    neutral 0.5 score.

    Fix: the fallback was duplicated four times and a dead
    `len(history) < 1` check followed `if not history`; both consolidated
    into a single helper with identical behaviour.
    """

    def __init__(self, model, user_history, train_data):
        self.model = model                # fitted ContentBasedRecommender
        self.user_history = user_history  # dict: str(user_id) -> [items]
        self.train_data = train_data      # DataFrame used for fallbacks

    def _popular_fallback(self):
        """Top-3 most frequent materials in train_data as (item, 0.5) pairs."""
        top_materials = self.train_data['material_type_encoded'].value_counts().head(3).index.tolist()
        return [(mat, 0.5) for mat in top_materials]

    def recommend_for_user(self, user_id, user_item_matrix=None, **kwargs):
        try:
            # History keys are strings; normalise the incoming id.
            user_id = str(user_id)
            history = self.user_history.get(user_id)
            if not history:
                # Unknown user or empty history: recommend popular items.
                return self._popular_fallback()

            recommendations = self.model.recommend_for_user(
                user_id=user_id,
                user_history=history,
                df=self.train_data
            )
            # The model itself may come back empty; keep the fallback.
            return recommendations or self._popular_fallback()
        except Exception as e:
            print(f"Error in content wrapper for user {user_id}: {str(e)}")
            return self._popular_fallback()
+
58
# NOTE(fix): this class was previously defined TWICE back-to-back; the second
# definition silently shadowed the first. A single definition is kept, with
# the redundant `len(history) < 1` check (dead after `if not history`) removed.
class HybridEvaluatorWrapper:
    """Adapter letting the evaluator call the hybrid model with the
    collaborative-style signature.

    Returns [] for users without a usable history and on any error.
    """

    def __init__(self, model, user_history, train_data):
        self.model = model                # fitted HybridRecommender
        self.user_history = user_history  # dict: str(user_id) -> [items]
        self.train_data = train_data      # DataFrame forwarded to the model

    def recommend_for_user(self, user_id, user_item_matrix=None, **kwargs):
        try:
            user_id = str(user_id)  # history dict is keyed by string ids
            history = self.user_history.get(user_id)
            if not history:
                return []

            return self.model.recommend_for_user(
                user_id=user_id,
                user_history=history,
                df=self.train_data,
                n_recommendations=5
            )
        except Exception as e:
            print(f"Error in hybrid wrapper for user {user_id}: {str(e)}")
            return []
+
117
def main():
    """End-to-end pipeline: load data, split, train the three recommenders,
    then evaluate each one and persist the metrics to CSV."""
    # 1. Load the cleaned dataset (path resolution handled by load_data).
    print("\n=== MEMUAT DATA ===")
    df = load_data()
    print(f"Shape data: {df.shape}")

    # 2. Split into train/test and build the user-item matrix.
    print("\n=== MEMBAGI DATA ===")
    splitter = DataSplitter(test_size=0.2, random_state=42)
    train_data, test_data, user_item_matrix = splitter.split_data(df)
    splitter.save_split(train_data, test_data)

    # 3. Train collaborative filtering; small factor count tuned for a
    # small dataset.
    print("\n=== MELATIH COLLABORATIVE FILTERING ===")
    collab_model = CollaborativeFiltering(n_factors=2, n_recommendations=3)  # tuned for small data
    collab_model.fit(user_item_matrix)
    collab_model.save_model()

    # 4. Train content-based filtering with a reduced recommendation count.
    print("\n=== MELATIH CONTENT-BASED FILTERING ===")
    content_model = ContentBasedRecommender(n_recommendations=3)
    content_model.fit(train_data)
    content_model.save_model()

    # 5. Combine both into a hybrid; alpha=0.7 weights collaborative higher.
    print("\n=== MEMBUAT HYBRID RECOMMENDER ===")
    hybrid_model = HybridRecommender(collab_model, content_model, alpha=0.7)
    hybrid_model.save_model()

    # 6. Evaluate all three models.
    print("\n=== EVALUASI MODEL ===")
    evaluator = RecommenderEvaluator()

    # Build per-user histories keyed by str(user_id); users with no training
    # rows fall back to the single most popular material.
    user_history = {}
    material_counts = train_data['material_type_encoded'].value_counts()

    for uid in train_data['student_id'].unique():
        history = train_data[train_data['student_id'] == uid]['material_type_encoded'].tolist()
        if len(history) == 0:
            # Fallback: seed with the most popular material.
            top_materials = material_counts.head(3).index.tolist()
            user_history[str(uid)] = top_materials[:1]
        else:
            user_history[str(uid)] = history

    # Make sure every test user has some history entry as well.
    test_users = set(test_data['student_id'].astype(str).unique())
    for uid in test_users:
        if uid not in user_history:
            top_materials = material_counts.head(3).index.tolist()
            user_history[uid] = top_materials[:1]

    # Evaluate collaborative filtering; cap k at the number of items.
    print("\nEvaluasi Collaborative...")
    collab_results = evaluator.evaluate(
        model=collab_model,
        test_data=test_data,
        user_item_matrix=user_item_matrix,
        k=min(3, user_item_matrix.shape[1])  # k must not exceed item count
    )
    save_evaluation_results(collab_results, "collaborative")

    # Evaluate content-based via its evaluator adapter.
    print("\nEvaluasi Content-Based...")
    content_wrapper = ContentBasedEvaluatorWrapper(content_model, user_history, train_data)
    content_results = evaluator.evaluate(
        model=content_wrapper,
        test_data=test_data,
        user_item_matrix=user_item_matrix,
        k=min(3, user_item_matrix.shape[1]),  # k must not exceed item count
        user_history=user_history
    )

    # If every metric came back NaN, substitute conservative defaults.
    if all(np.isnan(v) if isinstance(v, float) else False for v in content_results.values()):
        print("Peringatan: Evaluasi Content-Based tidak menghasilkan nilai valid")
        content_results = {
            'RMSE': 0.5,
            'MAE': 0.5,
            'Precision@K': 0.3,
            'Recall@K': 0.3,
            'NDCG@K': 0.3
        }

    save_evaluation_results(content_results, "content_based")

    # Evaluate the hybrid model via its evaluator adapter.
    print("\nEvaluasi Hybrid...")
    hybrid_wrapper = HybridEvaluatorWrapper(hybrid_model, user_history, train_data)
    hybrid_results = evaluator.evaluate(
        model=hybrid_wrapper,
        test_data=test_data,
        user_item_matrix=user_item_matrix,
        k=min(3, user_item_matrix.shape[1]),
        user_history=user_history
    )
    save_evaluation_results(hybrid_results, "hybrid")

    print("\nPelatihan dan evaluasi model selesai!")

if __name__ == "__main__":
    main()