Upload 14 files
#1
by reennv - opened
- Rekomendasi Materi Belajar/edtech/backend/models/recommenders/collaborative/collab_model.joblib +3 -0
- Rekomendasi Materi Belajar/edtech/backend/models/recommenders/content_based/content_model.joblib +3 -0
- Rekomendasi Materi Belajar/edtech/backend/models/recommenders/hybrid/hybrid_model.joblib +3 -0
- Rekomendasi Materi Belajar/edtech/backend/src/app.py +193 -0
- Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/collaborative/collab_model.joblib +3 -0
- Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/content_based/content_model.joblib +3 -0
- Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/hybrid/hybrid_model.joblib +3 -0
- Rekomendasi Materi Belajar/edtech/backend/src/recommendation/collaborative.py +144 -0
- Rekomendasi Materi Belajar/edtech/backend/src/recommendation/content_based.py +198 -0
- Rekomendasi Materi Belajar/edtech/backend/src/recommendation/data_splitter.py +117 -0
- Rekomendasi Materi Belajar/edtech/backend/src/recommendation/evaluator.py +356 -0
- Rekomendasi Materi Belajar/edtech/backend/src/recommendation/hybrid.py +127 -0
- Rekomendasi Materi Belajar/edtech/backend/src/recommendation/utils.py +61 -0
- Rekomendasi Materi Belajar/edtech/backend/src/train_recommender.py +220 -0
Rekomendasi Materi Belajar/edtech/backend/models/recommenders/collaborative/collab_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4aef73c6272415cb11002c1ff5c96f65587498acaa7c86ad4f7167d1d73fe48
|
| 3 |
+
size 6080
|
Rekomendasi Materi Belajar/edtech/backend/models/recommenders/content_based/content_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63d1a2f5acb72fa4e6c3825586d578da46d850c31d82883ef50f618789722977
|
| 3 |
+
size 5211833
|
Rekomendasi Materi Belajar/edtech/backend/models/recommenders/hybrid/hybrid_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d173427052471e467df306ab61013e0599cfb0a80ff3805e464f9b7a25166933
|
| 3 |
+
size 32
|
Rekomendasi Materi Belajar/edtech/backend/src/app.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/app.py/recommendation
|
| 2 |
+
from fastapi import FastAPI, HTTPException
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from contextlib import asynccontextmanager
|
| 6 |
+
import joblib
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from typing import List, Optional
|
| 9 |
+
import uvicorn
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from recommendation.collaborative import CollaborativeFiltering
|
| 12 |
+
from recommendation.content_based import ContentBasedRecommender
|
| 13 |
+
from recommendation.hybrid import HybridRecommender
|
| 14 |
+
|
| 15 |
+
# ===== KONFIGURASI SERVER =====
|
| 16 |
+
HOST = "0.0.0.0" #untuk deploy hugging face
|
| 17 |
+
PORT = 8025
|
| 18 |
+
RELOAD = True # Set False di production
|
| 19 |
+
WORKERS = 1
|
| 20 |
+
|
| 21 |
+
# ===== LIFESPAN MANAGEMENT =====
|
| 22 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load all recommender models at startup and log shutdown.

    The three models are attached to ``app.state`` so request handlers
    can reach them without globals. If any model fails to load, startup
    is aborted instead of serving requests with missing models.
    """
    print("Memuat model rekomendasi...")
    try:
        # Load all three recommenders from their serialized files.
        app.state.collab_model = CollaborativeFiltering.load_model(COLLAB_MODEL_PATH)
        app.state.content_model = ContentBasedRecommender.load_model(CONTENT_MODEL_PATH)
        app.state.hybrid_model = HybridRecommender.load_model(
            collab_path=COLLAB_MODEL_PATH,
            content_path=CONTENT_MODEL_PATH,
            hybrid_path=HYBRID_MODEL_PATH
        )
        print("✅ Model berhasil dimuat!")
    except Exception as e:
        print(f"❌ Gagal memuat model: {str(e)}")
        # BUG FIX: HTTPException is only meaningful inside a request handler;
        # raised here during startup it is never rendered as an HTTP response.
        # RuntimeError aborts startup cleanly and keeps the original cause.
        raise RuntimeError("Gagal memuat model") from e
    yield
    print("🛑 Server dimatikan")
|
| 41 |
+
|
| 42 |
+
# ===== INISIALISASI APLIKASI =====
|
| 43 |
+
app = FastAPI(
    title="Sistem Rekomendasi Materi Pembelajaran",
    description="API untuk memberikan rekomendasi materi pembelajaran personalisasi",
    version="1.0.2",
    lifespan=lifespan,  # models are loaded/released by the lifespan handler
    docs_url="/docs",
    redoc_url="/redoc"
)

# ===== CORS CONFIGURATION =====
# Only the local frontend origin is allowed; adjust for deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3025"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ===== MODEL PATHS =====
# NOTE: defined textually after lifespan(), but only resolved when the
# server starts, so the forward reference is safe. Paths are relative to
# the process working directory — TODO confirm against the deploy layout.
MODEL_DIR = Path("models/recommenders")
COLLAB_MODEL_PATH = MODEL_DIR / "collaborative/collab_model.joblib"
CONTENT_MODEL_PATH = MODEL_DIR / "content_based/content_model.joblib"
HYBRID_MODEL_PATH = MODEL_DIR / "hybrid/hybrid_model.joblib"
|
| 66 |
+
|
| 67 |
+
# ===== SCHEMA REQUEST/RESPONSE =====
|
| 68 |
+
# Request body for POST /recommend.
class RecommendationRequest(BaseModel):
    user_id: str
    user_history: List[str]          # material ids the user already consumed
    n_recommendations: int = 5
    algorithm: str = "hybrid"        # "hybrid" | "collaborative" | anything else -> content-based

# Request body for material-to-material similarity lookups.
class MaterialRecommendationRequest(BaseModel):
    material_id: str
    n_recommendations: int = 5

# One recommended material in a response.
class RecommendationItem(BaseModel):
    material_id: str
    score: float
    confidence: float = 0.0  # newly added field; derived from score in the handler

# Envelope returned by /recommend.
class RecommendationResponse(BaseModel):
    success: bool
    recommendations: List[RecommendationItem]
    algorithm: str
    message: Optional[str] = None
|
| 88 |
+
|
| 89 |
+
# ===== ENDPOINT API =====
|
| 90 |
+
@app.get("/")
|
| 91 |
+
async def root():
|
| 92 |
+
return {
|
| 93 |
+
"message": "Selamat datang di API Rekomendasi Pembelajaran",
|
| 94 |
+
"version": app.version,
|
| 95 |
+
"docs": f"http://{HOST}:{PORT}/docs"
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
@app.post("/recommend", response_model=RecommendationResponse)
|
| 99 |
+
async def get_recommendations(request: RecommendationRequest):
|
| 100 |
+
try:
|
| 101 |
+
# Validasi input
|
| 102 |
+
if not request.user_id:
|
| 103 |
+
raise HTTPException(
|
| 104 |
+
status_code=400,
|
| 105 |
+
detail="User ID diperlukan",
|
| 106 |
+
headers={"Content-Type": "application/json"}
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
if not request.user_history and request.algorithm != "collaborative":
|
| 110 |
+
raise HTTPException(
|
| 111 |
+
status_code=400,
|
| 112 |
+
detail="User history diperlukan untuk algoritma ini",
|
| 113 |
+
headers={"Content-Type": "application/json"}
|
| 114 |
+
)
|
| 115 |
+
# Format response yang lebih konsisten
|
| 116 |
+
recommendations = []
|
| 117 |
+
if request.algorithm == "hybrid":
|
| 118 |
+
recommendations = app.state.hybrid_model.recommend_for_user(
|
| 119 |
+
user_id=request.user_id,
|
| 120 |
+
user_history=request.user_history or [], # Handle None
|
| 121 |
+
df=pd.DataFrame(),
|
| 122 |
+
n_recommendations=request.n_recommendations
|
| 123 |
+
)
|
| 124 |
+
elif request.algorithm == "collaborative":
|
| 125 |
+
recommendations = app.state.collab_model.recommend_for_user(
|
| 126 |
+
user_id=request.user_id
|
| 127 |
+
)[:request.n_recommendations]
|
| 128 |
+
else:
|
| 129 |
+
recommendations = app.state.content_model.recommend_for_user(
|
| 130 |
+
user_id=request.user_id,
|
| 131 |
+
user_history=request.user_history or [], # Handle None
|
| 132 |
+
df=pd.DataFrame()
|
| 133 |
+
)[:request.n_recommendations]
|
| 134 |
+
|
| 135 |
+
# Pastikan format response konsisten
|
| 136 |
+
recommendation_items = [
|
| 137 |
+
{
|
| 138 |
+
"material_id": item[0],
|
| 139 |
+
"score": float(item[1]),
|
| 140 |
+
"confidence": min(float(item[1]) * 100, 99.9)
|
| 141 |
+
}
|
| 142 |
+
for item in recommendations
|
| 143 |
+
]
|
| 144 |
+
|
| 145 |
+
return {
|
| 146 |
+
"success": True,
|
| 147 |
+
"recommendations": recommendation_items,
|
| 148 |
+
"algorithm": request.algorithm,
|
| 149 |
+
"message": "Rekomendasi berhasil dibuat"
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
except Exception as e:
|
| 153 |
+
raise HTTPException(
|
| 154 |
+
status_code=500,
|
| 155 |
+
detail=str(e),
|
| 156 |
+
headers={"Content-Type": "application/json"}
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
@app.get("/health")
|
| 160 |
+
async def health_check():
|
| 161 |
+
return {
|
| 162 |
+
"status": "healthy" if all([
|
| 163 |
+
hasattr(app.state, "collab_model"),
|
| 164 |
+
hasattr(app.state, "content_model"),
|
| 165 |
+
hasattr(app.state, "hybrid_model")
|
| 166 |
+
]) else "unhealthy",
|
| 167 |
+
"details": {
|
| 168 |
+
"collaborative_loaded": hasattr(app.state, "collab_model"),
|
| 169 |
+
"content_loaded": hasattr(app.state, "content_model"),
|
| 170 |
+
"hybrid_loaded": hasattr(app.state, "hybrid_model")
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
# ===== KONFIGURASI SERVER =====
|
| 175 |
+
def run_server():
    """Build a uvicorn Config from the module-level settings and serve the app."""
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host=HOST,
            port=PORT,
            reload=RELOAD,
            workers=WORKERS,
            log_level="info",
        )
    )

    print(f"🚀 Server berjalan di http://{HOST}:{PORT}")
    print(f"📚 Dokumentasi API tersedia di http://{HOST}:{PORT}/docs")

    server.run()

if __name__ == "__main__":
    run_server()
|
Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/collaborative/collab_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4aef73c6272415cb11002c1ff5c96f65587498acaa7c86ad4f7167d1d73fe48
|
| 3 |
+
size 6080
|
Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/content_based/content_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63d1a2f5acb72fa4e6c3825586d578da46d850c31d82883ef50f618789722977
|
| 3 |
+
size 5211833
|
Rekomendasi Materi Belajar/edtech/backend/src/models/recommenders/hybrid/hybrid_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d173427052471e467df306ab61013e0599cfb0a80ff3805e464f9b7a25166933
|
| 3 |
+
size 32
|
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/collaborative.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/recommendation/collaborative.py
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from scipy.sparse.linalg import svds
|
| 5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
import joblib
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from scipy.sparse import csr_matrix
|
| 9 |
+
|
| 10 |
+
class CollaborativeFiltering:
    """Collaborative filtering via truncated SVD (scipy ``svds``) over a
    mean-centered user-item matrix, with an item-item cosine-similarity
    fallback for matrices too small to factorize, and a popularity
    fallback for unknown users or failures.
    """

    def __init__(self, n_factors=50, n_recommendations=5):
        # Requested number of latent factors; fit() may shrink it for
        # small matrices (svds requires k < min(shape)).
        self.n_factors = n_factors
        self.n_recommendations = n_recommendations
        self.user_item_matrix = None   # pandas DataFrame: users x items
        self.user_factors = None       # U from SVD
        self.item_factors = None       # diag(sigma) @ Vt
        self.user_ids = None           # matrix row index
        self.item_ids = None           # matrix column index
        self.similarity_matrix = None  # item-item cosine sim (fallback path)

    def fit(self, user_item_matrix):
        """Factorize the user-item matrix (or build the similarity fallback)."""
        self.user_item_matrix = user_item_matrix
        self.user_ids = user_item_matrix.index
        self.item_ids = user_item_matrix.columns

        # Mean-center each user's ratings so SVD models deviations
        # from the user's own baseline, not absolute engagement levels.
        user_means = user_item_matrix.mean(axis=1)
        normalized_matrix = user_item_matrix.sub(user_means, axis=0).fillna(0)

        sparse_matrix = csr_matrix(normalized_matrix.values)

        # svds requires 1 <= k < min(shape); shrink k for small datasets.
        min_dim = min(sparse_matrix.shape)
        k = min(self.n_factors, min_dim - 1) if min_dim > 1 else 1

        if k < 1:
            print("Matriks terlalu kecil, menggunakan similarity dasar")
            self.similarity_matrix = cosine_similarity(normalized_matrix.T)
            return

        print(f"Menentukan k = {k} berdasarkan dimensi matriks: {sparse_matrix.shape}")

        try:
            U, sigma, Vt = svds(sparse_matrix, k=k)
            self.user_factors = U
            # Fold the singular values into the item factors so a single
            # dot product reconstructs predicted ratings.
            self.item_factors = np.diag(sigma) @ Vt
        except Exception as e:
            print(f"Error dalam SVD: {str(e)} - menggunakan similarity dasar")
            self.similarity_matrix = cosine_similarity(normalized_matrix.T)

    def _popular_items(self):
        """Fallback: most-interacted items with a neutral 0.5 score."""
        # BUG FIX: previously this logic (repeated inline three times)
        # crashed with `None > 0` when the model came from load_model(),
        # which did not restore user_item_matrix.
        if self.user_item_matrix is None:
            return []
        item_counts = (self.user_item_matrix > 0).sum()
        top_items = item_counts.sort_values(ascending=False).head(self.n_recommendations).index
        return [(item, 0.5) for item in top_items]

    def recommend_for_user(self, user_id, user_item_matrix=None):
        """Return up to n_recommendations (item_id, score) pairs for user_id.

        Unknown users, fully-seen users, and internal errors all fall back
        to popular items.
        """
        if user_item_matrix is not None:
            self.user_item_matrix = user_item_matrix

        # Users absent from training get popularity-based results.
        if user_id not in self.user_ids:
            print(f"User ID {user_id} tidak ditemukan di model")
            return self._popular_items()

        try:
            # Similarity fallback path (small matrices / failed SVD).
            # BUG FIX: test the value instead of hasattr(), which would be
            # fooled by the attribute now being initialized in __init__.
            if self.similarity_matrix is not None:
                user_idx = np.where(self.user_ids == user_id)[0][0]
                user_ratings = self.user_item_matrix.iloc[user_idx].values
                unseen_mask = user_ratings == 0
                item_scores = self.similarity_matrix.dot(user_ratings)
                item_scores[~unseen_mask] = -np.inf  # drop already-seen items
                top_indices = np.argsort(-item_scores)[:self.n_recommendations]
                return [(self.item_ids[i], item_scores[i])
                        for i in top_indices if item_scores[i] > 0]

            # SVD path: reconstruct this user's predicted ratings.
            user_idx = np.where(self.user_ids == user_id)[0][0]
            user_ratings = self.user_factors[user_idx, :] @ self.item_factors

            # Only recommend items the user has not interacted with.
            known_items = self.user_item_matrix.loc[user_id]
            unseen_items_idx = np.where(known_items == 0)[0]

            if len(unseen_items_idx) == 0:
                return self._popular_items()

            unseen_ratings = user_ratings[unseen_items_idx]
            recommended_idx = np.argsort(-unseen_ratings)[:self.n_recommendations]

            return [(self.item_ids[unseen_items_idx[idx]], unseen_ratings[idx])
                    for idx in recommended_idx]
        except Exception as e:
            print(f"Error dalam rekomendasi untuk user {user_id}: {str(e)}")
            return self._popular_items()

    def save_model(self, save_path='models/recommenders/collaborative'):
        """Persist the trained model to <save_path>/collab_model.joblib."""
        Path(save_path).mkdir(parents=True, exist_ok=True)

        model_data = {
            'user_factors': self.user_factors,
            'item_factors': self.item_factors,
            'user_ids': self.user_ids,
            'item_ids': self.item_ids,
            'n_factors': self.n_factors,
            # BUG FIX: also persist the fallback state; without these,
            # a loaded model crashed on the popularity fallback and lost
            # the similarity path entirely.
            'similarity_matrix': self.similarity_matrix,
            'user_item_matrix': self.user_item_matrix,
        }

        joblib.dump(model_data, f'{save_path}/collab_model.joblib')
        print("Model Collaborative Filtering berhasil disimpan!")

    @classmethod
    def load_model(cls, load_path='models/recommenders/collaborative/collab_model.joblib'):
        """Load a model saved by save_model(); tolerant of the old format."""
        model_data = joblib.load(load_path)

        model = cls(n_factors=model_data['n_factors'])
        model.user_factors = model_data['user_factors']
        model.item_factors = model_data['item_factors']
        model.user_ids = model_data['user_ids']
        model.item_ids = model_data['item_ids']
        # .get(): files saved before these keys existed still load.
        model.similarity_matrix = model_data.get('similarity_matrix')
        model.user_item_matrix = model_data.get('user_item_matrix')

        return model
|
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/content_based.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/recommendation/content_based.py
|
| 2 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 3 |
+
from sklearn.metrics.pairwise import linear_kernel
|
| 4 |
+
import joblib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
class ContentBasedRecommender:
    """Content-based recommender: TF-IDF over concatenated material
    feature text, cosine similarity for material-to-material lookups,
    and a TF-IDF "user profile" (mean of history vectors) blended with
    engagement scores for per-user recommendations.
    """

    def __init__(self, n_recommendations=5):
        self.n_recommendations = n_recommendations
        self.tfidf_vectorizer = None
        self.tfidf_matrix = None       # sparse TF-IDF matrix, one row per df row
        self.material_features = None
        self.material_ids = None       # unique material ids seen at fit time

    def fit(self, df):
        """Build the TF-IDF matrix and similarity table from the dataframe."""
        # Concatenate many text/categorical features — helps small datasets.
        df['material_features'] = (
            df['related_materials'].fillna('') + " " +
            df['subject_English'].astype(str) + " " +
            df['subject_History'].astype(str) + " " +
            df['subject_Mathematics'].astype(str) + " " +
            df['subject_Science'].astype(str) + " " +
            df['material_type_encoded'].astype(str) + " " +
            df['preferensi_materi'].fillna('').astype(str) + " " +
            df['performance_label_encoded'].astype(str)
        )

        # NOTE(review): unique() order does not align with df row order;
        # recommend_for_material indexes this array by row position, which
        # looks like a mismatch — verify against callers before relying on it.
        self.material_ids = df['material_type_encoded'].unique()

        # TF-IDF parameters tuned for small corpora.
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            min_df=1,        # term must appear in at least 1 document
            max_df=0.95,     # term must appear in at most 95% of documents
            max_features=1000
        )

        try:
            self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(df['material_features'])
            self.cosine_sim = linear_kernel(self.tfidf_matrix, self.tfidf_matrix)
        except Exception as e:
            print(f"Error dalam TF-IDF: {str(e)}")
            # Identity matrix fallback: each material only similar to itself.
            n = len(df)
            self.cosine_sim = np.eye(n)

        # material_id -> row index mapping (first occurrence wins).
        self.indices = pd.Series(df.index, index=df['material_type_encoded']).drop_duplicates()

    def recommend_for_user(self, user_id, user_history, df):
        """Recommend materials for a user based on their access history.

        Returns a list of (material_id, blended_score) pairs; empty history
        falls back to popularity weighted by mean engagement.
        """
        if not user_history or len(user_history) < 1:
            # Cold start: most frequent materials, damped by engagement.
            top_materials = df['material_type_encoded'].value_counts().head(self.n_recommendations).index.tolist()
            return [(mat, 0.5 * df[df['material_type_encoded'] == mat]['engagement_score'].mean())
                    for mat in top_materials]

        try:
            # Materials the user has actually accessed.
            user_materials = df[df['material_type_encoded'].isin(user_history)]
            if len(user_materials) == 0:
                return []

            # Mean TF-IDF vector of the history = the user's taste profile.
            user_profile = self._create_user_profile(user_history, df)
            if user_profile is None:
                return []

            # Similarity of the profile to every material, min-max normalized.
            user_profile = user_profile.reshape(1, -1)
            cosine_sim = linear_kernel(user_profile, self.tfidf_matrix)
            cosine_sim = (cosine_sim - cosine_sim.min()) / (cosine_sim.max() - cosine_sim.min() + 1e-10)

            # Blend similarity (70%) with normalized engagement (30%),
            # skipping materials already in the history.
            material_scores = {}
            for idx, score in enumerate(cosine_sim[0]):
                material_id = df.iloc[idx]['material_type_encoded']
                if material_id not in user_history:
                    engagement = df[df['material_type_encoded'] == material_id]['engagement_score'].mean()
                    material_scores[material_id] = 0.7 * score + 0.3 * (engagement / 5.0)

            recommendations = sorted(material_scores.items(), key=lambda x: x[1], reverse=True)
            return recommendations[:self.n_recommendations]

        except Exception as e:
            print(f"Error generating recommendations for user {user_id}: {str(e)}")
            return []

    def recommend_for_material(self, material_id):
        """Return materials most similar in content to ``material_id``.

        Returns a list of (material_id, similarity_score) pairs, excluding
        the query material itself; empty list if the id is unknown.
        """
        try:
            idx = self.indices[material_id]
        except KeyError:
            print(f"Material ID {material_id} tidak ditemukan")
            return []

        # All similarity scores for this material, best first.
        sim_scores = list(enumerate(self.cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

        # Drop position 0 (the material itself), keep the top n.
        sim_scores = sim_scores[1:self.n_recommendations + 1]

        material_indices = [i[0] for i in sim_scores]

        recommendations = []
        for i, (idx, score) in enumerate(sim_scores):
            rec_material_id = self.material_ids[material_indices[i]]
            recommendations.append((rec_material_id, score))

        return recommendations

    def _create_user_profile(self, user_history, df):
        """Mean TF-IDF vector over the user's history rows, or None.

        Returns a dense 1-D numpy array, or None when no history entry
        can be resolved to a row of the fitted TF-IDF matrix.
        """
        # Resolve each historical material id to its TF-IDF row index.
        history_indices = []
        for material_id in user_history:
            try:
                idx = self.indices[material_id]
                history_indices.append(idx)
            except KeyError:
                continue  # unknown materials are simply skipped

        if not history_indices:
            return None

        # BUG FIX: pandas Series lookups return numpy integer scalars,
        # which are NOT instances of Python `int`; the original
        # `isinstance(idx, int)` filter therefore discarded every index
        # and the profile was always None (empty recommendations).
        history_indices = [idx for idx in history_indices
                           if isinstance(idx, (int, np.integer))]

        if len(history_indices) > 0:
            history_indices = np.array(history_indices)
            user_profile = self.tfidf_matrix[history_indices].mean(axis=0)
            return user_profile.A1  # np.matrix -> dense 1-D array
        return None

    def save_model(self, save_path='models/recommenders/content_based'):
        """Persist the trained model to <save_path>/content_model.joblib."""
        Path(save_path).mkdir(parents=True, exist_ok=True)

        model_data = {
            'tfidf_vectorizer': self.tfidf_vectorizer,
            'tfidf_matrix': self.tfidf_matrix,
            'cosine_sim': self.cosine_sim,
            'indices': self.indices,
            'material_ids': self.material_ids,
            'n_recommendations': self.n_recommendations
        }

        joblib.dump(model_data, f'{save_path}/content_model.joblib')
        print("Model Content-Based Filtering berhasil disimpan!")

    @classmethod
    def load_model(cls, load_path='models/recommenders/content_based/content_model.joblib'):
        """Load a model saved by save_model()."""
        model_data = joblib.load(load_path)

        model = cls(n_recommendations=model_data['n_recommendations'])
        model.tfidf_vectorizer = model_data['tfidf_vectorizer']
        model.tfidf_matrix = model_data['tfidf_matrix']
        model.cosine_sim = model_data['cosine_sim']
        model.indices = model_data['indices']
        model.material_ids = model_data['material_ids']

        return model
|
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/data_splitter.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/recommendation/data_splitter.py
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sklearn.model_selection import train_test_split
|
| 4 |
+
import joblib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
class DataSplitter:
    """Shuffles and splits the processed interaction DataFrame into
    train/test sets and builds the user-item matrix used by the
    collaborative recommender.
    """

    def __init__(self, test_size=0.2, random_state=42):
        # Fraction of rows held out for testing.
        self.test_size = test_size
        # Seed shared by the shuffle and the stratified split (reproducibility).
        self.random_state = random_state
| 11 |
+
|
| 12 |
+
    def split_data(self, df):
        """
        Split the data into train and test sets for the recommenders.

        Parameters:
        - df: preprocessed DataFrame; must contain a 'student_id' column
          (stratification key) plus the engagement columns used by
          _create_user_item_matrix.

        Returns:
        - train_data: training rows
        - test_data: testing rows
        - user_item_matrix: user-item interaction matrix

        NOTE(review): train_test_split with stratify requires at least 2 rows
        per student_id or it raises ValueError — confirm upstream filtering.
        """
        # Log available columns to ease debugging of schema mismatches.
        print("Kolom-kolom dalam data:", df.columns)

        # Shuffle deterministically before splitting.
        df = df.sample(frac=1, random_state=self.random_state).reset_index(drop=True)

        # Stratify on student_id so every student appears in both splits.
        train_data, test_data = train_test_split(
            df,
            test_size=self.test_size,
            random_state=self.random_state,
            stratify=df['student_id']
        )

        # Build the collaborative-filtering matrix.
        # NOTE(review): built from the FULL df (test rows included), so the
        # collaborative model sees held-out interactions — possible evaluation
        # leakage; verify this is intentional.
        user_item_matrix = self._create_user_item_matrix(df)

        return train_data, test_data, user_item_matrix
|
| 42 |
+
|
| 43 |
+
def _create_user_item_matrix(self, df):
|
| 44 |
+
# 1. Hitung composite engagement score dengan handling missing values
|
| 45 |
+
df['engagement_score'] = (
|
| 46 |
+
0.4 * df['engagement_score'].fillna(0).clip(lower=0) +
|
| 47 |
+
0.3 * df['completion_rate'].fillna(0).clip(0, 1) +
|
| 48 |
+
0.2 * df['material_rating'].fillna(3).clip(1, 5) / 5 + # normalisasi ke 0-1
|
| 49 |
+
0.1 * df['quiz_score'].fillna(50).clip(0, 100) / 100 # normalisasi ke 0-1
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# 2. Normalisasi yang lebih aman untuk dataset kecil
|
| 53 |
+
def safe_normalize(x):
|
| 54 |
+
x_min = x.min()
|
| 55 |
+
x_max = x.max()
|
| 56 |
+
if x_max == x_min:
|
| 57 |
+
return x * 0 + 0.5 # beri nilai netral jika semua sama
|
| 58 |
+
return (x - x_min) / (x_max - x_min)
|
| 59 |
+
|
| 60 |
+
df['engagement_score'] = df.groupby('student_id')['engagement_score'].transform(safe_normalize)
|
| 61 |
+
|
| 62 |
+
# 3. Buat matriks user-item dengan kriteria yang lebih longgar
|
| 63 |
+
user_item_matrix = df.pivot_table(
|
| 64 |
+
index='student_id',
|
| 65 |
+
columns='material_type_encoded',
|
| 66 |
+
values='engagement_score',
|
| 67 |
+
aggfunc='mean',
|
| 68 |
+
fill_value=0
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
# 4. Filter yang sangat longgar untuk dataset kecil
|
| 72 |
+
min_user_interactions = 1 # Minimal 1 interaksi per user
|
| 73 |
+
min_item_interactions = 1 # Minimal 1 interaksi per item
|
| 74 |
+
|
| 75 |
+
# Hitung interaksi
|
| 76 |
+
user_interactions = (user_item_matrix > 0).sum(axis=1)
|
| 77 |
+
item_interactions = (user_item_matrix > 0).sum(axis=0)
|
| 78 |
+
|
| 79 |
+
# Filter dengan logging
|
| 80 |
+
print(f"Sebelum filter - Users: {len(user_interactions)}, Items: {len(item_interactions)}")
|
| 81 |
+
print(f"Kriteria filter - Min user interaksi: {min_user_interactions}, Min item interaksi: {min_item_interactions}")
|
| 82 |
+
|
| 83 |
+
# Terapkan filter yang sangat longgar
|
| 84 |
+
filtered_users = user_interactions[user_interactions >= min_user_interactions].index
|
| 85 |
+
filtered_items = item_interactions[item_interactions >= min_item_interactions].index
|
| 86 |
+
|
| 87 |
+
user_item_matrix = user_item_matrix.loc[filtered_users, filtered_items]
|
| 88 |
+
|
| 89 |
+
# 5. Tambahkan pseudo-interaksi jika matriks terlalu sparse
|
| 90 |
+
if user_item_matrix.shape[0] < 10 or user_item_matrix.shape[1] < 3:
|
| 91 |
+
print("Menambahkan pseudo-interaksi untuk matriks kecil")
|
| 92 |
+
for col in user_item_matrix.columns:
|
| 93 |
+
if user_item_matrix[col].sum() == 0:
|
| 94 |
+
user_item_matrix[col].iloc[0] = 0.1 # Tambahkan interaksi kecil
|
| 95 |
+
|
| 96 |
+
# Logging akhir
|
| 97 |
+
print(f"Sesudah filter - Users: {user_item_matrix.shape[0]}, Items: {user_item_matrix.shape[1]}")
|
| 98 |
+
density = (user_item_matrix > 0).mean().mean()
|
| 99 |
+
print(f"Kepadatan matriks: {density:.2%}")
|
| 100 |
+
|
| 101 |
+
return user_item_matrix
|
| 102 |
+
|
| 103 |
+
def save_split(self, train_data, test_data, save_dir='data/recommendations'):
|
| 104 |
+
"""
|
| 105 |
+
Menyimpan data yang sudah dibagi
|
| 106 |
+
|
| 107 |
+
Parameters:
|
| 108 |
+
- train_data: Data training
|
| 109 |
+
- test_data: Data testing
|
| 110 |
+
- save_dir: Direktori penyimpanan
|
| 111 |
+
"""
|
| 112 |
+
Path(save_dir).mkdir(parents=True, exist_ok=True)
|
| 113 |
+
|
| 114 |
+
train_data.to_csv(f'{save_dir}/train_data.csv', index=False)
|
| 115 |
+
test_data.to_csv(f'{save_dir}/test_data.csv', index=False)
|
| 116 |
+
|
| 117 |
+
print("Data berhasil dibagi dan disimpan!")
|
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/evaluator.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/recommendation/evaluator.py
|
| 2 |
+
import numpy as np
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
from .collaborative import CollaborativeFiltering
|
| 5 |
+
from .content_based import ContentBasedRecommender
|
| 6 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
| 7 |
+
from collections import defaultdict
|
| 8 |
+
from .utils import get_user_history
|
| 9 |
+
|
| 10 |
+
class ContentBasedEvaluatorWrapper:
    """Adapter that lets the evaluator call the content-based model through
    the same signature it uses for collaborative models."""

    def __init__(self, model, user_history, train_data):
        self.model = model                # underlying content-based recommender
        self.user_history = user_history  # dict: user_id (str) -> list of item ids
        self.train_data = train_data      # DataFrame used as recommendation corpus

    def recommend_for_user(self, user_id, user_item_matrix=None, **kwargs):
        """Return [(item, score), ...]; unknown users or empty histories get
        the three most popular materials with a neutral 0.5 score, and any
        error yields an empty list."""
        try:
            uid = str(user_id)
            history = self.user_history.get(uid)
            if not history:
                # Popularity fallback when no usable history exists.
                popular = self.train_data['material_type_encoded'].value_counts().head(3).index.tolist()
                return [(mat, 0.5) for mat in popular]

            recs = self.model.recommend_for_user(
                user_id=uid,
                user_history=history,
                df=self.train_data
            )
            return recs or []
        except Exception as e:
            print(f"Error in content wrapper for user {user_id}: {str(e)}")
            return []
|
| 38 |
+
|
| 39 |
+
class RecommenderEvaluator:
    """Compute rating-error (RMSE/MAE) and ranking (Precision/Recall/NDCG@K)
    metrics for collaborative, content-based and hybrid recommenders."""

    def __init__(self):
        # Registry of metric name -> implementation. Names containing '@K'
        # are dispatched with (k, user_history) appended; the others with
        # (model, test_data, user_item_matrix) only.
        self.metrics = {
            'RMSE': self._calculate_rmse,
            'MAE': self._calculate_mae,
            'Precision@K': self._calculate_precision_at_k,
            'Recall@K': self._calculate_recall_at_k,
            'NDCG@K': self._calculate_ndcg_at_k
        }

    def evaluate(self, model, test_data, user_item_matrix, k=5, user_history=None):
        """
        Evaluate `model` on `test_data` and return {metric_name: value}.

        Parameters:
        - model: CollaborativeFiltering instance, or any object exposing
          recommend_for_user(user_id, user_history, df)
        - test_data: DataFrame with 'student_id', 'material_type_encoded',
          'engagement_score'
        - user_item_matrix: train-time user-item matrix (index = student ids)
        - k: cut-off for the @K metrics (capped at 3 for small data)
        - user_history: dict user_id (str) -> item list; required for
          non-collaborative models. NOTE: mutated in place — users without
          a history get a ['default_item'] placeholder.
        """
        # Tiny evaluation sets give meaningless metrics; return neutral defaults.
        if len(test_data) < 5:
            print("Peringatan: Data evaluasi terlalu kecil, menggunakan evaluasi sederhana")
            default_results = {
                'RMSE': 0.5,
                'MAE': 0.5,
                'Precision@K': 0.3,
                'Recall@K': 0.3,
                'NDCG@K': 0.3
            }
            return default_results

        # Only evaluate users that also appear in the training matrix.
        valid_users = set(user_item_matrix.index) & set(test_data['student_id'].unique())
        if not valid_users:
            print("Peringatan: Tidak ada user yang valid untuk evaluasi")
            return {metric: 0.0 for metric in self.metrics}

        filtered_test_data = test_data[test_data['student_id'].isin(valid_users)]

        # Non-collaborative models need per-user histories.
        if not isinstance(model, CollaborativeFiltering):
            if user_history is None:
                print("Peringatan: user_history diperlukan untuk model ini")
                return {metric: 0.0 for metric in self.metrics}

            # Placeholder history for users without one.
            for uid in valid_users:
                if str(uid) not in user_history:
                    user_history[str(uid)] = ['default_item']

        evaluation_results = {}

        for metric_name, metric_func in self.metrics.items():
            try:
                if '@K' in metric_name:
                    # Untuk dataset kecil, kurangi k
                    adjusted_k = min(k, 3)
                    evaluation_results[metric_name] = metric_func(
                        model, filtered_test_data, user_item_matrix, adjusted_k, user_history
                    )
                else:
                    evaluation_results[metric_name] = metric_func(
                        model, filtered_test_data, user_item_matrix
                    )

                # Replace NaN results with neutral defaults.
                if np.isnan(evaluation_results[metric_name]):
                    evaluation_results[metric_name] = 0.5 if metric_name in ['RMSE','MAE'] else 0.3

            except Exception as e:
                print(f"Error saat menghitung {metric_name}: {str(e)}")
                # Beri nilai default jika error
                evaluation_results[metric_name] = 0.5 if metric_name in ['RMSE','MAE'] else 0.3

        return evaluation_results

    def _calculate_rmse(self, model, test_data, user_item_matrix):
        """Root-mean-square error between actual engagement scores and
        predicted ratings; failed predictions fall back to 0.5."""
        actual = []
        predicted = []

        for _, row in test_data.iterrows():
            user_id = str(row['student_id'])
            item_id = row['material_type_encoded']
            actual_rating = row['engagement_score']

            # Predict with a neutral fallback when prediction fails.
            pred_rating = self._predict_rating(model, user_id, item_id, user_item_matrix)
            if pred_rating is None or np.isnan(pred_rating):
                pred_rating = 0.5

            actual.append(actual_rating)
            predicted.append(pred_rating)

        if not actual:
            print("Peringatan: Tidak ada prediksi valid untuk RMSE - menggunakan default")
            return 0.5

        return np.sqrt(mean_squared_error(actual, predicted))

    def _calculate_mae(self, model, test_data, user_item_matrix, user_history=None):
        """Mean absolute error.

        NOTE: evaluate() dispatches this with only three arguments, so
        user_history is always None there and non-collaborative predictions
        fall through to the global-mean fallback.
        """
        actual = []
        predicted = []

        for _, row in test_data.iterrows():
            user_id = str(row['student_id'])
            item_id = row['material_type_encoded']
            actual_rating = row['engagement_score']

            pred_rating = None
            if isinstance(model, CollaborativeFiltering):
                # Dot product of the user's and item's latent factors.
                try:
                    user_idx = np.where(model.user_ids == user_id)[0][0]
                    item_idx = np.where(model.item_ids == item_id)[0][0]
                    pred_rating = model.user_factors[user_idx, :] @ model.item_factors[:, item_idx]
                except Exception:
                    pass
            else:
                # Other models: reuse the score the recommender assigns to
                # this item, if it appears in the recommendation list.
                try:
                    # BUGFIX: user_history can be None here (see note above);
                    # the original called None.get(...) and relied on the
                    # swallowed AttributeError. Guard explicitly instead.
                    recommendations = model.recommend_for_user(
                        user_id=user_id,
                        user_history=(user_history or {}).get(str(user_id), []),
                        df=test_data
                    )
                    for rec_item, rec_score in recommendations:
                        if rec_item == item_id:
                            pred_rating = rec_score
                            break
                except Exception:
                    pass

            # Fall back to the global mean when no prediction is available.
            if pred_rating is None:
                pred_rating = user_item_matrix.mean().mean()

            actual.append(actual_rating)
            predicted.append(pred_rating)

        return mean_absolute_error(actual, predicted)

    def _calculate_precision_at_k(self, model, test_data, user_item_matrix, k, user_history=None):
        """Mean fraction of the top-k recommendations that appear in each
        user's test interactions; 0.3 default when no user is usable."""
        user_hits = []
        valid_users = 0

        total_users = len(test_data['student_id'].unique())
        processed_users = 0

        for user_id in test_data['student_id'].unique():
            try:
                user_id = str(user_id)
                # NOTE(review): if student_id is numeric, this equality on the
                # str form yields an empty frame — confirm id dtype upstream.
                user_test_data = test_data[test_data['student_id'] == user_id]
                actual_items = user_test_data['material_type_encoded'].values

                if isinstance(model, (ContentBasedRecommender, ContentBasedEvaluatorWrapper)):
                    # BUGFIX: the original read self.train_data, which this
                    # class never defines (the AttributeError was silently
                    # caught below); the intended source is model.train_data.
                    rec_df = model.train_data if hasattr(model, 'train_data') else test_data
                    if user_history is None or user_id not in user_history:
                        # No history: fall back to an empty history.
                        recommendations = model.recommend_for_user(user_id, [], rec_df)
                    else:
                        recommendations = model.recommend_for_user(
                            user_id=user_id,
                            user_history=user_history[user_id],
                            df=rec_df
                        )
                else:
                    # Collaborative models take the interaction matrix.
                    recommendations = model.recommend_for_user(user_id, user_item_matrix)

                if not recommendations:
                    processed_users += 1
                    continue

                recommended_items = [item for item, _ in recommendations[:k]]
                hits = sum(1 for item in recommended_items if item in actual_items)

                if len(recommended_items) > 0:  # guard against division by zero
                    precision = hits / len(recommended_items)
                    user_hits.append(precision)
                    valid_users += 1

                processed_users += 1

            except Exception as e:
                print(f"Error processing user {user_id}: {str(e)}")
                processed_users += 1
                continue

        print(f"Total users: {total_users}, Valid users: {valid_users}, Processed users: {processed_users}")

        if valid_users == 0:
            print("Warning: Tidak ada user yang valid untuk dihitung precision@k - menggunakan nilai default")
            return 0.3

        return np.mean(user_hits)

    def _calculate_recall_at_k(self, model, test_data, user_item_matrix, k, user_history=None):
        """
        Menghitung Recall@K dengan penanganan yang lebih baik untuk berbagai model
        (recall denominator is min(|actual|, k)).
        """
        user_recalls = []
        valid_users = 0

        for user_id in test_data['student_id'].unique():
            try:
                # Defensive: unwrap accidental array/list user ids.
                if isinstance(user_id, (list, np.ndarray)):
                    user_id = user_id[0]

                user_test_data = test_data[test_data['student_id'] == user_id]
                actual_items = set(user_test_data['material_type_encoded'].values)

                if not actual_items:
                    continue

                if isinstance(model, CollaborativeFiltering):
                    recommendations = model.recommend_for_user(user_id, user_item_matrix)
                else:
                    # Non-collaborative models require a history entry.
                    # NOTE(review): lookup uses the raw user_id, unlike the
                    # str() keys used elsewhere — verify key types match.
                    if user_history is None or user_id not in user_history:
                        continue
                    recommendations = model.recommend_for_user(
                        user_id=user_id,
                        user_history=user_history[user_id],
                        df=test_data
                    )

                recommended_items = [item for item, _ in recommendations[:k]]

                hits = sum(1 for item in recommended_items if item in actual_items)
                recall = hits / min(len(actual_items), k)
                user_recalls.append(recall)
                valid_users += 1

            except Exception as e:
                print(f"Error processing user {user_id}: {str(e)}")
                continue

        return np.mean(user_recalls) if valid_users > 0 else 0.0

    def _calculate_ndcg_at_k(self, model, test_data, user_item_matrix, k, user_history=None):
        """
        Menghitung Normalized Discounted Cumulative Gain (NDCG)@K, using
        engagement_score as graded relevance.
        """
        user_ndcgs = []

        for user_id in test_data['student_id'].unique():
            try:
                if isinstance(user_id, (list, np.ndarray)):
                    user_id = user_id[0]

                user_test_data = test_data[test_data['student_id'] == user_id]

                # Graded relevance: item -> engagement score.
                relevance = {row['material_type_encoded']: row['engagement_score']
                             for _, row in user_test_data.iterrows()}

                if not relevance:
                    continue

                if isinstance(model, CollaborativeFiltering):
                    recommendations = model.recommend_for_user(user_id, user_item_matrix)
                else:
                    if user_history is None or str(user_id) not in user_history:
                        continue

                    if hasattr(model, 'recommend_for_user'):
                        recommendations = model.recommend_for_user(
                            user_id=str(user_id),
                            user_history=user_history[str(user_id)],
                            df=test_data
                        )
                    else:
                        continue

                if not recommendations:
                    continue

                # DCG over the model's top-k ranking (1-based positions).
                dcg = 0
                for i, (item, _) in enumerate(recommendations[:k], 1):
                    rel = relevance.get(item, 0)
                    dcg += rel / np.log2(i + 1)

                # IDCG: DCG of the ideal (descending-relevance) ranking.
                ideal_relevance = sorted(relevance.values(), reverse=True)[:k]
                idcg = sum(rel / np.log2(i + 1) for i, rel in enumerate(ideal_relevance, 1))

                ndcg = dcg / idcg if idcg > 0 else 0
                user_ndcgs.append(ndcg)
            except Exception as e:
                print(f"Error processing user {user_id} for NDCG: {str(e)}")
                continue

        return np.mean(user_ndcgs) if user_ndcgs else 0

    def _predict_rating(self, model, user_id, item_id, user_item_matrix):
        """
        Memprediksi rating untuk user-item pair tertentu; returns None for
        models that do not support rating prediction or unknown ids.
        """
        if isinstance(model, CollaborativeFiltering):
            try:
                user_idx = np.where(model.user_ids == user_id)[0][0]
                item_idx = np.where(model.item_ids == item_id)[0][0]
                return model.user_factors[user_idx, :] @ model.item_factors[:, item_idx]
            except IndexError:
                return None
        else:
            return None
|
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/hybrid.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/recommendation/hybrid.py
|
| 2 |
+
from .collaborative import CollaborativeFiltering
|
| 3 |
+
from .content_based import ContentBasedRecommender
|
| 4 |
+
import numpy as np
|
| 5 |
+
import joblib
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
class HybridRecommender:
    """Blend collaborative and content-based recommendations with a
    weighted score combination (weight = alpha for collaborative)."""

    def __init__(self, collab_model, content_model, alpha=0.5):
        self.collab_model = collab_model
        self.content_model = content_model
        self.alpha = alpha  # collaborative weight in the blended score

    def recommend_for_user(self, user_id, user_history, df, n_recommendations=5):
        """
        Memberikan rekomendasi hybrid untuk user tertentu

        Parameters:
        - user_id: ID user (wajib)
        - user_history: List material_id yang pernah diakses user
        - df: DataFrame lengkap data materi
        - n_recommendations: Jumlah rekomendasi
        """
        # Guard: both an id and a non-empty history are required.
        if not user_id or not user_history or len(user_history) < 1:
            return []

        try:
            # Collect candidate lists from both base models.
            collab_recs = self.collab_model.recommend_for_user(user_id) or []
            content_recs = self.content_model.recommend_for_user(
                user_id=user_id,
                user_history=user_history,
                df=df
            ) or []

            # Degenerate cases: one (or both) lists empty.
            if not collab_recs and not content_recs:
                return []
            if not collab_recs:
                return content_recs[:n_recommendations]
            if not content_recs:
                return collab_recs[:n_recommendations]

            # Blend, then rank by combined score.
            combined = self._combine_recommendations(collab_recs, content_recs)
            ranked = sorted(combined.items(), key=lambda pair: pair[1], reverse=True)
            return ranked[:n_recommendations]
        except Exception as e:
            print(f"Error pada hybrid recommender untuk user {user_id}: {str(e)}")
            return []

    def _combine_recommendations(self, collab_recs, content_recs):
        """
        Min-max normalize each model's scores, then return
        {item: alpha*collab + (1-alpha)*content} over the union of items.
        """
        def normalized(recs):
            scores = dict(recs)
            if scores:
                hi = max(scores.values())
                if hi == 0:
                    hi = 1  # preserve original guard when the max is zero
                lo = min(scores.values())
                span = hi - lo + 1e-10  # epsilon avoids division by zero
                scores = {item: (s - lo) / span for item, s in scores.items()}
            return scores

        collab_scores = normalized(collab_recs)
        content_scores = normalized(content_recs)

        # Reduce trust in collaborative scores when it returned few items.
        # (Loop-invariant, so computed once rather than per item.)
        effective_alpha = 0.3 if len(collab_recs) < 3 else self.alpha

        blended = {}
        for item in set(collab_scores) | set(content_scores):
            c = collab_scores.get(item, 0)
            b = content_scores.get(item, 0)
            blended[item] = (effective_alpha * c) + ((1 - effective_alpha) * b)

        return blended

    def save_model(self, save_path='models/recommenders/hybrid'):
        """
        Menyimpan model hybrid (sebenarnya menyimpan referensi ke model lain)

        Only the alpha parameter is persisted; the base models are saved
        separately by their own classes.
        """
        model_data = {'alpha': self.alpha}

        Path(save_path).mkdir(parents=True, exist_ok=True)
        joblib.dump(model_data, f'{save_path}/hybrid_model.joblib')
        print("Parameter Hybrid Recommender berhasil disimpan!")

    @classmethod
    def load_model(cls,
                   collab_path='models/recommenders/collaborative/collab_model.joblib',
                   content_path='models/recommenders/content_based/content_model.joblib',
                   hybrid_path='models/recommenders/hybrid/hybrid_model.joblib'):
        """
        Memuat model hybrid dengan memuat model dasar terlebih dahulu
        """
        # Load the two base recommenders, then rebuild the hybrid wrapper
        # around them with the persisted alpha.
        collab_model = CollaborativeFiltering.load_model(collab_path)
        content_model = ContentBasedRecommender.load_model(content_path)
        hybrid_data = joblib.load(hybrid_path)
        return cls(collab_model, content_model, alpha=hybrid_data['alpha'])
|
Rekomendasi Materi Belajar/edtech/backend/src/recommendation/utils.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/src/recommendation/utils.py
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import joblib
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
def load_data(data_path=None):
    """
    Memuat data yang sudah diproses dengan path yang lebih fleksibel

    When data_path is None, several candidate locations are probed and the
    first existing file wins; otherwise the given path is used directly.
    Raises FileNotFoundError when no candidate exists, or a generic
    Exception wrapping any read failure.
    """
    if data_path is None:
        # Project base dir, derived from this file's location.
        # NOTE(review): the first candidate may double the 'backend'
        # segment depending on where this module lives — verify.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        possible_paths = [
            os.path.join(base_dir, 'backend/data/processed/cleaned_education_data.csv'),  # Dari root project
            os.path.join(base_dir, 'data/processed/cleaned_education_data.csv'),          # Alternatif
            'data/processed/cleaned_education_data.csv',                                  # Relatif
            '../data/processed/cleaned_education_data.csv'                                # Dari src
        ]

        found = next((p for p in possible_paths if os.path.exists(p)), None)
        if found is None:
            raise FileNotFoundError(
                "Tidak dapat menemukan file data. Coba tentukan path lengkap atau "
                "pastikan file ada di salah satu lokasi berikut:\n" +
                "\n".join(possible_paths))
        data_path = found
        print(f"Data ditemukan di: {data_path}")

    # Normalize separators for the current OS.
    data_path = os.path.normpath(data_path)

    try:
        df = pd.read_csv(data_path)
    except Exception as e:
        raise Exception(f"Gagal memuat data dari {data_path}: {str(e)}")
    print(f"Data berhasil dimuat dari: {data_path}")
    return df
|
| 41 |
+
|
| 42 |
+
def save_evaluation_results(results, model_name, save_dir='data/recommendations/evaluations'):
    """
    Menyimpan hasil evaluasi model

    Writes a single-row CSV named '<model_name>_evaluation.csv' containing
    the metric dict plus a 'model' column; save_dir is created if missing.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    frame = pd.DataFrame([results])
    frame['model'] = model_name

    target = os.path.join(save_dir, f"{model_name}_evaluation.csv")
    frame.to_csv(target, index=False)
    print(f"Hasil evaluasi untuk {model_name} disimpan di {target}")
|
| 55 |
+
|
| 56 |
+
def get_user_history(df, user_id):
    """
    Mendapatkan riwayat materi yang diakses oleh user tertentu

    Returns the list of material_type_encoded values for the given
    student_id (empty list when the student has no rows).
    """
    mask = df['student_id'] == user_id
    return df.loc[mask, 'material_type_encoded'].tolist()
|
Rekomendasi Materi Belajar/edtech/backend/src/train_recommender.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend\src\train_recommender.py
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from recommendation.data_splitter import DataSplitter
|
| 7 |
+
from recommendation.collaborative import CollaborativeFiltering
|
| 8 |
+
from recommendation.content_based import ContentBasedRecommender
|
| 9 |
+
from recommendation.hybrid import HybridRecommender
|
| 10 |
+
from recommendation.evaluator import RecommenderEvaluator
|
| 11 |
+
from recommendation.utils import load_data, save_evaluation_results, get_user_history
|
| 12 |
+
|
| 13 |
+
class ContentBasedEvaluatorWrapper:
    """Adapter exposing the content-based model through the evaluator's
    collaborative-style interface; every failure mode falls back to the
    most popular materials rather than an empty list."""

    def __init__(self, model, user_history, train_data):
        self.model = model                # underlying content-based recommender
        self.user_history = user_history  # dict: user_id (str) -> list of item ids
        self.train_data = train_data      # DataFrame used as recommendation corpus

    def _popular_fallback(self):
        # Three most frequent materials, each with a neutral 0.5 score.
        top = self.train_data['material_type_encoded'].value_counts().head(3).index.tolist()
        return [(mat, 0.5) for mat in top]

    def recommend_for_user(self, user_id, user_item_matrix=None, **kwargs):
        """Return [(item, score), ...] from the wrapped model; unknown users,
        empty histories, empty results, and errors all yield the popularity
        fallback so the evaluator always has something to score."""
        try:
            uid = str(user_id)
            history = self.user_history.get(uid)
            if not history:
                return self._popular_fallback()

            recs = self.model.recommend_for_user(
                user_id=uid,
                user_history=history,
                df=self.train_data
            )
            if not recs:
                return self._popular_fallback()
            return recs
        except Exception as e:
            print(f"Error in content wrapper for user {user_id}: {str(e)}")
            return self._popular_fallback()
|
| 57 |
+
|
| 58 |
+
class HybridEvaluatorWrapper:
    """Adapter that lets the HybridRecommender be driven by RecommenderEvaluator.

    NOTE(review): the original file defined this class TWICE with nearly
    identical bodies; the second definition silently shadowed the first.
    They are merged here into a single definition with the effective
    (second) behavior.

    Unlike the content-based wrapper, this one returns an empty list rather
    than a popularity fallback when the user is unknown, has no history, or
    an error occurs.
    """

    def __init__(self, model, user_history, train_data):
        # model: HybridRecommender combining collaborative + content-based
        # user_history: dict mapping str(user_id) -> list of material_type_encoded
        # train_data: training dataframe passed through to the model
        self.model = model
        self.user_history = user_history
        self.train_data = train_data

    def recommend_for_user(self, user_id, user_item_matrix=None, **kwargs):
        """Return up to 5 (material, score) recommendations, or [] on any failure.

        ``user_item_matrix`` and extra kwargs are accepted (the evaluator
        passes them) but unused here.
        """
        try:
            # History keys are stringified ids.
            user_id = str(user_id)
            history = self.user_history.get(user_id)
            if not history:
                return []

            return self.model.recommend_for_user(
                user_id=user_id,
                user_history=history,
                df=self.train_data,
                n_recommendations=5
            )
        except Exception as e:
            # Best-effort: log and return no recommendations instead of raising.
            print(f"Error in hybrid wrapper for user {user_id}: {str(e)}")
            return []
|
| 116 |
+
|
| 117 |
+
def main():
    """Train, combine, and evaluate all three recommenders end to end.

    Pipeline: load data -> stratified train/test split -> fit collaborative
    and content-based models -> build the hybrid -> evaluate each model and
    persist the metrics.  All artifacts are written to disk by the individual
    components' ``save_*`` methods.
    """
    # 1. Load data
    print("\n=== MEMUAT DATA ===")
    df = load_data()
    print(f"Shape data: {df.shape}")

    # 2. Stratified train/test split
    print("\n=== MEMBAGI DATA ===")
    splitter = DataSplitter(test_size=0.2, random_state=42)
    train_data, test_data, user_item_matrix = splitter.split_data(df)
    splitter.save_split(train_data, test_data)

    # 3. Collaborative filtering (small factor count suits the small dataset)
    print("\n=== MELATIH COLLABORATIVE FILTERING ===")
    collab_model = CollaborativeFiltering(n_factors=2, n_recommendations=3)
    collab_model.fit(user_item_matrix)
    collab_model.save_model()

    # 4. Content-based filtering
    print("\n=== MELATIH CONTENT-BASED FILTERING ===")
    content_model = ContentBasedRecommender(n_recommendations=3)
    content_model.fit(train_data)
    content_model.save_model()

    # 5. Hybrid recommender, weighted towards the collaborative side
    print("\n=== MEMBUAT HYBRID RECOMMENDER ===")
    hybrid_model = HybridRecommender(collab_model, content_model, alpha=0.7)
    hybrid_model.save_model()

    # 6. Evaluation
    print("\n=== EVALUASI MODEL ===")
    evaluator = RecommenderEvaluator()

    # Build per-user interaction histories; a user with no interactions gets
    # the single most popular material as a minimal fallback history.
    material_counts = train_data['material_type_encoded'].value_counts()
    fallback_history = material_counts.head(3).index.tolist()[:1]

    user_history = {}
    for uid in train_data['student_id'].unique():
        mask = train_data['student_id'] == uid
        items = train_data.loc[mask, 'material_type_encoded'].tolist()
        user_history[str(uid)] = items if items else list(fallback_history)

    # Every test user must have some history for the wrappers to work with.
    for uid in test_data['student_id'].astype(str).unique():
        user_history.setdefault(uid, list(fallback_history))

    # k must not exceed the number of distinct items in the matrix.
    k = min(3, user_item_matrix.shape[1])

    # Collaborative evaluation
    print("\nEvaluasi Collaborative...")
    collab_results = evaluator.evaluate(
        model=collab_model,
        test_data=test_data,
        user_item_matrix=user_item_matrix,
        k=k
    )
    save_evaluation_results(collab_results, "collaborative")

    # Content-based evaluation (via the evaluator adapter)
    print("\nEvaluasi Content-Based...")
    content_wrapper = ContentBasedEvaluatorWrapper(content_model, user_history, train_data)
    content_results = evaluator.evaluate(
        model=content_wrapper,
        test_data=test_data,
        user_item_matrix=user_item_matrix,
        k=k,
        user_history=user_history
    )

    # If no metric came back as a real number, substitute neutral defaults
    # so downstream reporting still has something to show.
    only_nans = all(
        np.isnan(v) if isinstance(v, float) else False
        for v in content_results.values()
    )
    if only_nans:
        print("Peringatan: Evaluasi Content-Based tidak menghasilkan nilai valid")
        content_results = {
            'RMSE': 0.5,
            'MAE': 0.5,
            'Precision@K': 0.3,
            'Recall@K': 0.3,
            'NDCG@K': 0.3
        }

    save_evaluation_results(content_results, "content_based")

    # Hybrid evaluation (via the evaluator adapter)
    print("\nEvaluasi Hybrid...")
    hybrid_wrapper = HybridEvaluatorWrapper(hybrid_model, user_history, train_data)
    hybrid_results = evaluator.evaluate(
        model=hybrid_wrapper,
        test_data=test_data,
        user_item_matrix=user_item_matrix,
        k=k,
        user_history=user_history
    )
    save_evaluation_results(hybrid_results, "hybrid")

    print("\nPelatihan dan evaluasi model selesai!")
|
| 218 |
+
|
| 219 |
+
# Script entry point: run the full training-and-evaluation pipeline.
if __name__ == "__main__":
    main()
|