# Final_ML_Project/app/practical.py
# Bardi-ya's picture
# Upload 51 files
# c296592 verified
import sys
import os
# Add the parent directory to sys.path so 'src' can be imported
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from src.preprocessing import Preprocessing
from src.eda import EDA
from src.feature_engineering import FeatureEngineering
from src.modeling import RecommenderModels
from src.evaluation import leave_one_out_by_timestamp, evaluate_all, summarize_results
def iter_rating_triples(ratings):
    """Return an iterator of ``(userId, movieId, rating)`` over *ratings*.

    The three columns are walked in lockstep rather than through
    ``DataFrame.iterrows()``. ``iterrows`` packs every row into a single
    Series and therefore does not preserve per-column dtypes: in a frame
    whose columns are all numeric, integer ids come back as floats
    (``1`` -> ``1.0``) and ``str(1.0)`` is ``"1.0"``, which no longer matches
    ids stringified straight from an integer column.

    Args:
        ratings: Frame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        An iterator of 3-tuples in row order, each value keeping the dtype of
        its own column.
    """
    return zip(ratings["userId"], ratings["movieId"], ratings["rating"])


def collect_predictions(test_df, score):
    """Score every row of *test_df* with the callable *score*.

    Args:
        test_df: Frame with ``userId``, ``movieId`` and ``rating`` columns.
        score: Callable invoked as ``score(user_id, movie_id)`` that returns
            the predicted rating for that pair.

    Returns:
        A list with one ``(user_id, movie_id, true_rating, predicted_rating,
        {})`` tuple per row, in row order. The last element is always a fresh
        empty dict.
    """
    return [
        (user_id, movie_id, true_rating, score(user_id, movie_id), {})
        for user_id, movie_id, true_rating in iter_rating_triples(test_df)
    ]


def build_item_features(movies):
    """Map each movie id, as a string, to its vector of ``svd_*`` columns.

    Ids are read from the ``movieId`` column itself (not from an
    ``iterrows()`` row) so an integer id next to float ``svd_*`` columns is
    stringified as ``"1"`` instead of ``"1.0"``.

    Args:
        movies: Frame with a ``movieId`` column and zero or more columns whose
            names start with ``"svd_"``.

    Returns:
        Dict of ``str(movieId)`` -> 1-D NumPy array holding that row's
        ``svd_*`` values in column order. A repeated ``movieId`` keeps the
        last row seen.
    """
    svd_cols = [col for col in movies.columns if col.startswith("svd_")]
    vectors = movies[svd_cols].to_numpy()
    return {
        str(movie_id): vector
        for movie_id, vector in zip(movies["movieId"], vectors)
    }


def main():
    """Run the recommender pipeline end to end and print its results.

    Steps, in order: preprocessing, feature engineering, fitting and saving
    the recommender models, printing sample content-based recommendations for
    user 1, then evaluating the content-based, collaborative (SVD) and hybrid
    predictors on a leave-one-out split and printing the summary.
    """
    print("========== Step 1: Preprocessing ==========")
    dfs = Preprocessing().run_all()

    # print("========== Step 2: Exploratory Data Analysis (EDA) ==========")
    # eda = EDA(dfs)
    # eda.run_all()

    print("========== Step 3: Feature Engineering ==========")
    fe_outputs = FeatureEngineering(dfs).run_all()
    merged_df_with_tfidf = fe_outputs["merged_df_with_tfidf"]
    unique_movies_reduced = fe_outputs["unique_movies_reduced"]
    ratings_df = dfs["ratings_df"]

    print("========== Step 4: Modeling & Recommendation ==========")
    models = RecommenderModels(
        merged_df_with_tfidf=merged_df_with_tfidf,
        unique_movies_reduced=unique_movies_reduced,
        ratings_df=ratings_df,
    )
    models.fit_popularity()
    models.fit_content_based()
    models.fit_cf()
    print("CF RMSEs (kNN, SVD):", models.evaluate_cf())
    # Only the selected alpha is used below; the per-alpha scores are ignored.
    _, best_alpha = models.tune_hybrid_alpha()
    print("Best alpha:", best_alpha)
    print("Hybrid RMSE:", models.evaluate_hybrid())
    models.save_models()

    # Example: get recommendations for user 1
    print("Top 10 Content-Based Recommendations for user 1:")
    print(models.get_content_based_recommendations(user_id=1, top_n=10))

    print("========== Step 5: Evaluation ==========")
    # Time-aware split.
    # NOTE(review): the models above were built from the full ratings_df, so
    # the held-out rows may already have been seen during fitting; the train
    # half of the split is not used anywhere below. Confirm whether the models
    # should be refit on the train half before these metrics are trusted.
    _train_ratings, test_ratings = leave_one_out_by_timestamp(ratings_df)

    all_items = set(merged_df_with_tfidf['movieId'].astype(str).unique())
    item_popularity = {
        str(movie_id): count
        for movie_id, count
        in merged_df_with_tfidf['movieId'].value_counts().to_dict().items()
    }
    item_features = build_item_features(unique_movies_reduced)

    def svd_score(user_id, movie_id):
        # SVD is the collaborative model here (knn_user_based is the
        # alternative mentioned by the original author).
        try:
            return models.svd_mf.predict(str(user_id), str(movie_id)).est
        except Exception:
            # Best-effort fallback kept from the original script.
            # NOTE(review): 0 is probably outside the rating scale and will
            # inflate error metrics if this branch is ever taken; confirm.
            return 0

    def hybrid_score(user_id, movie_id):
        return models.hybrid_prediction(user_id, movie_id, best_alpha)

    # Generate predictions for each model first, then evaluate them all.
    predictions = {
        "Content-Based": collect_predictions(
            test_ratings, models.get_content_based_score
        ),
        "Collaborative": collect_predictions(test_ratings, svd_score),
        "Hybrid": collect_predictions(test_ratings, hybrid_score),
    }
    results = {
        model_name: evaluate_all(
            model_predictions,
            test_ratings.values,
            all_items,
            item_popularity,
            item_features,
        )
        for model_name, model_predictions in predictions.items()
    }

    # Print summary table
    print(summarize_results(results))
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()