N-Kibria committed on
Commit
8c0624c
·
verified ·
1 Parent(s): cd645ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -381
app.py CHANGED
@@ -343,17 +343,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
343
  - Deep learning model with user and movie embeddings
344
  - Learns complex non-linear patterns in user behavior
345
 
346
- ### 📊 Dataset
347
- - **MovieLens 100k** dataset
348
- - 100,000 ratings from 943 users on 1,682 movies
349
- - Ratings scale: 1-5 stars
350
 
351
  ### 🎯 Performance Metrics
352
  - **Precision@10**: 26.77%
353
  - **NDCG@10**: 28.50%
354
  - **Model improves recommendations by 40% vs baseline**
355
 
356
- ### 👨‍💻 Created For
357
  **DataSynthis Job Task**
358
 
359
  ### 🔗 Technologies Used
@@ -375,382 +372,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
375
  </div>
376
  """)
377
 
378
- if __name__ == "__main__":
379
- demo.launch(
380
- share=False,
381
- server_name="0.0.0.0",
382
- server_port=7860
383
- )import gradio as gr
384
- import pickle
385
- import pandas as pd
386
- import numpy as np
387
- import torch
388
- import torch.nn as nn
389
- import os
390
- from scipy.sparse import csr_matrix
391
-
392
- class ItemBasedCF:
393
- pass
394
-
395
- class SVDRecommender:
396
- pass
397
-
398
- class NeuralCF(nn.Module):
399
-
400
- def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
401
- super(NeuralCF, self).__init__()
402
- self.user_embedding = nn.Embedding(n_users, embedding_dim)
403
- self.movie_embedding = nn.Embedding(n_movies, embedding_dim)
404
-
405
- layers = []
406
- input_dim = embedding_dim * 2
407
- for hidden_dim in hidden_layers:
408
- layers.append(nn.Linear(input_dim, hidden_dim))
409
- layers.append(nn.ReLU())
410
- layers.append(nn.Dropout(0.2))
411
- input_dim = hidden_dim
412
- layers.append(nn.Linear(input_dim, 1))
413
- self.mlp = nn.Sequential(*layers)
414
-
415
- def forward(self, user_ids, movie_ids):
416
- user_emb = self.user_embedding(user_ids)
417
- movie_emb = self.movie_embedding(movie_ids)
418
- x = torch.cat([user_emb, movie_emb], dim=1)
419
- output = self.mlp(x)
420
- return output.squeeze()
421
-
422
- def predict(self, user_idx, movie_idx, device='cpu'):
423
- self.eval()
424
- with torch.no_grad():
425
- user_tensor = torch.LongTensor([user_idx]).to(device)
426
- movie_tensor = torch.LongTensor([movie_idx]).to(device)
427
- prediction = self.forward(user_tensor, movie_tensor)
428
- return torch.clamp(prediction, 1, 5).item()
429
-
430
- class HybridRecommender:
431
- pass
432
-
433
- class MovieLensDataLoader:
434
- pass
435
-
436
- def load_model_and_data():
437
- import os
438
-
439
- print("Checking for files...")
440
- print(f"Current directory: {os.getcwd()}")
441
- print(f"Files in current directory: {os.listdir('.')}")
442
-
443
- if os.path.exists('model_artifacts'):
444
- print(f"Files in model_artifacts/: {os.listdir('model_artifacts')}")
445
- else:
446
- print("ERROR: model_artifacts/ folder does not exist!")
447
-
448
- try:
449
- files_to_check = [
450
- 'model_artifacts/hybrid_model.pkl',
451
- 'model_artifacts/loader.pkl',
452
- 'model_artifacts/movies.pkl'
453
- ]
454
-
455
- for file_path in files_to_check:
456
- if not os.path.exists(file_path):
457
- print(f"ERROR: Missing file: {file_path}")
458
- else:
459
- file_size = os.path.getsize(file_path) / (1024*1024)
460
- print(f"Found: {file_path} ({file_size:.2f} MB)")
461
-
462
- with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
463
- model = pickle.load(f)
464
- print("✓ Loaded hybrid_model.pkl")
465
-
466
- with open('model_artifacts/loader.pkl', 'rb') as f:
467
- loader = pickle.load(f)
468
- print("✓ Loaded loader.pkl")
469
-
470
- with open('model_artifacts/movies.pkl', 'rb') as f:
471
- movies = pickle.load(f)
472
- print("✓ Loaded movies.pkl")
473
-
474
- user_ids = sorted(loader.user_id_map.keys())
475
- print(f"✓ Model loaded successfully! {len(user_ids)} users available")
476
-
477
- return model, loader, movies, user_ids
478
- except FileNotFoundError as e:
479
- print(f"ERROR: File not found - {e}")
480
- print("Make sure all pkl files are in the model_artifacts/ folder")
481
- return None, None, None, []
482
- except Exception as e:
483
- print(f"ERROR loading model: {type(e).__name__}: {e}")
484
- import traceback
485
- traceback.print_exc()
486
- return None, None, None, []
487
-
488
- print("Loading model and data...")
489
- model, loader, movies_df, user_ids = load_model_and_data()
490
- print(f"Model loaded! Available users: {len(user_ids)}")
491
-
492
- def get_recommendations(user_id, num_recommendations):
493
- if model is None or loader is None:
494
- return "❌ Error: Model not loaded properly. Please check the model files."
495
-
496
- try:
497
- user_id = int(user_id)
498
- num_recommendations = int(num_recommendations)
499
-
500
- if user_id not in loader.user_id_map:
501
- return f"❌ User ID {user_id} not found! Please select a valid user ID."
502
-
503
- recommendations = model.recommend_movies(
504
- user_id=user_id,
505
- N=num_recommendations,
506
- user_id_map=loader.user_id_map,
507
- reverse_movie_map=loader.reverse_movie_map,
508
- movies_df=movies_df
509
- )
510
-
511
- if not recommendations:
512
- return f"❌ No recommendations found for User {user_id}"
513
-
514
- output = f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n"
515
- output += "=" * 60 + "\n\n"
516
-
517
- for i, (movie_id, title, score) in enumerate(recommendations, 1):
518
- stars = "⭐" * int(score)
519
- output += f"**{i}. {title}**\n"
520
- output += f" • Predicted Rating: {score:.2f}/5.00 {stars}\n"
521
- output += f" • Movie ID: {movie_id}\n\n"
522
-
523
- return output
524
-
525
- except ValueError:
526
- return "❌ Error: Please enter valid numbers for User ID and Number of Recommendations"
527
- except Exception as e:
528
- return f"❌ Error generating recommendations: {str(e)}"
529
-
530
- def get_user_history(user_id):
531
- if model is None or loader is None:
532
- return "❌ Error: Model not loaded properly."
533
-
534
- try:
535
- user_id = int(user_id)
536
-
537
- if user_id not in loader.user_id_map:
538
- return f"❌ User ID {user_id} not found!"
539
-
540
- user_idx = loader.user_id_map[user_id]
541
-
542
- user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
543
- rated_indices = np.where(user_ratings > 0)[0]
544
-
545
- if len(rated_indices) == 0:
546
- return f"No rating history found for User {user_id}"
547
-
548
- history = []
549
- for movie_idx in rated_indices:
550
- original_movie_id = loader.reverse_movie_map[movie_idx]
551
- title = movies_df[movies_df['movie_id'] == original_movie_id]['title'].values[0]
552
- rating = user_ratings[movie_idx]
553
- history.append((title, rating))
554
-
555
- history.sort(key=lambda x: x[1], reverse=True)
556
-
557
- output = f"📊 **Rating History for User {user_id}**\n\n"
558
- output += f"Total movies rated: {len(history)}\n"
559
- output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
560
- output += "=" * 60 + "\n\n"
561
- output += "**Top 10 Highest Rated Movies:**\n\n"
562
-
563
- for i, (title, rating) in enumerate(history[:10], 1):
564
- stars = "⭐" * int(rating)
565
- output += f"{i}. **{title}** - {rating:.1f}/5 {stars}\n"
566
-
567
- return output
568
-
569
- except Exception as e:
570
- return f"❌ Error: {str(e)}"
571
-
572
- def get_movie_info(movie_title_search):
573
- if movies_df is None:
574
- return "❌ Error: Movies data not loaded"
575
-
576
- try:
577
- matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
578
-
579
- if len(matches) == 0:
580
- return f"❌ No movies found matching '{movie_title_search}'"
581
-
582
- output = f"🔍 **Search Results for '{movie_title_search}'**\n\n"
583
- output += f"Found {len(matches)} movie(s):\n\n"
584
- output += "=" * 60 + "\n\n"
585
-
586
- for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
587
- output += f"{i}. **{row['title']}** (ID: {row['movie_id']})\n"
588
-
589
- if len(matches) > 20:
590
- output += f"\n... and {len(matches) - 20} more results"
591
-
592
- return output
593
-
594
- except Exception as e:
595
- return f"❌ Error: {str(e)}"
596
-
597
- with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
598
-
599
- gr.Markdown("""
600
- # 🎬 Hybrid Movie Recommendation System
601
- ### DataSynthis Job Task - Powered by AI
602
-
603
- This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
604
- to provide personalized movie recommendations from the MovieLens 100k dataset.
605
-
606
- ---
607
- """)
608
-
609
- with gr.Tabs():
610
-
611
- with gr.Tab("🎯 Get Recommendations"):
612
- gr.Markdown("### Get personalized movie recommendations for any user")
613
-
614
- with gr.Row():
615
- with gr.Column(scale=1):
616
- user_id_input = gr.Number(
617
- label="User ID",
618
- value=1,
619
- minimum=1,
620
- maximum=943,
621
- step=1,
622
- info=f"Enter a user ID (1-943)"
623
- )
624
-
625
- num_recs_input = gr.Slider(
626
- label="Number of Recommendations",
627
- minimum=5,
628
- maximum=20,
629
- value=10,
630
- step=1
631
- )
632
-
633
- recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
634
-
635
- with gr.Column(scale=2):
636
- recommendations_output = gr.Textbox(
637
- label="Recommendations",
638
- lines=20,
639
- max_lines=30
640
- )
641
-
642
- recommend_btn.click(
643
- fn=get_recommendations,
644
- inputs=[user_id_input, num_recs_input],
645
- outputs=recommendations_output
646
- )
647
-
648
- gr.Markdown("""
649
- **How it works:**
650
- - Enter a User ID (between 1 and 943)
651
- - Choose how many recommendations you want
652
- - Click "Get Recommendations" to see personalized movie suggestions
653
- """)
654
-
655
- with gr.Tab("📊 User History"):
656
- gr.Markdown("### View a user's rating history")
657
-
658
- with gr.Row():
659
- with gr.Column(scale=1):
660
- user_id_history = gr.Number(
661
- label="User ID",
662
- value=1,
663
- minimum=1,
664
- maximum=943,
665
- step=1
666
- )
667
-
668
- history_btn = gr.Button("📊 View History", variant="primary")
669
-
670
- with gr.Column(scale=2):
671
- history_output = gr.Textbox(
672
- label="Rating History",
673
- lines=20,
674
- max_lines=30
675
- )
676
-
677
- history_btn.click(
678
- fn=get_user_history,
679
- inputs=user_id_history,
680
- outputs=history_output
681
- )
682
-
683
- with gr.Tab("🔍 Search Movies"):
684
- gr.Markdown("### Search for movies in the database")
685
-
686
- with gr.Row():
687
- with gr.Column(scale=1):
688
- movie_search = gr.Textbox(
689
- label="Movie Title Search",
690
- placeholder="e.g., Star Wars, Godfather, Titanic...",
691
- value="Star Wars"
692
- )
693
-
694
- search_btn = gr.Button("🔍 Search", variant="primary")
695
-
696
- with gr.Column(scale=2):
697
- search_output = gr.Textbox(
698
- label="Search Results",
699
- lines=20,
700
- max_lines=30
701
- )
702
-
703
- search_btn.click(
704
- fn=get_movie_info,
705
- inputs=movie_search,
706
- outputs=search_output
707
- )
708
-
709
- with gr.Tab("ℹ️ About"):
710
- gr.Markdown(""" About This System
711
-
712
- Model Architecture
713
- This is a Hybrid Recommendation System that combines three powerful approaches:
714
-
715
- 1. Item-Based Collaborative Filtering
716
- - Uses cosine similarity between movies
717
- - Recommends movies similar to what you've liked before
718
-
719
- 2. SVD Matrix Factorization
720
- - Decomposes the user-movie rating matrix
721
- - Discovers latent factors that explain user preferences
722
-
723
- 3. Neural Collaborative Filtering (NCF)
724
- - Deep learning model with user and movie embeddings
725
- - Learns complex non-linear patterns in user behavior
726
-
727
-
728
-
729
- Performance Metrics
730
- - Precision@10: 26.77%
731
- - NDCG@10: 28.50%
732
- - Model improves recommendations by 40% vs baseline**
733
-
734
-
735
- Technologies Used
736
- - PyTorch (Neural Networks)
737
- - Scikit-learn (SVD, Similarity)
738
- - Pandas & NumPy (Data Processing)
739
- - Gradio (Web Interface)
740
-
741
- ---
742
-
743
- **Note**: This model is trained on the MovieLens 100k dataset.
744
- User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
745
- """)
746
-
747
- gr.Markdown("""
748
- ---
749
- <div style='text-align: center'>
750
- <p>🎬 <strong>Hybrid Movie Recommendation System</strong> | Built with ❤️ for DataSynthis</p>
751
- </div>
752
- """)
753
-
754
  if __name__ == "__main__":
755
  demo.launch(
756
  share=False,
 
343
  - Deep learning model with user and movie embeddings
344
  - Learns complex non-linear patterns in user behavior
345
 
346
+
 
 
 
347
 
348
  ### 🎯 Performance Metrics
349
  - **Precision@10**: 26.77%
350
  - **NDCG@10**: 28.50%
351
  - **Model improves recommendations by 40% vs baseline**
352
 
353
+ ### Created For
354
  **DataSynthis Job Task**
355
 
356
  ### 🔗 Technologies Used
 
372
  </div>
373
  """)
374
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  if __name__ == "__main__":
376
  demo.launch(
377
  share=False,