Spaces:

N-Kibria
/

DataSynthis_Job_task

Sleeping

App Files Files Community

N-Kibria committed on Oct 4, 2025

Commit

8c0624c

verified ·

1 Parent(s): cd645ae

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -381

app.py CHANGED Viewed

@@ -343,17 +343,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                - Deep learning model with user and movie embeddings
                - Learns complex non-linear patterns in user behavior
-            ### 📊 Dataset
-            - **MovieLens 100k** dataset
-            - 100,000 ratings from 943 users on 1,682 movies
-            - Ratings scale: 1-5 stars
             ### 🎯 Performance Metrics
             - **Precision@10**: 26.77%
             - **NDCG@10**: 28.50%
             - **Model improves recommendations by 40% vs baseline**
-            ### 👨‍💻 Created For
             **DataSynthis Job Task**
             ### 🔗 Technologies Used
@@ -375,382 +372,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
     </div>
     """)
-if __name__ == "__main__":
-    demo.launch(
-        share=False,
-        server_name="0.0.0.0",
-        server_port=7860
-    )import gradio as gr
-import pickle
-import pandas as pd
-import numpy as np
-import torch
-import torch.nn as nn
-import os
-from scipy.sparse import csr_matrix
-class ItemBasedCF:
-    pass
-class SVDRecommender:
-    pass
-class NeuralCF(nn.Module):
-    def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
-        super(NeuralCF, self).__init__()
-        self.user_embedding = nn.Embedding(n_users, embedding_dim)
-        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)
-        layers = []
-        input_dim = embedding_dim * 2
-        for hidden_dim in hidden_layers:
-            layers.append(nn.Linear(input_dim, hidden_dim))
-            layers.append(nn.ReLU())
-            layers.append(nn.Dropout(0.2))
-            input_dim = hidden_dim
-        layers.append(nn.Linear(input_dim, 1))
-        self.mlp = nn.Sequential(*layers)
-    def forward(self, user_ids, movie_ids):
-        user_emb = self.user_embedding(user_ids)
-        movie_emb = self.movie_embedding(movie_ids)
-        x = torch.cat([user_emb, movie_emb], dim=1)
-        output = self.mlp(x)
-        return output.squeeze()
-    def predict(self, user_idx, movie_idx, device='cpu'):
-        self.eval()
-        with torch.no_grad():
-            user_tensor = torch.LongTensor([user_idx]).to(device)
-            movie_tensor = torch.LongTensor([movie_idx]).to(device)
-            prediction = self.forward(user_tensor, movie_tensor)
-            return torch.clamp(prediction, 1, 5).item()
-class HybridRecommender:
-    pass
-class MovieLensDataLoader:
-    pass
-def load_model_and_data():
-    import os
-    print("Checking for files...")
-    print(f"Current directory: {os.getcwd()}")
-    print(f"Files in current directory: {os.listdir('.')}")
-    if os.path.exists('model_artifacts'):
-        print(f"Files in model_artifacts/: {os.listdir('model_artifacts')}")
-    else:
-        print("ERROR: model_artifacts/ folder does not exist!")
-    try:
-        files_to_check = [
-            'model_artifacts/hybrid_model.pkl',
-            'model_artifacts/loader.pkl',
-            'model_artifacts/movies.pkl'
-        ]
-        for file_path in files_to_check:
-            if not os.path.exists(file_path):
-                print(f"ERROR: Missing file: {file_path}")
-            else:
-                file_size = os.path.getsize(file_path) / (1024*1024)
-                print(f"Found: {file_path} ({file_size:.2f} MB)")
-        with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
-            model = pickle.load(f)
-        print("✓ Loaded hybrid_model.pkl")
-        with open('model_artifacts/loader.pkl', 'rb') as f:
-            loader = pickle.load(f)
-        print("✓ Loaded loader.pkl")
-        with open('model_artifacts/movies.pkl', 'rb') as f:
-            movies = pickle.load(f)
-        print("✓ Loaded movies.pkl")
-        user_ids = sorted(loader.user_id_map.keys())
-        print(f"✓ Model loaded successfully! {len(user_ids)} users available")
-        return model, loader, movies, user_ids
-    except FileNotFoundError as e:
-        print(f"ERROR: File not found - {e}")
-        print("Make sure all pkl files are in the model_artifacts/ folder")
-        return None, None, None, []
-    except Exception as e:
-        print(f"ERROR loading model: {type(e).__name__}: {e}")
-        import traceback
-        traceback.print_exc()
-        return None, None, None, []
-print("Loading model and data...")
-model, loader, movies_df, user_ids = load_model_and_data()
-print(f"Model loaded! Available users: {len(user_ids)}")
-def get_recommendations(user_id, num_recommendations):
-    if model is None or loader is None:
-        return "❌ Error: Model not loaded properly. Please check the model files."
-    try:
-        user_id = int(user_id)
-        num_recommendations = int(num_recommendations)
-        if user_id not in loader.user_id_map:
-            return f"❌ User ID {user_id} not found! Please select a valid user ID."
-        recommendations = model.recommend_movies(
-            user_id=user_id,
-            N=num_recommendations,
-            user_id_map=loader.user_id_map,
-            reverse_movie_map=loader.reverse_movie_map,
-            movies_df=movies_df
-        )
-        if not recommendations:
-            return f"❌ No recommendations found for User {user_id}"
-        output = f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n"
-        output += "=" * 60 + "\n\n"
-        for i, (movie_id, title, score) in enumerate(recommendations, 1):
-            stars = "⭐" * int(score)
-            output += f"**{i}. {title}**\n"
-            output += f"   • Predicted Rating: {score:.2f}/5.00 {stars}\n"
-            output += f"   • Movie ID: {movie_id}\n\n"
-        return output
-    except ValueError:
-        return "❌ Error: Please enter valid numbers for User ID and Number of Recommendations"
-    except Exception as e:
-        return f"❌ Error generating recommendations: {str(e)}"
-def get_user_history(user_id):
-    if model is None or loader is None:
-        return "❌ Error: Model not loaded properly."
-    try:
-        user_id = int(user_id)
-        if user_id not in loader.user_id_map:
-            return f"❌ User ID {user_id} not found!"
-        user_idx = loader.user_id_map[user_id]
-        user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
-        rated_indices = np.where(user_ratings > 0)[0]
-        if len(rated_indices) == 0:
-            return f"No rating history found for User {user_id}"
-        history = []
-        for movie_idx in rated_indices:
-            original_movie_id = loader.reverse_movie_map[movie_idx]
-            title = movies_df[movies_df['movie_id'] == original_movie_id]['title'].values[0]
-            rating = user_ratings[movie_idx]
-            history.append((title, rating))
-        history.sort(key=lambda x: x[1], reverse=True)
-        output = f"📊 **Rating History for User {user_id}**\n\n"
-        output += f"Total movies rated: {len(history)}\n"
-        output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
-        output += "=" * 60 + "\n\n"
-        output += "**Top 10 Highest Rated Movies:**\n\n"
-        for i, (title, rating) in enumerate(history[:10], 1):
-            stars = "⭐" * int(rating)
-            output += f"{i}. **{title}** - {rating:.1f}/5 {stars}\n"
-        return output
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
-def get_movie_info(movie_title_search):
-    if movies_df is None:
-        return "❌ Error: Movies data not loaded"
-    try:
-        matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
-        if len(matches) == 0:
-            return f"❌ No movies found matching '{movie_title_search}'"
-        output = f"🔍 **Search Results for '{movie_title_search}'**\n\n"
-        output += f"Found {len(matches)} movie(s):\n\n"
-        output += "=" * 60 + "\n\n"
-        for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
-            output += f"{i}. **{row['title']}** (ID: {row['movie_id']})\n"
-        if len(matches) > 20:
-            output += f"\n... and {len(matches) - 20} more results"
-        return output
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
-with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
-    gr.Markdown("""
-    # 🎬 Hybrid Movie Recommendation System
-    ### DataSynthis Job Task - Powered by AI
-    This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
-    to provide personalized movie recommendations from the MovieLens 100k dataset.
-    ---
-    """)
-    with gr.Tabs():
-        with gr.Tab("🎯 Get Recommendations"):
-            gr.Markdown("### Get personalized movie recommendations for any user")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    user_id_input = gr.Number(
-                        label="User ID",
-                        value=1,
-                        minimum=1,
-                        maximum=943,
-                        step=1,
-                        info=f"Enter a user ID (1-943)"
-                    )
-                    num_recs_input = gr.Slider(
-                        label="Number of Recommendations",
-                        minimum=5,
-                        maximum=20,
-                        value=10,
-                        step=1
-                    )
-                    recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
-                with gr.Column(scale=2):
-                    recommendations_output = gr.Textbox(
-                        label="Recommendations",
-                        lines=20,
-                        max_lines=30
-                    )
-            recommend_btn.click(
-                fn=get_recommendations,
-                inputs=[user_id_input, num_recs_input],
-                outputs=recommendations_output
-            )
-            gr.Markdown("""
-            **How it works:**
-            - Enter a User ID (between 1 and 943)
-            - Choose how many recommendations you want
-            - Click "Get Recommendations" to see personalized movie suggestions
-            """)
-        with gr.Tab("📊 User History"):
-            gr.Markdown("### View a user's rating history")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    user_id_history = gr.Number(
-                        label="User ID",
-                        value=1,
-                        minimum=1,
-                        maximum=943,
-                        step=1
-                    )
-                    history_btn = gr.Button("📊 View History", variant="primary")
-                with gr.Column(scale=2):
-                    history_output = gr.Textbox(
-                        label="Rating History",
-                        lines=20,
-                        max_lines=30
-                    )
-            history_btn.click(
-                fn=get_user_history,
-                inputs=user_id_history,
-                outputs=history_output
-            )
-        with gr.Tab("🔍 Search Movies"):
-            gr.Markdown("### Search for movies in the database")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    movie_search = gr.Textbox(
-                        label="Movie Title Search",
-                        placeholder="e.g., Star Wars, Godfather, Titanic...",
-                        value="Star Wars"
-                    )
-                    search_btn = gr.Button("🔍 Search", variant="primary")
-                with gr.Column(scale=2):
-                    search_output = gr.Textbox(
-                        label="Search Results",
-                        lines=20,
-                        max_lines=30
-                    )
-            search_btn.click(
-                fn=get_movie_info,
-                inputs=movie_search,
-                outputs=search_output
-            )
-        with gr.Tab("ℹ️ About"):
-            gr.Markdown(""" About This System
-            Model Architecture
-            This is a Hybrid Recommendation System that combines three powerful approaches:
-            1. Item-Based Collaborative Filtering
-               - Uses cosine similarity between movies
-               - Recommends movies similar to what you've liked before
-            2. SVD Matrix Factorization
-               - Decomposes the user-movie rating matrix
-               - Discovers latent factors that explain user preferences
-            3. Neural Collaborative Filtering (NCF)
-               - Deep learning model with user and movie embeddings
-               - Learns complex non-linear patterns in user behavior
-            Performance Metrics
-            - Precision@10: 26.77%
-            - NDCG@10: 28.50%
-            - Model improves recommendations by 40% vs baseline**
-            Technologies Used
-            - PyTorch (Neural Networks)
-            - Scikit-learn (SVD, Similarity)
-            - Pandas & NumPy (Data Processing)
-            - Gradio (Web Interface)
-            ---
-            **Note**: This model is trained on the MovieLens 100k dataset.
-            User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
-            """)
-    gr.Markdown("""
-    ---
-    <div style='text-align: center'>
-        <p>🎬 <strong>Hybrid Movie Recommendation System</strong> | Built with ❤️ for DataSynthis</p>
-    </div>
-    """)
 if __name__ == "__main__":
     demo.launch(
         share=False,

                - Deep learning model with user and movie embeddings
                - Learns complex non-linear patterns in user behavior
             ### 🎯 Performance Metrics
             - **Precision@10**: 26.77%
             - **NDCG@10**: 28.50%
             - **Model improves recommendations by 40% vs baseline**
+            ### Created For
             **DataSynthis Job Task**
             ### 🔗 Technologies Used
     </div>
     """)
 if __name__ == "__main__":
     demo.launch(
         share=False,