Spaces:
Runtime error
Runtime error
"""
Recommendation Engine Module

This module implements semantic search using FAISS and cosine similarity
to retrieve the most relevant assessments for a given query.
"""
import logging
import pickle
from typing import Dict, List, Optional, Tuple

import faiss
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
# Configure the root logger when this module is imported and create the
# module-level logger used by everything below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
class AssessmentRecommender:
    """Recommender system using FAISS and embeddings.

    Typical use: call :meth:`load_index` once, then :meth:`recommend` or
    :meth:`recommend_batch`. The query embedding model is loaded lazily on
    the first recommendation request.
    """

    def __init__(self) -> None:
        # FAISS index object returned by faiss.read_index(); None until loaded.
        self.faiss_index = None
        # Array loaded from the .npy file, used by the cosine search path.
        self.embeddings = None
        # Maps a result position (int) to that assessment's metadata dict.
        self.assessment_mapping = {}
        # Query encoder from src.embedder; created lazily by load_embedder().
        self.embedder = None

    def load_index(self,
                   index_path: str = 'models/faiss_index.faiss',
                   embeddings_path: str = 'models/embeddings.npy',
                   mapping_path: str = 'models/mapping.pkl') -> bool:
        """Load the FAISS index, the embedding matrix and the mapping.

        All three artifacts are read into local variables first and only
        assigned to the instance once every one of them loaded, so a failure
        never leaves the recommender half-initialised.

        Args:
            index_path: Path of the serialized FAISS index.
            embeddings_path: Path of the ``.npy`` embeddings file.
            mapping_path: Path of the pickled assessment mapping.

        Returns:
            True when everything loaded, False otherwise (the error is logged
            with its traceback and previously loaded state is kept).
        """
        try:
            faiss_index = faiss.read_index(index_path)
            logger.info(f"Loaded FAISS index with {faiss_index.ntotal} vectors")

            embeddings = np.load(embeddings_path)
            logger.info(f"Loaded embeddings with shape {embeddings.shape}")

            # SECURITY: pickle.load() can execute arbitrary code embedded in
            # the file. Only point mapping_path at artifacts you produced.
            with open(mapping_path, 'rb') as f:
                assessment_mapping = pickle.load(f)
            logger.info(f"Loaded {len(assessment_mapping)} assessment mappings")
        except Exception as e:
            # Deliberate catch-all: callers rely on the boolean result rather
            # than on exceptions from three different libraries.
            logger.exception(f"Error loading index: {e}")
            return False

        self.faiss_index = faiss_index
        self.embeddings = embeddings
        self.assessment_mapping = assessment_mapping
        return True

    def load_embedder(self) -> None:
        """Load the embedding model for query encoding (no-op if loaded)."""
        if self.embedder is not None:
            return

        # Imported lazily so the project module is only needed when a query
        # actually has to be encoded.
        from src.embedder import EmbeddingGenerator

        # Assign only after load_model() succeeded; otherwise a failed load
        # would leave an unusable embedder that later calls never retry.
        embedder = EmbeddingGenerator()
        embedder.load_model()
        self.embedder = embedder
        logger.info("Embedding model loaded")

    @staticmethod
    def _validate_k(k: int) -> None:
        """Reject non-positive k, which would otherwise misbehave silently."""
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")

    @staticmethod
    def _as_query_matrix(query_embedding) -> np.ndarray:
        """Return the query as an array, promoting a 1-D vector to one row."""
        query = np.asarray(query_embedding)
        if query.ndim == 1:
            query = query.reshape(1, -1)
        return query

    def search_faiss(self, query_embedding: np.ndarray, k: int = 15) -> Tuple[np.ndarray, np.ndarray]:
        """Search the FAISS index for the entries closest to the query.

        Args:
            query_embedding: Query vector (1-D) or single-row matrix (2-D).
            k: Number of neighbours to request; must be >= 1.

        Returns:
            ``(distances, indices)`` for the first query row, as reported by
            the index. FAISS pads with index ``-1`` when it holds fewer than
            ``k`` vectors.

        Raises:
            ValueError: If the index is not loaded or ``k`` is not positive.
        """
        if self.faiss_index is None:
            raise ValueError("FAISS index not loaded. Call load_index() first.")
        self._validate_k(k)

        # FAISS requires C-contiguous float32 input.
        query = np.ascontiguousarray(
            self._as_query_matrix(query_embedding), dtype='float32'
        )
        distances, indices = self.faiss_index.search(query, k)
        return distances[0], indices[0]

    def search_cosine(self, query_embedding: np.ndarray, k: int = 15) -> Tuple[np.ndarray, np.ndarray]:
        """Search using sklearn cosine similarity over the loaded embeddings.

        Args:
            query_embedding: Query vector (1-D) or single-row matrix (2-D).
            k: Number of results to return; must be >= 1. If fewer than ``k``
                embeddings exist, all of them are returned.

        Returns:
            ``(scores, indices)`` sorted by descending similarity.

        Raises:
            ValueError: If embeddings are not loaded or ``k`` is not positive.
        """
        if self.embeddings is None:
            raise ValueError("Embeddings not loaded. Call load_index() first.")
        # Without this check k == 0 would slice ``[-0:]`` and return every row.
        self._validate_k(k)

        query = self._as_query_matrix(query_embedding)
        similarities = cosine_similarity(query, self.embeddings)[0]

        # argsort is ascending: keep the last k entries, then reverse them.
        top_k_indices = np.argsort(similarities)[-k:][::-1]
        top_k_scores = similarities[top_k_indices]
        return top_k_scores, top_k_indices

    def _search(self, query_embedding, k: int, method: str) -> Tuple[np.ndarray, np.ndarray]:
        """Dispatch to the FAISS search, or to cosine for any other method."""
        if method == 'faiss':
            return self.search_faiss(query_embedding, k)
        return self.search_cosine(query_embedding, k)

    def _build_results(self, indices, scores) -> List[Dict]:
        """Turn parallel index/score arrays into annotated assessment dicts."""
        recommendations = []
        for idx, score in zip(indices, scores):
            position = int(idx)
            # Skips FAISS "-1" padding and any position without metadata.
            if position < 0 or position not in self.assessment_mapping:
                continue
            # Copy so the stored mapping entry is not given score/index keys.
            assessment = self.assessment_mapping[position].copy()
            assessment['score'] = float(score)
            assessment['index'] = position
            recommendations.append(assessment)
        return recommendations

    def recommend(self,
                  query: str,
                  k: int = 15,
                  method: str = 'faiss') -> List[Dict]:
        """
        Recommend assessments for a given query

        Args:
            query: Job description or query string
            k: Number of recommendations to return (must be >= 1)
            method: 'faiss' or 'cosine'; any value other than 'faiss'
                selects the cosine search

        Returns:
            List of recommended assessments, each a copy of its mapping entry
            with added 'score' and 'index' keys. For 'faiss' the score is the
            value the index reports; depending on the index type it may be a
            distance rather than a similarity (NOTE(review): confirm against
            how the index was built).

        Raises:
            ValueError: If the required index/embeddings are not loaded or
                k is not positive.
        """
        if self.embedder is None:
            self.load_embedder()

        query_embedding = self.embedder.embed_query(query)
        scores, indices = self._search(query_embedding, k, method)
        recommendations = self._build_results(indices, scores)

        logger.info(f"Found {len(recommendations)} recommendations for query")
        return recommendations

    def recommend_batch(self,
                        queries: List[str],
                        k: int = 15,
                        method: str = 'faiss') -> List[List[Dict]]:
        """
        Recommend assessments for multiple queries

        Args:
            queries: List of job descriptions or query strings
            k: Number of recommendations per query (must be >= 1)
            method: 'faiss' or 'cosine'; any value other than 'faiss'
                selects the cosine search

        Returns:
            List of recommendation lists, one per query and in the same
            order. An empty ``queries`` yields an empty list without loading
            the embedding model.

        Raises:
            ValueError: If the required index/embeddings are not loaded or
                k is not positive.
        """
        if len(queries) == 0:
            return []

        if self.embedder is None:
            self.load_embedder()

        query_embeddings = self.embedder.embed_queries(queries)

        all_recommendations = []
        for query_embedding in query_embeddings:
            scores, indices = self._search(query_embedding, k, method)
            all_recommendations.append(self._build_results(indices, scores))

        logger.info(f"Generated recommendations for {len(queries)} queries")
        return all_recommendations

    def get_assessment_by_url(self, url: str) -> Optional[Dict]:
        """Get assessment details by URL.

        Returns:
            The stored mapping entry (not a copy) whose 'assessment_url'
            equals ``url``, or None when no entry matches. Entries without
            that key are skipped.
        """
        for assessment in self.assessment_mapping.values():
            if assessment.get('assessment_url') == url:
                return assessment
        return None

    def get_assessment_by_name(self, name: str) -> Optional[Dict]:
        """Get assessment details by name (case-insensitive).

        Returns:
            The stored mapping entry (not a copy) whose 'assessment_name'
            matches ``name`` ignoring case, or None when no entry matches.
            Entries whose name is missing or not a string are skipped.
        """
        name_lower = name.lower()
        for assessment in self.assessment_mapping.values():
            candidate = assessment.get('assessment_name')
            if isinstance(candidate, str) and candidate.lower() == name_lower:
                return assessment
        return None
def main():
    """Run a small smoke test of the recommender against sample queries.

    Loads the index artifacts from their default paths, runs a few fixed
    queries through the FAISS search path and prints the top five results
    for each.

    Returns:
        The AssessmentRecommender instance. If the artifacts could not be
        loaded, it is returned without any queries having been run.
    """
    recommender = AssessmentRecommender()

    # load_index() reports failure via its return value. Without this check
    # the first recommend() call would fail later with an unrelated-looking
    # "FAISS index not loaded" ValueError.
    if not recommender.load_index():
        logger.error("Could not load recommendation artifacts; skipping test queries")
        return recommender

    test_queries = [
        "Looking for a Java developer with strong programming skills",
        "Need a team leader with excellent communication and management abilities",
        "Seeking a data analyst who can work with SQL and Python",
        "Want to assess personality traits for customer service role",
    ]

    print("\n=== Recommendation Test ===\n")
    for query in test_queries:
        print(f"\nQuery: {query}")
        print("-" * 80)

        recommendations = recommender.recommend(query, k=5, method='faiss')
        for i, rec in enumerate(recommendations, 1):
            print(f"\n{i}. {rec['assessment_name']}")
            print(f" Category: {rec['category']}")
            print(f" Type: {rec['test_type']}")
            print(f" Score: {rec['score']:.4f}")
            print(f" Description: {rec['description'][:100]}...")

    return recommender


if __name__ == "__main__":
    main()