Spaces:
Running
Running
| from dataclasses import dataclass | |
| import pandas as pd | |
| from src.methodology import SimpleMethodology | |
| from src.neighbours import EmbeddingClosestNeighbours | |
| from src.scorer import EmbeddingsOriginalityScorer | |
| # Length key whose scaler EmbeddingsAnalysis.__init__ passes to SimpleMethodology as its second argument. | |
| # NOTE(review): presumably the scaler applied when a length has no entry of its own; confirm in SimpleMethodology. | |
| _FALLBACK_INDEX = 99 | |
class EmbeddingsAnalysis:
    """
    Single entry point for embedding analysis: closest-neighbour lookup plus originality scoring.

    :param index: FAISS index for similarity search; handed to both the neighbour search and the scorer.
    :param all_labels: DataFrame with 'track_id' and 'length' columns for the indexed entries.
    :param lookup: Pandas DataFrame containing metadata for each indexed entry.
    :param scalers: Dictionary keyed by ``(start, stop)`` length ranges (``stop`` exclusive) whose values
        are the quantile transformers used for score normalization. One of the ranges has to contain
        ``_FALLBACK_INDEX``; otherwise construction fails with ``KeyError``.
    :param radii: Forwarded unchanged to ``EmbeddingsOriginalityScorer``.
    :param close_threshold: Similarity threshold for neighbor search.
    :param score_power: Forwarded unchanged to ``SimpleMethodology`` as ``score_power``.
    """

    def __init__(self, index, all_labels, lookup, scalers, radii, close_threshold=0.95, score_power=1.0):
        track_ids = all_labels['track_id'].to_numpy()
        entry_lengths = all_labels['length'].to_numpy()
        self._ecn = EmbeddingClosestNeighbours(
            index,
            track_ids,
            entry_lengths,
            lookup,
            close_threshold=close_threshold,
        )

        # Flatten the range-keyed mapping into one entry per integer length.
        # Ranges are visited in dictionary order, so a later range wins on overlap.
        scaler_by_length = {}
        for (start, stop), scaler in scalers.items():
            for length in range(start, stop):
                scaler_by_length[length] = scaler

        methodology = SimpleMethodology(
            scaler_by_length,
            scaler_by_length[_FALLBACK_INDEX],
            score_power=score_power,
        )
        self._scorer = EmbeddingsOriginalityScorer(index, track_ids, radii, methodology)

    def get_scores(self, embeddings, lengths):
        """Wrap ``lengths`` in a ``pd.Series`` and return the scorer's result for ``embeddings``."""
        return self._scorer.score(embeddings, pd.Series(lengths))

    def get_neighbours(self, embeddings, limit=None):
        """Return the neighbour search result for ``embeddings``; ``limit`` is passed through as-is."""
        return self._ecn.get(embeddings, limit)