| """Information Retrieval metrics | |
| Useful Resources: | |
| http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt | |
| http://www.nii.ac.jp/TechReports/05-014E.pdf | |
| http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
| http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf | |
| Learning to Rank for Information Retrieval (Tie-Yan Liu) | |
| """ | |
| import numpy as np | |
| import pdb | |
| import os | |
| import pickle | |
def mean_reciprocal_rank(rs):
    """Return the mean reciprocal rank of the first relevant item.

    The first element of each ranking is 'rank 1'. Relevance is binary
    (any nonzero value is relevant). Rankings with no relevant item
    contribute 0 to the mean.

    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.61111111111111105
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> mean_reciprocal_rank(rs)
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.75

    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean reciprocal rank
    """
    reciprocal_ranks = []
    for scores in rs:
        # Positions of the relevant (nonzero) items, in rank order.
        hits = np.asarray(scores).nonzero()[0]
        reciprocal_ranks.append(1. / (hits[0] + 1) if hits.size else 0.)
    return np.mean(reciprocal_ranks)
| def r_precision(r): | |
| """Score is precision after all relevant documents have been retrieved | |
| Relevance is binary (nonzero is relevant). | |
| >>> r = [0, 0, 1] | |
| >>> r_precision(r) | |
| 0.33333333333333331 | |
| >>> r = [0, 1, 0] | |
| >>> r_precision(r) | |
| 0.5 | |
| >>> r = [1, 0, 0] | |
| >>> r_precision(r) | |
| 1.0 | |
| Args: | |
| r: Relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| Returns: | |
| R Precision | |
| """ | |
| r = np.asarray(r) != 0 | |
| z = r.nonzero()[0] | |
| if not z.size: | |
| return 0. | |
| return np.mean(r[:z[-1] + 1]) | |
| def precision_at_k(r, k): | |
| """Score is precision @ k | |
| Relevance is binary (nonzero is relevant). | |
| >>> r = [0, 0, 1] | |
| >>> precision_at_k(r, 1) | |
| 0.0 | |
| >>> precision_at_k(r, 2) | |
| 0.0 | |
| >>> precision_at_k(r, 3) | |
| 0.33333333333333331 | |
| >>> precision_at_k(r, 4) | |
| Traceback (most recent call last): | |
| File "<stdin>", line 1, in ? | |
| ValueError: Relevance score length < k | |
| Args: | |
| r: Relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| Returns: | |
| Precision @ k | |
| Raises: | |
| ValueError: len(r) must be >= k | |
| """ | |
| assert k >= 1 | |
| r = np.asarray(r)[:k] != 0 | |
| if r.size != k: | |
| raise ValueError('Relevance score length < k') | |
| return np.mean(r) | |
| def average_precision(r): | |
| """Score is average precision (area under PR curve) | |
| Relevance is binary (nonzero is relevant). | |
| >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1] | |
| >>> delta_r = 1. / sum(r) | |
| >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y]) | |
| 0.7833333333333333 | |
| >>> average_precision(r) | |
| 0.78333333333333333 | |
| Args: | |
| r: Relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| Returns: | |
| Average precision | |
| """ | |
| r = np.asarray(r) != 0 | |
| out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]] | |
| if not out: | |
| return 0. | |
| return np.mean(out) | |
| def mean_average_precision(rs): | |
| """Score is mean average precision | |
| Relevance is binary (nonzero is relevant). | |
| >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]] | |
| >>> mean_average_precision(rs) | |
| 0.78333333333333333 | |
| >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]] | |
| >>> mean_average_precision(rs) | |
| 0.39166666666666666 | |
| Args: | |
| rs: Iterator of relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| Returns: | |
| Mean average precision | |
| """ | |
| return np.mean([average_precision(r) for r in rs]) | |
| def row_pop_average_precision(r, target): | |
| """Score is average precision (area under PR curve) | |
| Relevance is binary (nonzero is relevant). | |
| >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1] | |
| >>> delta_r = 1. / sum(r) | |
| >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y]) | |
| 0.7833333333333333 | |
| >>> average_precision(r) | |
| 0.78333333333333333 | |
| Args: | |
| r: Relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| Returns: | |
| Average precision | |
| """ | |
| r = np.asarray(r) != 0 | |
| out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]] | |
| if len(out) < len(target): | |
| out += [0] * (len(target) - len(out)) | |
| if not out: | |
| return 0. | |
| return np.mean(out) | |
| def dcg_at_k(r, k, method=0): | |
| """Score is discounted cumulative gain (dcg) | |
| Relevance is positive real values. Can use binary | |
| as the previous methods. | |
| Example from | |
| http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
| >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0] | |
| >>> dcg_at_k(r, 1) | |
| 3.0 | |
| >>> dcg_at_k(r, 1, method=1) | |
| 3.0 | |
| >>> dcg_at_k(r, 2) | |
| 5.0 | |
| >>> dcg_at_k(r, 2, method=1) | |
| 4.2618595071429155 | |
| >>> dcg_at_k(r, 10) | |
| 9.6051177391888114 | |
| >>> dcg_at_k(r, 11) | |
| 9.6051177391888114 | |
| Args: | |
| r: Relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| k: Number of results to consider | |
| method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...] | |
| If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...] | |
| Returns: | |
| Discounted cumulative gain | |
| """ | |
| r = np.asfarray(r)[:k] | |
| if r.size: | |
| if method == 0: | |
| return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1))) | |
| elif method == 1: | |
| return np.sum(r / np.log2(np.arange(2, r.size + 2))) | |
| else: | |
| raise ValueError('method must be 0 or 1.') | |
| return 0. | |
| def ndcg_at_k(r, k, method=0): | |
| """Score is normalized discounted cumulative gain (ndcg) | |
| Relevance is positive real values. Can use binary | |
| as the previous methods. | |
| Example from | |
| http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf | |
| >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0] | |
| >>> ndcg_at_k(r, 1) | |
| 1.0 | |
| >>> r = [2, 1, 2, 0] | |
| >>> ndcg_at_k(r, 4) | |
| 0.9203032077642922 | |
| >>> ndcg_at_k(r, 4, method=1) | |
| 0.96519546960144276 | |
| >>> ndcg_at_k([0], 1) | |
| 0.0 | |
| >>> ndcg_at_k([1], 2) | |
| 1.0 | |
| Args: | |
| r: Relevance scores (list or numpy) in rank order | |
| (first element is the first item) | |
| k: Number of results to consider | |
| method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...] | |
| If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...] | |
| Returns: | |
| Normalized discounted cumulative gain | |
| """ | |
| dcg_max = dcg_at_k(sorted(r, reverse=True), k, method) | |
| if not dcg_max: | |
| return 0. | |
| return dcg_at_k(r, k, method) / dcg_max | |