| | |
| | |
| | |
/**
 * Scores a model prediction against a reference answer using simple
 * whitespace-token metrics (exact match, token-level F1, ROUGE-1 recall)
 * plus throughput figures derived from the answer length and latency.
 */
export class Evaluator {
  constructor() {}

  /**
   * Computes every metric for a single prediction.
   *
   * @param {{answer: *}|string} pred - Prediction object whose `answer`
   *   property holds the generated text. A bare string is also accepted and
   *   treated as the answer itself.
   * @param {*} truth - Reference answer; coerced to a string.
   * @param {number} latencyMs - Generation latency in milliseconds.
   * @returns {{exactMatch: number, f1WordLevel: number, rouge1: number,
   *   totalTokens: number, tokensPerSecond: number}} The metric values.
   */
  evaluate(pred, truth, latencyMs) {
    // Every metric must look at the same text. Counting tokens on the
    // wrapper object would stringify it to "[object Object]".
    const answer = pred !== null && typeof pred === 'object' ? pred.answer : pred;
    const totalTokens = this._countTokens(answer);
    return {
      exactMatch: this._exactMatch(answer, truth),
      f1WordLevel: this._f1WordLevel(answer, truth),
      rouge1: this._rouge1(answer, truth),
      totalTokens,
      tokensPerSecond: this._tokensPerSecond(totalTokens, latencyMs),
    };
  }

  /**
   * Token-level F1 between prediction and reference.
   *
   * The overlap is clipped by token multiplicity, so repeating a correct
   * word in the prediction cannot push recall (or F1) above 1.
   *
   * @param {*} pred - Predicted answer text.
   * @param {*} truth - Reference answer text.
   * @returns {number} F1 in [0, 1]; 0 when no token is shared.
   */
  _f1WordLevel(pred, truth) {
    const predTokens = this._tokenize(pred);
    const truthTokens = this._tokenize(truth);
    const numCommon = this._overlapCount(predTokens, truthTokens);
    if (numCommon === 0) return 0;
    const precision = numCommon / predTokens.length;
    const recall = numCommon / truthTokens.length;
    return (2 * precision * recall) / (precision + recall);
  }

  /**
   * ROUGE-1 recall: clipped unigram overlap divided by the number of
   * reference tokens.
   *
   * @param {*} pred - Predicted answer text.
   * @param {*} truth - Reference answer text.
   * @returns {number} Recall in [0, 1]; 0 when the reference has no tokens.
   */
  _rouge1(pred, truth) {
    const predTokens = this._tokenize(pred);
    const truthTokens = this._tokenize(truth);
    if (truthTokens.length === 0) return 0;
    return this._overlapCount(predTokens, truthTokens) / truthTokens.length;
  }

  /**
   * Case- and surrounding-whitespace-insensitive equality.
   *
   * @param {*} pred - Predicted answer text.
   * @param {*} truth - Reference answer text.
   * @returns {number} 1 when the normalized strings are identical, else 0.
   */
  _exactMatch(pred, truth) {
    return this._normalize(pred) === this._normalize(truth) ? 1 : 0;
  }

  /**
   * Converts a value to a trimmed, lower-cased string.
   *
   * Only `null`/`undefined` collapse to the empty string; other falsy
   * values such as the number 0 keep their textual form ("0").
   *
   * @param {*} s - Value to normalize.
   * @returns {string} Normalized text.
   */
  _normalize(s) {
    return (s == null ? '' : String(s)).trim().toLowerCase();
  }

  /**
   * Splits a value into normalized, non-empty whitespace-delimited tokens.
   *
   * @param {*} s - Value to tokenize.
   * @returns {string[]} Lower-cased tokens.
   */
  _tokenize(s) {
    return this._normalize(s).split(/\s+/).filter(Boolean);
  }

  /**
   * Counts predicted tokens that can be matched one-to-one with reference
   * tokens (multiset intersection size).
   *
   * @param {string[]} predTokens - Tokens of the prediction.
   * @param {string[]} truthTokens - Tokens of the reference.
   * @returns {number} Number of matched tokens.
   */
  _overlapCount(predTokens, truthTokens) {
    // A Map (not a plain object) keeps tokens like "constructor" or
    // "__proto__" from resolving to inherited Object.prototype members.
    const remaining = new Map();
    for (const token of truthTokens) {
      remaining.set(token, (remaining.get(token) || 0) + 1);
    }
    let overlap = 0;
    for (const token of predTokens) {
      const left = remaining.get(token) || 0;
      if (left > 0) {
        overlap += 1;
        remaining.set(token, left - 1);
      }
    }
    return overlap;
  }

  /**
   * Counts whitespace-delimited tokens in a value.
   *
   * @param {*} s - Value to count; `null`/`undefined` count as empty.
   * @returns {number} Number of non-empty tokens.
   */
  _countTokens(s) {
    return (s == null ? '' : String(s)).trim().split(/\s+/).filter(Boolean).length;
  }

  /**
   * Converts a token count and a latency into a throughput figure.
   *
   * @param {number} tokenCount - Number of tokens produced.
   * @param {number} latencyMs - Elapsed time in milliseconds.
   * @returns {number} Tokens per second, or 0 when the latency is not
   *   greater than zero.
   */
  _tokensPerSecond(tokenCount, latencyMs) {
    return latencyMs > 0 ? tokenCount / (latencyMs / 1000) : 0;
  }

  /**
   * Custom metric hook. Currently ignores both arguments and returns 0.
   * NOTE(review): looks like a placeholder for a user-defined metric, and
   * it is not included in the result of `evaluate` — confirm intent.
   *
   * @param {*} pred - Predicted answer text (unused).
   * @param {*} truth - Reference answer text (unused).
   * @returns {number} Always 0.
   */
  _myMetric(pred, truth) {
    return 0;
  }
}