// File size: 5,252 Bytes (revision 18573e4)
package bg.bas.dcl.LLMs;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
 * Immutable value object holding the bias-scoring results for one sentence.
 *
 * <p>An instance stores the sentence text, its word count, three per-bias-type
 * maps (pair coverage, signal count, evaluator count), the list of matched
 * dictionary lemmas, and a handful of aggregate counters. The collections
 * passed to the constructor are copied, so later changes made by the caller
 * are not visible through this object.
 *
 * <p>Numeric output produced by {@link #toTsv()} and {@link #toString()} is
 * formatted with {@link Locale#ROOT}, so the decimal separator is always
 * {@code '.'} regardless of the JVM default locale.
 */
public class SentenceBiasScore {

    /**
     * Bias-type keys, in the column order used by {@link #toTsv()},
     * {@link #tsvHeader()} and {@link #coverageArray()}.
     *
     * <p>NOTE: this is a public array and therefore technically mutable;
     * treat it as read-only.
     */
    public static final String[] BIAS_TYPES = {
        "gender", "race_ethnicity", "religion", "disability", "appearance"
    };

    private final String sentence;
    private final int totalWords;
    private final Map<String, Double> pairCoverage;
    private final Map<String, Integer> signalCount;
    private final Map<String, Integer> evaluatorCount;
    /** All dictionary entries matched in this sentence (lemma strings). */
    private final List<String> matchedLemmas;
    /** Total matched bias words (evaluative, non-neutral). */
    private final int totalBiasWords;
    /** Count of matched derogatory terms. */
    private final int totalDerogatory;
    /** Count of matched colloquial terms. */
    private final int totalColloquial;
    private final boolean multiType;

    /**
     * Creates a score object. The maps and the list are defensively copied
     * (iteration order is preserved) and exposed only as unmodifiable views.
     *
     * @param sentence        the sentence text
     * @param totalWords      word count of the sentence
     * @param pairCoverage    pair-coverage value per bias type; must not be null
     * @param signalCount     signal count per bias type; must not be null
     * @param evaluatorCount  evaluator count per bias type; must not be null
     * @param matchedLemmas   matched dictionary lemmas; must not be null
     * @param totalBiasWords  total matched bias words
     * @param totalDerogatory count of matched derogatory terms
     * @param totalColloquial count of matched colloquial terms
     * @param multiType       multi-type flag, stored as given
     * @throws NullPointerException if any of the collections is null
     */
    SentenceBiasScore(String sentence,
                      int totalWords,
                      Map<String, Double> pairCoverage,
                      Map<String, Integer> signalCount,
                      Map<String, Integer> evaluatorCount,
                      List<String> matchedLemmas,
                      int totalBiasWords,
                      int totalDerogatory,
                      int totalColloquial,
                      boolean multiType) {
        this.sentence = sentence;
        this.totalWords = totalWords;
        // Copy before wrapping: unmodifiableMap/List alone is only a view, so
        // the caller could still change this object's state afterwards.
        // LinkedHashMap keeps the source iteration order (and tolerates null keys).
        this.pairCoverage = Collections.unmodifiableMap(new LinkedHashMap<>(pairCoverage));
        this.signalCount = Collections.unmodifiableMap(new LinkedHashMap<>(signalCount));
        this.evaluatorCount = Collections.unmodifiableMap(new LinkedHashMap<>(evaluatorCount));
        this.matchedLemmas = Collections.unmodifiableList(new ArrayList<>(matchedLemmas));
        this.totalBiasWords = totalBiasWords;
        this.totalDerogatory = totalDerogatory;
        this.totalColloquial = totalColloquial;
        this.multiType = multiType;
    }

    /**
     * Returns the pair-coverage value stored for a bias type.
     *
     * @param biasType bias-type key (lower-cased before lookup); {@code null}
     *                 or blank selects the sum over all types
     * @return the stored value, {@code 0.0} if the type has no entry, or
     *         {@link #totalCoverage()} for a null/blank argument
     */
    public double getPairCoverage(String biasType) {
        if (biasType == null || biasType.isBlank()) {
            return totalCoverage();
        }
        // Locale.ROOT: default-locale lower-casing turns 'I' into a dotless
        // 'ı' under e.g. Turkish, which would miss the "disability" key.
        return pairCoverage.getOrDefault(biasType.toLowerCase(Locale.ROOT), 0.0);
    }

    /** Returns the sum of all stored pair-coverage values. */
    public double totalCoverage() {
        double sum = 0;
        for (double v : pairCoverage.values()) {
            sum += v;
        }
        return sum;
    }

    /**
     * Returns the pair-coverage values as a new array, one slot per entry of
     * {@link #BIAS_TYPES} in the same order.
     */
    public double[] coverageArray() {
        double[] arr = new double[BIAS_TYPES.length];
        for (int i = 0; i < BIAS_TYPES.length; i++) {
            arr[i] = getPairCoverage(BIAS_TYPES[i]);
        }
        return arr;
    }

    /** True if any bias type has a non-zero pair-coverage score. */
    public boolean isBiased() {
        for (double v : pairCoverage.values()) {
            if (v > 0) {
                return true;
            }
        }
        return false;
    }

    public String getSentence() { return sentence; }

    public int getTotalWords() { return totalWords; }

    /**
     * Returns the signal count for a bias type, or 0 if none is stored.
     * The key is tried as given first and then lower-cased, so the lookup is
     * case-insensitive like {@link #getPairCoverage(String)}.
     */
    public int getSignalCount(String type) { return countFor(signalCount, type); }

    /**
     * Returns the evaluator count for a bias type, or 0 if none is stored.
     * The key is tried as given first and then lower-cased.
     */
    public int getEvaluatorCount(String type) { return countFor(evaluatorCount, type); }

    /** Returns the matched lemmas as an unmodifiable list. */
    public List<String> getMatchedLemmas() { return matchedLemmas; }

    public int getTotalBiasWords() { return totalBiasWords; }

    public int getTotalDerogatory() { return totalDerogatory; }

    public int getTotalColloquial() { return totalColloquial; }

    public boolean isMultiType() { return multiType; }

    /**
     * Renders this score as one tab-separated row whose columns match
     * {@link #tsvHeader()}.
     *
     * <p>Tab, line-feed and carriage-return characters inside the sentence or
     * the lemma list are replaced by single spaces so that the row always has
     * the expected number of columns. Decimal values use four fraction digits
     * and a {@code '.'} separator.
     *
     * @return the TSV row, without a trailing line terminator
     */
    public String toTsv() {
        StringBuilder sb = new StringBuilder();
        sb.append(tsvSafe(String.valueOf(sentence))).append('\t');
        sb.append(totalWords).append('\t');
        sb.append(tsvSafe(String.valueOf(matchedLemmas))).append('\t');
        for (String type : BIAS_TYPES) {
            sb.append(signalCount.getOrDefault(type, 0)).append('\t');
            sb.append(evaluatorCount.getOrDefault(type, 0)).append('\t');
            sb.append(String.format(Locale.ROOT, "%.4f", getPairCoverage(type))).append('\t');
        }
        sb.append(totalBiasWords).append('\t');
        sb.append(totalDerogatory).append('\t');
        sb.append(totalColloquial).append('\t');
        sb.append(multiType ? 1 : 0).append('\t');
        sb.append(String.format(Locale.ROOT, "%.4f", totalCoverage()));
        return sb.toString();
    }

    /**
     * Returns the tab-separated header line matching {@link #toTsv()}:
     * three leading columns, three columns per entry of {@link #BIAS_TYPES},
     * then the aggregate columns.
     */
    public static String tsvHeader() {
        StringBuilder sb = new StringBuilder();
        sb.append("sentence\ttotalWords\tmatchedLemmas\t");
        for (String type : BIAS_TYPES) {
            sb.append(type).append("_signals\t")
              .append(type).append("_evaluators\t")
              .append(type).append("_coverage\t");
        }
        sb.append("totalBiasWords\ttotalDerogatory\ttotalColloquial\t")
          .append("multiType\ttotalCoverage");
        return sb.toString();
    }

    /**
     * Returns a short debug representation; the sentence is cut to its first
     * 80 chars (followed by an ellipsis) when longer. Safe for a null sentence.
     */
    @Override
    public String toString() {
        String preview = sentence;
        if (preview != null && preview.length() > 80) {
            preview = preview.substring(0, 80) + "\u2026";
        }
        return String.format(Locale.ROOT,
                "SentenceBiasScore{words=%d, coverage=%.3f, biased=%b, sentence='%s'}",
                totalWords, totalCoverage(), isBiased(), preview);
    }

    /** Looks up a count by exact key, then by lower-cased key; 0 when absent. */
    private static int countFor(Map<String, Integer> counts, String type) {
        Integer exact = counts.get(type);
        if (exact != null) {
            return exact;
        }
        if (type == null) {
            return 0;
        }
        Integer folded = counts.get(type.toLowerCase(Locale.ROOT));
        return folded != null ? folded : 0;
    }

    /** Replaces characters that would break a TSV row (tab, LF, CR) with spaces. */
    private static String tsvSafe(String field) {
        return field.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ');
    }
}
|