File size: 5,252 Bytes
18573e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
package bg.bas.dcl.LLMs;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Immutable bias-scoring result for a single sentence.
 *
 * <p>Holds the sentence text, its word count, per-bias-type counts and
 * pair-coverage scores, the matched lemmas and a few aggregate counters.
 * Instances are created through the package-private constructor and can be
 * serialised as one tab-separated row via {@link #toTsv()} (column names come
 * from {@link #tsvHeader()}).
 *
 * <p>The collections passed to the constructor are copied, so later changes
 * made by the caller do not affect this object.
 */
public class SentenceBiasScore {

    /**
     * Bias type keys, in the column order used by {@link #coverageArray()},
     * {@link #toTsv()} and {@link #tsvHeader()}.
     *
     * <p>NOTE(review): this is a public array and therefore mutable by callers;
     * it is kept as-is for compatibility and must be treated as read-only.
     */
    public static final String[] BIAS_TYPES = {
        "gender", "race_ethnicity", "religion", "disability", "appearance"
    };

    /** Maximum number of sentence characters shown by {@link #toString()}. */
    private static final int MAX_SENTENCE_PREVIEW = 80;

    /** The sentence text as supplied to the constructor (may be {@code null}). */
    private final String sentence;

    /** Word count of the sentence as supplied to the constructor. */
    private final int totalWords;

    /** Pair-coverage score per bias type; looked up by lower-cased type name. */
    private final Map<String, Double> pairCoverage;

    /** Signal count per bias type (the {@code <type>_signals} TSV columns). */
    private final Map<String, Integer> signalCount;

    /** Evaluator count per bias type (the {@code <type>_evaluators} TSV columns). */
    private final Map<String, Integer> evaluatorCount;

    /** All dictionary entries matched in this sentence (lemma strings). */
    private final List<String> matchedLemmas;

    /** Total matched bias words (evaluative, non-neutral). */
    private final int totalBiasWords;

    /** Count of matched derogatory terms. */
    private final int totalDerogatory;

    /** Count of matched colloquial terms. */
    private final int totalColloquial;

    /**
     * Flag supplied by the producer and written as 1/0 in the TSV row.
     * NOTE(review): presumably "more than one bias type matched" - confirm with the caller.
     */
    private final boolean multiType;

    /**
     * Creates a score object, taking snapshots of the supplied collections.
     *
     * @param sentence        the sentence text
     * @param totalWords      number of words in the sentence
     * @param pairCoverage    pair-coverage score per bias type
     * @param signalCount     signal count per bias type
     * @param evaluatorCount  evaluator count per bias type
     * @param matchedLemmas   matched dictionary lemmas
     * @param totalBiasWords  total matched bias words
     * @param totalDerogatory number of matched derogatory terms
     * @param totalColloquial number of matched colloquial terms
     * @param multiType       multi-type flag
     * @throws NullPointerException if any of the maps or the list is {@code null}
     */
    SentenceBiasScore(String sentence,
                      int totalWords,
                      Map<String, Double>  pairCoverage,
                      Map<String, Integer> signalCount,
                      Map<String, Integer> evaluatorCount,
                      List<String>         matchedLemmas,
                      int totalBiasWords,
                      int totalDerogatory,
                      int totalColloquial,
                      boolean multiType) {
        this.sentence        = sentence;
        this.totalWords      = totalWords;
        // Copy before wrapping: an unmodifiable view alone would still reflect
        // later mutations of the caller's collections. LinkedHashMap keeps the
        // caller's iteration order so that totalCoverage() sums in a stable order.
        this.pairCoverage    = Collections.unmodifiableMap(new LinkedHashMap<>(pairCoverage));
        this.signalCount     = Collections.unmodifiableMap(new LinkedHashMap<>(signalCount));
        this.evaluatorCount  = Collections.unmodifiableMap(new LinkedHashMap<>(evaluatorCount));
        this.matchedLemmas   = Collections.unmodifiableList(new ArrayList<>(matchedLemmas));
        this.totalBiasWords  = totalBiasWords;
        this.totalDerogatory = totalDerogatory;
        this.totalColloquial = totalColloquial;
        this.multiType       = multiType;
    }

    /**
     * Returns the pair-coverage score for one bias type.
     *
     * @param biasType bias type name (case-insensitive); {@code null} or blank
     *                 selects the sum over all types
     * @return the stored score, {@code 0.0} if the type has no entry, or
     *         {@link #totalCoverage()} when {@code biasType} is null/blank
     */
    public double getPairCoverage(String biasType) {
        if (biasType == null || biasType.isBlank()) {
            return totalCoverage();
        }
        // Locale.ROOT: the default-locale variant maps 'I' to a dotless i under Turkish.
        return pairCoverage.getOrDefault(biasType.toLowerCase(Locale.ROOT), 0.0);
    }

    /**
     * Returns the sum of all stored pair-coverage scores.
     *
     * @return the summed coverage, {@code 0.0} when nothing is stored
     */
    public double totalCoverage() {
        double sum = 0;
        for (double value : pairCoverage.values()) {
            sum += value;
        }
        return sum;
    }

    /**
     * Returns the pair-coverage scores as an array ordered like {@link #BIAS_TYPES}.
     *
     * @return a new array of length {@code BIAS_TYPES.length}
     */
    public double[] coverageArray() {
        double[] scores = new double[BIAS_TYPES.length];
        for (int i = 0; i < BIAS_TYPES.length; i++) {
            scores[i] = getPairCoverage(BIAS_TYPES[i]);
        }
        return scores;
    }

    /** True if any bias type has a non-zero pair-coverage score. */
    public boolean isBiased() {
        for (double value : pairCoverage.values()) {
            if (value > 0) {
                return true;
            }
        }
        return false;
    }

    /** @return the sentence text as supplied to the constructor */
    public String getSentence() {
        return sentence;
    }

    /** @return the word count supplied to the constructor */
    public int getTotalWords() {
        return totalWords;
    }

    /**
     * Returns the signal count for a bias type.
     *
     * @param type bias type name; the exact key is tried first, then its lower-cased form
     * @return the stored count, or {@code 0} if there is none
     */
    public int getSignalCount(String type) {
        return lookupCount(signalCount, type);
    }

    /**
     * Returns the evaluator count for a bias type.
     *
     * @param type bias type name; the exact key is tried first, then its lower-cased form
     * @return the stored count, or {@code 0} if there is none
     */
    public int getEvaluatorCount(String type) {
        return lookupCount(evaluatorCount, type);
    }

    /** @return an unmodifiable list of the matched lemmas */
    public List<String> getMatchedLemmas() {
        return matchedLemmas;
    }

    /** @return total matched bias words */
    public int getTotalBiasWords() {
        return totalBiasWords;
    }

    /** @return number of matched derogatory terms */
    public int getTotalDerogatory() {
        return totalDerogatory;
    }

    /** @return number of matched colloquial terms */
    public int getTotalColloquial() {
        return totalColloquial;
    }

    /** @return the multi-type flag supplied to the constructor */
    public boolean isMultiType() {
        return multiType;
    }

    /**
     * Serialises this score as one tab-separated row whose columns match
     * {@link #tsvHeader()}.
     *
     * <p>Tabs and line breaks inside the sentence or the lemma list are replaced
     * by spaces so the row always has the expected number of columns. Decimal
     * values are written with a dot regardless of the default locale.
     *
     * @return the TSV row, without a trailing line break
     */
    public String toTsv() {
        StringBuilder row = new StringBuilder();
        row.append(tsvSafe(sentence)).append('\t');
        row.append(totalWords).append('\t');
        row.append(tsvSafe(matchedLemmas.toString())).append('\t');

        for (String type : BIAS_TYPES) {
            row.append(signalCount.getOrDefault(type, 0)).append('\t');
            row.append(evaluatorCount.getOrDefault(type, 0)).append('\t');
            row.append(formatCoverage(getPairCoverage(type))).append('\t');
        }

        row.append(totalBiasWords).append('\t');
        row.append(totalDerogatory).append('\t');
        row.append(totalColloquial).append('\t');
        row.append(multiType ? 1 : 0).append('\t');
        row.append(formatCoverage(totalCoverage()));

        return row.toString();
    }

    /**
     * Builds the tab-separated header line matching {@link #toTsv()}.
     *
     * @return the header row, without a trailing line break
     */
    public static String tsvHeader() {
        StringBuilder header = new StringBuilder();
        header.append("sentence\ttotalWords\tmatchedLemmas\t");
        for (String type : BIAS_TYPES) {
            header.append(type).append("_signals\t")
                  .append(type).append("_evaluators\t")
                  .append(type).append("_coverage\t");
        }
        header.append("totalBiasWords\ttotalDerogatory\ttotalColloquial\t")
              .append("multiType\ttotalCoverage");
        return header.toString();
    }

    /**
     * Returns a short diagnostic summary; the sentence is cut to at most
     * {@value #MAX_SENTENCE_PREVIEW} characters. A {@code null} sentence is
     * rendered as {@code null}.
     */
    @Override
    public String toString() {
        return String.format(Locale.ROOT,
                "SentenceBiasScore{words=%d, coverage=%.3f, biased=%b, sentence='%s'}",
                totalWords, totalCoverage(), isBiased(), preview(sentence));
    }

    /** Looks up a count by exact key, falling back to the lower-cased key, else 0. */
    private static int lookupCount(Map<String, Integer> counts, String type) {
        Integer exact = counts.get(type);
        if (exact != null) {
            return exact;
        }
        if (type == null) {
            return 0;
        }
        return counts.getOrDefault(type.toLowerCase(Locale.ROOT), 0);
    }

    /** Formats a coverage value with four decimals and a locale-independent dot. */
    private static String formatCoverage(double value) {
        return String.format(Locale.ROOT, "%.4f", value);
    }

    /** Replaces tab, LF and CR with spaces so a value cannot break the TSV layout. */
    private static String tsvSafe(String value) {
        if (value == null) {
            return "null"; // same text StringBuilder.append((String) null) produces
        }
        return value.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ');
    }

    /** Truncates text for display, never cutting a surrogate pair in half. */
    private static String preview(String text) {
        if (text == null || text.length() <= MAX_SENTENCE_PREVIEW) {
            return text;
        }
        int end = MAX_SENTENCE_PREVIEW;
        if (Character.isHighSurrogate(text.charAt(end - 1))) {
            end--;
        }
        return text.substring(0, end) + "…";
    }
}