File size: 4,725 Bytes
18573e4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | package bg.bas.dcl.LLMs;
import java.util.List;
/**
 * Command-line demo of the bias-detection pipeline for Bulgarian text.
 *
 * <p>The demo creates a {@code BulgarianSentenceSplitter} and a {@code BiasLexicon},
 * combines them in a {@code BiasAnalyser}, prints per-sentence scores for a small
 * in-memory sample, and finally shows the splitter being used on its own.
 *
 * <p>Usage: {@code BiasDetectorDemo [dictionaryPath]}. When no (or an empty)
 * argument is given, the built-in {@code DEFAULT_DICT_PATH} is used.
 *
 * <p>Maven dependencies (add to {@code pom.xml}):
 *
 * <pre>{@code
 * <!-- OpenNLP toolkit -->
 * <dependency>
 *   <groupId>org.apache.opennlp</groupId>
 *   <artifactId>opennlp-tools</artifactId>
 *   <version>2.4.0</version>
 * </dependency>
 *
 * <!-- Bulgarian sentence-detection model (UD 2.14, Apache 2.0) -->
 * <dependency>
 *   <groupId>org.apache.opennlp</groupId>
 *   <artifactId>opennlp-models-sentdetect-bg</artifactId>
 *   <version>1.2</version>
 * </dependency>
 * }</pre>
 */
public class BiasDetectorDemo {

    /** Lexicon location used when no path is supplied on the command line. */
    private static final String DEFAULT_DICT_PATH =
            "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/bulgarian_bias_dictionary_v4.tsv";

    /**
     * Runs the demo: scores a sample text sentence by sentence, then prints the
     * result of splitting a second sample into sentences.
     *
     * @param args optional; {@code args[0]} is the path of the bias dictionary
     *             (TSV). Without it the demo falls back to {@code DEFAULT_DICT_PATH}.
     */
    public static void main(String[] args) {
        // ------------------------------------------------------------------
        // 1. Load the Bulgarian sentence splitter.
        //    (Original author's note: the no-arg constructor loads the model
        //    bundled in the Maven JAR.)
        // ------------------------------------------------------------------
        BulgarianSentenceSplitter splitter = new BulgarianSentenceSplitter();
        // Alternatively, supply an explicit model file path:
        // BulgarianSentenceSplitter splitter =
        //         new BulgarianSentenceSplitter("/path/to/bg-sent.bin");

        // ------------------------------------------------------------------
        // 2. Load the bias lexicon (path from the command line or the default)
        // ------------------------------------------------------------------
        String dictPath = resolveDictPath(args);
        BiasLexicon lexicon = new BiasLexicon(dictPath);
        System.out.printf("Lexicon loaded: %d entries%n%n", lexicon.size());

        // ------------------------------------------------------------------
        // 3. Build the analyser
        // ------------------------------------------------------------------
        BiasAnalyser analyser = new BiasAnalyser(lexicon, splitter);

        // ------------------------------------------------------------------
        // 4a. Analyse a block of text in memory
        // ------------------------------------------------------------------
        String sampleText =
                "Слепите хора трудно могат да се справят сами в живота. " +
                "Времето днес е слънчево и приятно.";
        System.out.println("=== Sentence-level bias scores ===");
        System.out.println(SentenceBiasScore.tsvHeader());
        System.out.println();
        List<SentenceBiasScore> scores = analyser.analyseText(sampleText);
        for (SentenceBiasScore score : scores) {
            printScore(score);
        }

        // ------------------------------------------------------------------
        // 4b. Analyse a corpus directory and write a TSV results file.
        //     (Original author's note: only biased sentences are written;
        //     zero-coverage sentences are filtered out by analyseDirectory.)
        //     Disabled in the demo; uncomment and adjust the paths to run it:
        // ------------------------------------------------------------------
        // String corpusDir = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/WIKI/";
        // String resultTsv = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/bias_results.tsv";
        // analyser.analyseDirectory(corpusDir, resultTsv);

        // ------------------------------------------------------------------
        // 4c. Sentence splitting only, using the splitter standalone
        // ------------------------------------------------------------------
        printSplitDemo(splitter);
    }

    /**
     * Picks the dictionary path: the first command-line argument when it is
     * present and non-blank, otherwise {@code DEFAULT_DICT_PATH}.
     */
    private static String resolveDictPath(String[] args) {
        boolean supplied = args != null
                && args.length > 0
                && args[0] != null
                && !args[0].trim().isEmpty();
        return supplied ? args[0] : DEFAULT_DICT_PATH;
    }

    /** Prints a human-readable report for one scored sentence to standard output. */
    private static void printScore(SentenceBiasScore score) {
        System.out.println("Sentence : " + score.getSentence());
        System.out.printf("Words : %d%n", score.getTotalWords());
        System.out.printf("Biased : %b%n", score.isBiased());

        // The loop reads cov[i] for every index of BIAS_TYPES, so it assumes
        // coverageArray() is at least as long as BIAS_TYPES — TODO confirm.
        double[] cov = score.coverageArray();
        String[] types = SentenceBiasScore.BIAS_TYPES;
        for (int i = 0; i < types.length; i++) {
            // Only bias types with non-zero coverage are listed.
            if (cov[i] > 0) {
                System.out.printf(" %-18s %.2f%% pair coverage%n",
                        types[i] + ":", cov[i] * 100);
            }
        }

        System.out.printf("Total : %.2f%% overall coverage%n", score.totalCoverage() * 100);
        System.out.println("Lemmas : " + score.getMatchedLemmas());
        System.out.println();
    }

    /** Splits a fixed three-sentence sample with {@code splitter} and prints each piece, numbered from 1. */
    private static void printSplitDemo(BulgarianSentenceSplitter splitter) {
        String text = "Това е първото изречение. Второто е по-дълго и сложно! " +
                "А третото задава въпрос?";
        String[] sentences = splitter.split(text);
        System.out.println("=== Sentence splitting demo ===");
        for (int i = 0; i < sentences.length; i++) {
            System.out.printf(" [%d] %s%n", i + 1, sentences[i]);
        }
    }
}
|