package bg.bas.dcl.LLMs; import java.util.List; /** * BiasDetectorDemo * * * ----------------------------------------------------------------------- * MAVEN DEPENDENCIES (add to pom.xml): * * * * org.apache.opennlp * opennlp-tools * 2.4.0 * * * * * org.apache.opennlp * opennlp-models-sentdetect-bg * 1.2 * */ public class BiasDetectorDemo { public static void main(String[] args) { // ------------------------------------------------------------------ // 1. Load the Bulgarian sentence splitter // (loads bundled model from the Maven JAR automatically) // ------------------------------------------------------------------ BulgarianSentenceSplitter splitter = new BulgarianSentenceSplitter(); // Alternatively, supply an explicit model file path: // BulgarianSentenceSplitter splitter = // new BulgarianSentenceSplitter("/path/to/bg-sent.bin"); // ------------------------------------------------------------------ // 2. Load the bias lexicon // ------------------------------------------------------------------ String dictPath = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/" + "bulgarian_bias_dictionary_v4.tsv"; BiasLexicon lexicon = new BiasLexicon(dictPath); System.out.printf("Lexicon loaded: %d entries%n%n", lexicon.size()); // ------------------------------------------------------------------ // 3. Build the analyser // ------------------------------------------------------------------ BiasAnalyser analyser = new BiasAnalyser(lexicon, splitter); // ------------------------------------------------------------------ // 4a. Analyse a block of text in memory // ------------------------------------------------------------------ String sampleText = "Слепите хора трудно могат да се справят сами в живота. " + "Времето днес е слънчево и приятно."; System.out.println("=== Sentence-level bias scores ==="); System.out.println(SentenceBiasScore.tsvHeader()); System.out.println(); List scores = analyser.analyseText(sampleText); for (SentenceBiasScore score : scores) { System.out.println("Sentence : " + score.getSentence()); System.out.printf ("Words : %d%n", score.getTotalWords()); System.out.printf ("Biased : %b%n", score.isBiased()); double[] cov = score.coverageArray(); String[] types = SentenceBiasScore.BIAS_TYPES; for (int i = 0; i < types.length; i++) { if (cov[i] > 0) System.out.printf(" %-18s %.2f%% pair coverage%n", types[i] + ":", cov[i] * 100); } System.out.printf ("Total : %.2f%% overall coverage%n", score.totalCoverage() * 100); System.out.println("Lemmas : " + score.getMatchedLemmas()); System.out.println(); } // ------------------------------------------------------------------ // 4b. Analyse a corpus directory — writes a TSV results file // (only biased sentences are written; zero-coverage sentences // are filtered out automatically by analyseDirectory) // ------------------------------------------------------------------ String corpusDir = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/WIKI/"; String resultTsv = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/bias_results.tsv"; // analyser.analyseDirectory(corpusDir, resultTsv); // ------------------------------------------------------------------ // 4c. Sentence splitting only — using the splitter standalone // ------------------------------------------------------------------ String text = "Това е първото изречение. Второто е по-дълго и сложно! " + "А третото задава въпрос?"; String[] sentences = splitter.split(text); System.out.println("=== Sentence splitting demo ==="); for (int i = 0; i < sentences.length; i++) { System.out.printf(" [%d] %s%n", i + 1, sentences[i]); } } }