package bg.bas.dcl.LLMs;
import java.util.List;
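/**
 * BiasDetectorDemo
 *
 * -----------------------------------------------------------------------
 * MAVEN DEPENDENCIES (add to pom.xml):
 *
 * <pre>{@code
 * <dependency>
 *   <groupId>org.apache.opennlp</groupId>
 *   <artifactId>opennlp-tools</artifactId>
 *   <version>2.4.0</version>
 * </dependency>
 *
 * <dependency>
 *   <groupId>org.apache.opennlp</groupId>
 *   <artifactId>opennlp-models-sentdetect-bg</artifactId>
 *   <version>1.2</version>
 * </dependency>
 * }</pre>
 */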
public class BiasDetectorDemo {

    /** Dictionary location used when no path is supplied on the command line. */
    private static final String DEFAULT_DICT_PATH =
            "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/" + "bulgarian_bias_dictionary_v4.tsv";

    /**
     * Runs the demo end to end: creates the sentence splitter, loads the bias
     * lexicon, builds the analyser, prints per-sentence bias scores for a small
     * in-memory sample, and finally shows the splitter used on its own.
     *
     * @param args optional command-line arguments; when {@code args[0]} is
     *             present it is used as the path to the bias dictionary TSV,
     *             otherwise {@link #DEFAULT_DICT_PATH} is used
     */
    public static void main(String[] args) {
        // ------------------------------------------------------------------
        // 1. Load the Bulgarian sentence splitter
        //    (loads bundled model from the Maven JAR automatically)
        // ------------------------------------------------------------------
        BulgarianSentenceSplitter splitter = new BulgarianSentenceSplitter();
        // Alternatively, supply an explicit model file path:
        // BulgarianSentenceSplitter splitter =
        //         new BulgarianSentenceSplitter("/path/to/bg-sent.bin");

        // ------------------------------------------------------------------
        // 2. Load the bias lexicon
        // ------------------------------------------------------------------
        // The absolute default path only exists on the original author's
        // machine, so allow it to be overridden without editing the source.
        String dictPath = (args != null && args.length > 0) ? args[0] : DEFAULT_DICT_PATH;
        BiasLexicon lexicon = new BiasLexicon(dictPath);
        System.out.printf("Lexicon loaded: %d entries%n%n", lexicon.size());

        // ------------------------------------------------------------------
        // 3. Build the analyser
        // ------------------------------------------------------------------
        BiasAnalyser analyser = new BiasAnalyser(lexicon, splitter);

        // ------------------------------------------------------------------
        // 4a. Analyse a block of text in memory
        // ------------------------------------------------------------------
        String sampleText =
                "Слепите хора трудно могат да се справят сами в живота. " +
                "Времето днес е слънчево и приятно.";
        printSentenceScores(analyser, sampleText);

        // ------------------------------------------------------------------
        // 4b. Analyse a corpus directory — writes a TSV results file
        //     (only biased sentences are written; zero-coverage sentences
        //     are filtered out automatically by analyseDirectory).
        //     Disabled by default; uncomment the three lines below to run it.
        // ------------------------------------------------------------------
        // String corpusDir = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/WIKI/";
        // String resultTsv = "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/bias_results.tsv";
        // analyser.analyseDirectory(corpusDir, resultTsv);

        // ------------------------------------------------------------------
        // 4c. Sentence splitting only — using the splitter standalone
        // ------------------------------------------------------------------
        String text = "Това е първото изречение. Второто е по-дълго и сложно! " +
                "А третото задава въпрос?";
        printSplitDemo(splitter, text);
    }

    /**
     * Analyses {@code text} and prints a human-readable report for every
     * sentence score the analyser returns.
     *
     * @param analyser the analyser used to score the text
     * @param text     the text to analyse
     */
    private static void printSentenceScores(BiasAnalyser analyser, String text) {
        System.out.println("=== Sentence-level bias scores ===");
        System.out.println(SentenceBiasScore.tsvHeader());
        System.out.println();

        // Parameterized type is required: iterating a raw List yields Object
        // elements, which cannot be bound to a SentenceBiasScore loop variable.
        List<SentenceBiasScore> scores = analyser.analyseText(text);
        for (SentenceBiasScore score : scores) {
            System.out.println("Sentence : " + score.getSentence());
            System.out.printf ("Words : %d%n", score.getTotalWords());
            System.out.printf ("Biased : %b%n", score.isBiased());

            // cov[i] is read alongside types[i]; only non-zero entries are printed.
            double[] cov = score.coverageArray();
            String[] types = SentenceBiasScore.BIAS_TYPES;
            for (int i = 0; i < types.length; i++) {
                if (cov[i] > 0) {
                    System.out.printf(" %-18s %.2f%% pair coverage%n",
                            types[i] + ":", cov[i] * 100);
                }
            }

            System.out.printf ("Total : %.2f%% overall coverage%n", score.totalCoverage() * 100);
            System.out.println("Lemmas : " + score.getMatchedLemmas());
            System.out.println();
        }
    }

    /**
     * Splits {@code text} with the given splitter and prints each resulting
     * sentence on its own line, numbered from 1.
     *
     * @param splitter the sentence splitter to use
     * @param text     the text to split
     */
    private static void printSplitDemo(BulgarianSentenceSplitter splitter, String text) {
        String[] sentences = splitter.split(text);
        System.out.println("=== Sentence splitting demo ===");
        for (int i = 0; i < sentences.length; i++) {
            System.out.printf(" [%d] %s%n", i + 1, sentences[i]);
        }
    }
}