Spaces:

DCL-IBL
/

IfGPT-DataQualityComponents

Running

App Files Files Community

IfGPT-DataQualityComponents / java /bg /bas /dcl /LLMs /BiasDetectorDemo.java

dcl-ibl-bas

Upload 22 files

18573e4 verified 5 days ago

raw

history blame contribute delete

4.73 kB

	package bg.bas.dcl.LLMs;

	import java.util.List;

	/**
	 * Command-line demo of the Bulgarian bias-detection components.
	 *
	 * <p>The demo performs these steps in order:
	 * <ol>
	 *   <li>creates a {@code BulgarianSentenceSplitter};</li>
	 *   <li>loads a {@code BiasLexicon} from a TSV dictionary file and prints its size;</li>
	 *   <li>builds a {@code BiasAnalyser} and prints per-sentence scores for a short
	 *       in-memory sample text;</li>
	 *   <li>prints the sentences produced by the splitter alone for a second sample.</li>
	 * </ol>
	 *
	 * <p>Usage: {@code java bg.bas.dcl.LLMs.BiasDetectorDemo [dictionaryPath]}.
	 * When {@code dictionaryPath} is omitted (or empty) the built-in default
	 * location is used.
	 *
	 * <p>Maven dependencies (add to {@code pom.xml}):
	 * <pre>{@code
	 * <!-- OpenNLP toolkit -->
	 * <dependency>
	 *   <groupId>org.apache.opennlp</groupId>
	 *   <artifactId>opennlp-tools</artifactId>
	 *   <version>2.4.0</version>
	 * </dependency>
	 *
	 * <!-- Bulgarian sentence-detection model (UD 2.14, Apache 2.0) -->
	 * <dependency>
	 *   <groupId>org.apache.opennlp</groupId>
	 *   <artifactId>opennlp-models-sentdetect-bg</artifactId>
	 *   <version>1.2</version>
	 * </dependency>
	 * }</pre>
	 */
	public class BiasDetectorDemo {

		/** Dictionary location used when no path is supplied on the command line. */
		private static final String DEFAULT_DICT_PATH =
				"/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/"
				+ "bulgarian_bias_dictionary_v4.tsv";

		/**
		 * Runs the demo.
		 *
		 * @param args optional; {@code args[0]}, when present and non-empty, is the
		 *             path of the bias dictionary TSV file to load instead of the
		 *             built-in default
		 */
		public static void main(String[] args) {

			// ------------------------------------------------------------------
			// 1. Load the Bulgarian sentence splitter
			//    (loads bundled model from the Maven JAR automatically)
			// ------------------------------------------------------------------
			BulgarianSentenceSplitter splitter = new BulgarianSentenceSplitter();

			// Alternatively, supply an explicit model file path:
			//   new BulgarianSentenceSplitter("/path/to/bg-sent.bin");

			// ------------------------------------------------------------------
			// 2. Load the bias lexicon
			// ------------------------------------------------------------------
			String dictPath = resolveDictPath(args);

			BiasLexicon lexicon = new BiasLexicon(dictPath);
			System.out.printf("Lexicon loaded: %d entries%n%n", lexicon.size());

			// ------------------------------------------------------------------
			// 3. Build the analyser
			// ------------------------------------------------------------------
			BiasAnalyser analyser = new BiasAnalyser(lexicon, splitter);

			// ------------------------------------------------------------------
			// 4a. Analyse a block of text in memory
			// ------------------------------------------------------------------
			String sampleText =
					"Слепите хора трудно могат да се справят сами в живота. " +
					"Времето днес е слънчево и приятно.";
			printSentenceScores(analyser, sampleText);

			// ------------------------------------------------------------------
			// 4b. Analyse a corpus directory (disabled in this demo).
			//     Example call with the original author's local paths:
			//
			//       analyser.analyseDirectory(
			//           "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/WIKI/",
			//           "/home/ivelina/WORK-DCL/WIKIPEDIA-BIAS/bias_results.tsv");
			//
			//     Per the original author's note, this writes a TSV results file
			//     and only biased sentences are written (zero-coverage sentences
			//     are filtered out by analyseDirectory) -- not verifiable from
			//     this file; confirm against BiasAnalyser.
			// ------------------------------------------------------------------

			// ------------------------------------------------------------------
			// 4c. Sentence splitting only -- using the splitter standalone
			// ------------------------------------------------------------------
			String text = "Това е първото изречение. Второто е по-дълго и сложно! " +
					"А третото задава въпрос?";
			printSplitSentences(splitter, text);
		}

		/**
		 * Chooses the dictionary path: the first command-line argument when one
		 * was given and is non-empty, otherwise {@link #DEFAULT_DICT_PATH}.
		 */
		private static String resolveDictPath(String[] args) {
			boolean hasOverride = args != null
					&& args.length > 0
					&& args[0] != null
					&& !args[0].isEmpty();
			return hasOverride ? args[0] : DEFAULT_DICT_PATH;
		}

		/**
		 * Prints the section banner and TSV header, then analyses {@code text}
		 * and prints a human-readable report for every returned sentence score.
		 */
		private static void printSentenceScores(BiasAnalyser analyser, String text) {
			System.out.println("=== Sentence-level bias scores ===");
			System.out.println(SentenceBiasScore.tsvHeader());
			System.out.println();

			List<SentenceBiasScore> scores = analyser.analyseText(text);
			for (SentenceBiasScore score : scores) {
				printScore(score);
			}
		}

		/** Prints one sentence's word count, bias flag, non-zero coverages and lemmas. */
		private static void printScore(SentenceBiasScore score) {
			System.out.println("Sentence : " + score.getSentence());
			System.out.printf ("Words : %d%n", score.getTotalWords());
			System.out.printf ("Biased : %b%n", score.isBiased());

			// cov[i] is reported under the label types[i]; this relies on
			// coverageArray() being index-aligned with BIAS_TYPES.
			double[] cov = score.coverageArray();
			String[] types = SentenceBiasScore.BIAS_TYPES;
			for (int i = 0; i < types.length; i++) {
				// Only bias types with a positive coverage value are listed.
				if (cov[i] > 0) {
					System.out.printf(" %-18s %.2f%% pair coverage%n",
							types[i] + ":", cov[i] * 100);
				}
			}
			System.out.printf ("Total : %.2f%% overall coverage%n", score.totalCoverage() * 100);
			System.out.println("Lemmas : " + score.getMatchedLemmas());
			System.out.println();
		}

		/** Splits {@code text} with the splitter alone and prints the sentences, numbered from 1. */
		private static void printSplitSentences(BulgarianSentenceSplitter splitter, String text) {
			String[] sentences = splitter.split(text);
			System.out.println("=== Sentence splitting demo ===");
			for (int i = 0; i < sentences.length; i++) {
				System.out.printf(" [%d] %s%n", i + 1, sentences[i]);
			}
		}
	}