Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import fitz # PyMuPDF | |
| import re | |
| import pandas as pd | |
# --- Detection models (loaded eagerly at startup) ---
bias_detector = pipeline("text-classification", model="himel7/bias-detector")
bias_type_classifier = pipeline("text-classification", model="maximuspowers/bias-type-classifier")

# --- Neutralizer models: display name -> HF model id (lazy-loaded for speed) ---
neutralizer_models = {
    "BART Neutralizer": "himel7/bias-neutralizer-bart",
    "T5 Small Neutralizer": "himel7/bias-neutralizer-t5s",
}
neutralizers = {}  # cache: display name -> loaded text2text pipeline
def get_neutralizer(model_name):
    """Return the text2text pipeline for *model_name*, loading and caching it on first use."""
    try:
        return neutralizers[model_name]
    except KeyError:
        loaded = pipeline("text2text-generation", model=neutralizer_models[model_name])
        neutralizers[model_name] = loaded
        return loaded
| # Utils | |
def extract_text_from_pdf(pdf_file):
    """Concatenate the plain text of every page of the given PDF."""
    with fitz.open(pdf_file) as pdf:
        pages = [page.get_text("text") for page in pdf]
    return "".join(pages)
def split_into_sentences(text):
    """Split *text* into sentences on whitespace that follows '.', '!' or '?'.

    Empty fragments are dropped, so empty/whitespace-only input yields [].
    """
    fragments = re.split(r'(?<=[.!?])\s+', text.strip())
    return list(filter(None, fragments))
def analyze_sentence(sentence):
    """Run bias detection on one sentence; classify the bias type when biased.

    Returns a dict with keys: sentence, bias, bias_score, bias_type,
    bias_type_score ("-" placeholders when the sentence is unbiased).
    """
    detection = bias_detector(sentence)[0]
    # Start from the "Unbiased" report and overwrite when the detector flags bias.
    report = {
        "sentence": sentence,
        "bias": "Unbiased",
        "bias_score": round(detection["score"], 2),
        "bias_type": "-",
        "bias_type_score": "-",
    }
    if detection["label"] == "LABEL_1":  # LABEL_1 == biased for this detector
        typed = bias_type_classifier(sentence)[0]
        report["bias"] = "Biased"
        report["bias_type"] = typed["label"]
        report["bias_type_score"] = round(typed["score"], 2)
    return report
def analyze_pdf(pdf_file):
    """Analyze every sentence of a PDF for bias.

    Args:
        pdf_file: PDF path/handle accepted by ``extract_text_from_pdf``.

    Returns:
        Tuple of (stats_md, df): a Markdown summary string and a pandas
        DataFrame with one row per analyzed sentence.
    """
    text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(text)
    results = [analyze_sentence(s) for s in sentences]

    total = len(results)
    if total == 0:
        # Guard: scanned/image-only PDFs yield no sentences; without this the
        # percentage math below raises ZeroDivisionError.
        return "### π Bias Statistics\n\nNo text could be extracted from this PDF.", pd.DataFrame(results)

    biased = sum(1 for r in results if r["bias"] == "Biased")
    unbiased = total - biased
    stats_md = f"""
### π Bias Statistics
- **Total Sentences:** {total}
- **Biased Sentences:** {biased} ({(biased/total)*100:.1f}%)
- **Unbiased Sentences:** {unbiased} ({(unbiased/total)*100:.1f}%)
"""
    df = pd.DataFrame(results)
    return stats_md, df
def analyze_text(text):
    """Analyze a single user-entered sentence (thin wrapper for the UI)."""
    result = analyze_sentence(text)
    return result
| # New: Neutralize Bias | |
def neutralize_text(text, model_choice):
    """Rewrite *text* with the selected neutralizer model and return the result."""
    outputs = get_neutralizer(model_choice)(text, max_length=512, do_sample=False)
    return outputs[0]["generated_text"]
def neutralize_pdf(pdf_file, model_choice):
    """Neutralize the full text of a PDF, sentence by sentence.

    Args:
        pdf_file: PDF path/handle accepted by ``extract_text_from_pdf``.
        model_choice: Key into ``neutralizer_models`` selecting the model.

    Returns:
        The neutralized sentences joined back into a single string
        ("" when the PDF yields no extractable text).
    """
    text = extract_text_from_pdf(pdf_file)
    sentences = split_into_sentences(text)
    if not sentences:
        return ""  # nothing to neutralize (e.g. scanned/image-only PDF)
    neutralizer = get_neutralizer(model_choice)
    # Pass all sentences in one call so the pipeline can batch internally,
    # instead of invoking the model once per sentence.
    outputs = neutralizer(sentences, max_length=512, do_sample=False)
    return " ".join(out["generated_text"] for out in outputs)
| # Top badges | |
| badges_html = """ | |
| <p align="center"> | |
| <a href="https://huggingface.co/himel7/bias-detector"> | |
| <img src="https://img.shields.io/badge/π€-Hugging%20Face-yellow.svg"> | |
| </a> | |
| <a href="https://huggingface.co/himel7/bias-detector"> | |
| <img src="https://img.shields.io/badge/Model-Homepage-purple.svg"> | |
| </a> | |
| <a href="https://github.com/Himel1996/NewsBiasDetector/"> | |
| <img src="https://img.shields.io/badge/GitHub-Repo-orange.svg"> | |
| </a> | |
| <a href="https://arxiv.org/abs/2505.13010v1"> | |
| <img src="https://img.shields.io/badge/arXiv-2505.13010-red.svg"> | |
| </a> | |
| </p> | |
| """ | |
| # Build UI | |
| with gr.Blocks() as demo: | |
| gr.HTML(badges_html) | |
| gr.Markdown("## Bias Analyzer & Neutralizer") | |
| gr.Markdown("### This app helps you to detect biases in sentences, analyse them, and neutralize sentences.") | |
| with gr.Tab("Single Sentence"): | |
| text_input = gr.Textbox(lines=3, placeholder="Enter a sentence...") | |
| output = gr.JSON() | |
| btn = gr.Button("Analyze") | |
| btn.click(analyze_text, inputs=text_input, outputs=output) | |
| gr.Markdown("### Neutralize Bias") | |
| model_choice = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer") | |
| neutral_output = gr.Textbox(label="Neutralized Sentence", lines=3) | |
| neutral_btn = gr.Button("Neutralize") | |
| neutral_btn.click(neutralize_text, inputs=[text_input, model_choice], outputs=neutral_output) | |
| with gr.Tab("Analyze PDF"): | |
| pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"]) | |
| stats_output = gr.Markdown() | |
| table_output = gr.Dataframe(headers=["Sentence", "Bias", "Bias Score", "Bias Type", "Bias Type Score"]) | |
| analyze_btn = gr.Button("Analyze PDF") | |
| analyze_btn.click(analyze_pdf, inputs=pdf_input, outputs=[stats_output, table_output]) | |
| gr.Markdown("### Neutralize Entire PDF") | |
| model_choice_pdf = gr.Dropdown(list(neutralizer_models.keys()), label="Neutralizer Model", value="BART Neutralizer") | |
| neutral_pdf_output = gr.Textbox(label="Neutralized PDF Text", lines=15) | |
| neutral_pdf_btn = gr.Button("Neutralize PDF") | |
| neutral_pdf_btn.click(neutralize_pdf, inputs=[pdf_input, model_choice_pdf], outputs=neutral_pdf_output) | |
| if __name__ == "__main__": | |
| demo.launch() | |