| | import re |
| | import torch |
| | import gradio as gr |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| | from deep_translator import GoogleTranslator |
| |
|
| | |
# Checkpoint identifier handed to ``from_pretrained`` for both the model and
# its tokenizer (looks like a Hugging Face Hub repo id).
MODEL_PATH = "Thilak118/indic-bert-toxicity-classifier_kannada"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer and classifier from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
model.to(device)
model.eval()  # inference only: put the module in evaluation mode

# English -> Kannada converter used for the romanized user input.
# NOTE(review): this is a translation service, not a strict transliterator.
translator = GoogleTranslator(source="en", target="kn")
| |
|
| | |
def clean_text(text):
    """Keep only Kannada script, whitespace and basic punctuation.

    The input is coerced with ``str()``. Every character that is not in the
    Kannada Unicode block (U+0C80-U+0CFF), not whitespace and not one of
    ``. , ! ?`` is dropped. Whitespace runs are then collapsed to a single
    space and the result is trimmed at both ends.
    """
    kannada_only = re.sub(r"[^\u0C80-\u0CFF\s.,!?]", "", str(text))
    return re.sub(r"\s+", " ", kannada_only).strip()
| |
|
| | |
def transliterate_to_kannada(text):
    """Convert user text to Kannada script via the module-level ``translator``.

    Args:
        text: Input typed by the user. ``None``, empty and whitespace-only
            values are treated as "nothing to convert".

    Returns:
        The converted string, ``""`` when there is nothing to convert, or the
        sentinel ``"Translation failed"`` when the translator raises or does
        not hand back a string.
    """
    if not text or not text.strip():
        return ""

    try:
        converted = translator.translate(text)
    except Exception:
        # Best-effort by design: the UI shows the sentinel instead of crashing,
        # but the cause is logged so that failures remain diagnosable.
        import logging

        logging.getLogger(__name__).exception("Kannada conversion failed")
        return "Translation failed"

    # Callers treat the result as text (display, ``.lower()``), so anything
    # that is not a string (e.g. ``None``) is reported as a failure as well.
    if not isinstance(converted, str):
        return "Translation failed"
    return converted
| |
|
| | |
def predict_toxicity(english_text):
    """Convert romanized input to Kannada script and classify its toxicity.

    Args:
        english_text: Text typed by the user in Latin letters.

    Returns:
        A multi-line, human-readable report. On success it holds the cleaned
        Kannada text, the predicted label and the confidence as a percentage.
        When the conversion fails, or nothing classifiable is left after
        cleaning, the second line starts with ``Prediction: Failed`` instead.
    """
    # Sentinel returned by transliterate_to_kannada when conversion fails.
    failure_marker = "Translation failed"

    # ``or ""`` keeps a None result from crashing the string handling below.
    kannada_text = transliterate_to_kannada(english_text) or ""

    # Match the exact sentinel: a substring test for "failed" would also
    # reject legitimate text that merely contains that word.
    if kannada_text.strip() == failure_marker:
        return f"Transliterated Kannada Text: {kannada_text}\nPrediction: Failed"

    cleaned_text = clean_text(kannada_text)

    # Empty input, or a conversion that produced no Kannada characters, leaves
    # nothing to score; classifying an empty string would yield a confident
    # but meaningless label.
    if not cleaned_text:
        return (
            f"Transliterated Kannada Text: {cleaned_text}\n"
            "Prediction: Failed (no Kannada text to classify)"
        )

    inputs = tokenizer(
        cleaned_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    ).to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    prediction = torch.argmax(logits, dim=1).item()
    probs = torch.softmax(logits, dim=1)[0]
    confidence = probs[prediction].item() * 100

    # NOTE(review): assumes class index 0 means "toxic" - confirm against the
    # checkpoint's label mapping.
    label = "Toxic" if prediction == 0 else "Non-Toxic"

    return (
        f"Transliterated Kannada Text: {cleaned_text}\n"
        f"Prediction: {label}\n"
        f"Confidence: {confidence:.2f}%"
    )
| |
|
| | |
# Build the web UI: an input box with a Kannada-script preview next to it,
# two action buttons and a read-only result box.
with gr.Blocks(title="Kannada Text Toxicity Classifier (IndicBERT)") as demo:
    # Header and usage instructions.
    gr.Markdown(
        value="""
        # Kannada Text Toxicity Classifier (IndicBERT + Transliteration)

        Enter Kannada text written in **English letters**
        Example:
        **`ninage ashtondu seen illa le`**

        The app converts it into **Kannada script** and predicts toxicity.
        """
    )

    # Left column: user input. Right column: converted preview.
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=2,
                label="Enter Kannada Text (English Transliteration)",
                placeholder="e.g., ninage ashtondu seen illa le",
            )
        with gr.Column():
            transliterated_text = gr.Textbox(
                lines=2,
                label="Kannada Script (Preview)",
                interactive=False,
            )

    with gr.Row():
        preview_btn = gr.Button(value="Preview Transliteration")
        predict_btn = gr.Button(value="Predict Toxicity")

    output_text = gr.Textbox(
        lines=5,
        label="Prediction Output",
        interactive=False,
    )

    # Event wiring: both buttons read the same input box.
    preview_btn.click(transliterate_to_kannada, input_text, transliterated_text)
    predict_btn.click(predict_toxicity, input_text, output_text)


# Start the Gradio server.
demo.launch()
| |
|