File size: 3,542 Bytes
3648b06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import logging
import re

import gradio as gr
import torch
from deep_translator import GoogleTranslator
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# -------------------- Model, tokenizer and translator setup --------------------
MODEL_PATH = "Thilak118/indic-bert-toxicity-classifier_kannada"

# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the sequence-classification weights, place them on the chosen device
# and switch the module to evaluation mode.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH).to(device)
model.eval()

# Tokenizer loaded from the same checkpoint path as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Translator configured with English as the source and Kannada ("kn") as the target.
translator = GoogleTranslator(
    source="en",
    target="kn",
)

# -------------------- Clean Kannada Text --------------------
def clean_text(text):
    """Reduce *text* to Kannada characters, whitespace and basic punctuation.

    The input is first converted with ``str()``. Every character outside the
    Kannada Unicode block (U+0C80 to U+0CFF) is dropped, except whitespace
    and the punctuation marks ``. , ! ?``. Runs of whitespace are then
    collapsed to a single space and the ends are trimmed.

    Args:
        text: Value to clean; any object accepted by ``str()``.

    Returns:
        The cleaned string, possibly empty.
    """
    disallowed = r"[^\u0C80-\u0CFF\s.,!?]"
    whitespace_run = r"\s+"

    kept = re.sub(disallowed, "", str(text))
    return re.sub(whitespace_run, " ", kept).strip()

# -------------------- Transliterate / Translate to Kannada --------------------
def transliterate_to_kannada(text):
    """Convert the user's text to Kannada using the module-level ``translator``.

    NOTE(review): despite the name, this calls ``translator.translate`` on a
    translator configured as English -> Kannada, so it translates rather than
    transliterates. Confirm that this is the intended handling for romanized
    Kannada input.

    Args:
        text: Raw text from the UI. ``None``, empty and whitespace-only
            values are treated as "nothing to convert".

    Returns:
        The converted text; ``""`` when there is nothing to convert; or the
        sentinel string ``"Translation failed"`` when the translator raises
        or does not hand back a string.
    """
    if not text or not text.strip():
        return ""

    try:
        translated = translator.translate(text)
    except Exception:
        # Best-effort boundary around a third-party call: keep the UI alive,
        # but record the traceback instead of discarding it silently.
        logging.getLogger(__name__).exception("Kannada translation failed")
        return "Translation failed"

    # Callers treat the result as a string (e.g. they call ``.lower()`` on
    # it). Report any non-string result, such as None, through the same
    # failure sentinel rather than letting it crash downstream.
    if not isinstance(translated, str):
        return "Translation failed"
    return translated

# -------------------- Toxicity Prediction --------------------
def predict_toxicity(english_text):
    """Convert the input to Kannada, clean it, and classify it with the model.

    Args:
        english_text: Text typed into the UI input box.

    Returns:
        A multi-line report string. On success it contains the cleaned
        Kannada text, the predicted label and the confidence as a
        percentage. When conversion fails, or when nothing remains to
        classify after cleaning, it contains ``Prediction: Failed`` and the
        model is not run.
    """
    failure_sentinel = "Translation failed"
    kannada_text = transliterate_to_kannada(english_text)

    # Match the helper's failure sentinel exactly: a substring test for
    # "failed" would also trigger on legitimate text containing that word.
    # A None result is handled the same way so it cannot crash below.
    if kannada_text is None or kannada_text == failure_sentinel:
        return f"Transliterated Kannada Text: {failure_sentinel}\nPrediction: Failed"

    cleaned_text = clean_text(kannada_text)

    # With no text left (empty input, or nothing survived cleaning) the model
    # would still emit a label and a confidence for an empty sequence, which
    # is meaningless. Report that explicitly instead.
    if not cleaned_text:
        return (
            f"Transliterated Kannada Text: {cleaned_text}\n"
            "Prediction: Failed (no Kannada text to classify)"
        )

    inputs = tokenizer(
        cleaned_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,  # longer inputs are truncated to 128 tokens
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    prediction = torch.argmax(logits, dim=1).item()
    probs = torch.softmax(logits, dim=1)[0]
    confidence = probs[prediction].item() * 100

    # NOTE(review): class index 0 is hard-coded as "Toxic" here; confirm this
    # matches the label order the checkpoint was trained with.
    label = "Toxic" if prediction == 0 else "Non-Toxic"

    return (
        f"Transliterated Kannada Text: {cleaned_text}\n"
        f"Prediction: {label}\n"
        f"Confidence: {confidence:.2f}%"
    )

# -------------------- Gradio UI --------------------
# Build the Gradio Blocks UI. Components are laid out in the order they are
# created inside the nested Row/Column contexts below.
with gr.Blocks(title="Kannada Text Toxicity Classifier (IndicBERT)") as demo:
    # Header and usage instructions rendered as Markdown.
    gr.Markdown(
        """
        # Kannada Text Toxicity Classifier (IndicBERT + Transliteration)

        Enter Kannada text written in **English letters**  
        Example:  
        **`ninage ashtondu seen illa le`**

        The app converts it into **Kannada script** and predicts toxicity.
        """
    )

    # First row: editable input box on the left, read-only preview on the right.
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter Kannada Text (English Transliteration)",
                placeholder="e.g., ninage ashtondu seen illa le",
                lines=2
            )
        with gr.Column():
            transliterated_text = gr.Textbox(
                label="Kannada Script (Preview)",
                interactive=False,
                lines=2
            )

    # Second row: the two action buttons.
    with gr.Row():
        preview_btn = gr.Button("Preview Transliteration")
        predict_btn = gr.Button("Predict Toxicity")

    # Read-only box that receives the report string from predict_toxicity.
    output_text = gr.Textbox(
        label="Prediction Output",
        interactive=False,
        lines=5
    )

    # Preview shows the direct return value of transliterate_to_kannada
    # (clean_text is not applied on this path).
    preview_btn.click(
        fn=transliterate_to_kannada,
        inputs=input_text,
        outputs=transliterated_text
    )

    # Predict runs the full pipeline on the same input box.
    predict_btn.click(
        fn=predict_toxicity,
        inputs=input_text,
        outputs=output_text
    )

# -------------------- Launch App --------------------
# NOTE(review): launch() is called at module top level, so it also runs when
# this module is imported; consider an `if __name__ == "__main__":` guard if
# the module is ever imported rather than executed directly.
demo.launch()