Spaces:

Thilak118
/

TamilCommentsToxicityDetection

Sleeping

App Files Files Community

Thilak118 committed on Dec 28, 2025

Commit

51cff32

verified ·

1 Parent(s): 022dc69

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -13

app.py CHANGED Viewed

@@ -4,6 +4,9 @@ import re
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from indic_transliteration.sanscript import transliterate, ITRANS, TAMIL
 MODEL_PATH = "Thilak118/indic-bert-toxicity-classifier_tamil"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
@@ -13,25 +16,41 @@ model.eval()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 def transliterate_to_tamil(text):
     if text and text.strip():
-        return transliterate(text, ITRANS, TAMIL)
     return ""
 def clean_text(text):
-    text = re.sub(r'[^\u0B80-\u0BFF\s.,!?]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 def predict_toxicity(input_text):
     ta_text = transliterate_to_tamil(input_text)
     cleaned_text = clean_text(ta_text)
     inputs = tokenizer(
         cleaned_text,
         return_tensors="pt",
-        padding=True,
         truncation=True,
         max_length=128
     ).to(device)
@@ -50,23 +69,52 @@ def predict_toxicity(input_text):
         f"Confidence: {confidence:.2f}%"
     )
-with gr.Blocks(title="Tamil Toxicity Classifier") as demo:
     gr.Markdown(
         """
         # Tamil Text Toxicity Classifier 🇮🇳
-        Enter **English transliteration**
-        Example: `nee romba mosam`
         """
     )
-    input_text = gr.Textbox(label="Enter Text (English)", lines=2)
-    preview = gr.Textbox(label="Tamil Text", interactive=False)
-    output = gr.Textbox(label="Prediction", lines=4)
-    preview_btn = gr.Button("Preview Tamil Text")
-    predict_btn = gr.Button("Predict Toxicity")
-    preview_btn.click(transliterate_to_tamil, input_text, preview)
-    predict_btn.click(predict_toxicity, input_text, output)
 demo.launch()

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from indic_transliteration.sanscript import transliterate, ITRANS, TAMIL
+# -----------------------------
+# Load Model & Tokenizer
+# -----------------------------
 MODEL_PATH = "Thilak118/indic-bert-toxicity-classifier_tamil"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
+# -----------------------------
+# Tamil Transliteration (Tanglish → Tamil)
+# -----------------------------
 def transliterate_to_tamil(text):
     if text and text.strip():
+        try:
+            return transliterate(text, ITRANS, TAMIL)
+        except Exception:
+            return "Transliteration failed"
     return ""
+# -----------------------------
+# Text Cleaning (Tamil)
+# -----------------------------
 def clean_text(text):
+    text = re.sub(r'[^\u0B80-\u0BFFa-zA-Z0-9\s.,!?]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
+# -----------------------------
+# Prediction
+# -----------------------------
 def predict_toxicity(input_text):
     ta_text = transliterate_to_tamil(input_text)
+    if "failed" in ta_text.lower():
+        return f"Tamil Text: {ta_text}\nPrediction: Failed"
     cleaned_text = clean_text(ta_text)
     inputs = tokenizer(
         cleaned_text,
         return_tensors="pt",
         truncation=True,
+        padding=True,
         max_length=128
     ).to(device)
         f"Confidence: {confidence:.2f}%"
     )
+# -----------------------------
+# Gradio UI (Same as Malayalam)
+# -----------------------------
+with gr.Blocks(title="Tamil Text Toxicity Classifier") as demo:
     gr.Markdown(
         """
         # Tamil Text Toxicity Classifier 🇮🇳
+        Enter Tamil text in **English transliteration (Tanglish)**
+        Example: `nee romba mosamaanavan`
         """
     )
+    with gr.Row():
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Enter Text (English Transliteration)",
+                placeholder="nee romba mosamaanavan",
+                lines=2
+            )
+        with gr.Column():
+            preview_text = gr.Textbox(
+                label="Tamil Text",
+                interactive=False,
+                lines=2
+            )
+    with gr.Row():
+        preview_btn = gr.Button("Preview Transliteration")
+        predict_btn = gr.Button("Predict Toxicity")
+    output_text = gr.Textbox(
+        label="Prediction Output",
+        interactive=False,
+        lines=5
+    )
+    preview_btn.click(
+        fn=transliterate_to_tamil,
+        inputs=input_text,
+        outputs=preview_text
+    )
+    predict_btn.click(
+        fn=predict_toxicity,
+        inputs=input_text,
+        outputs=output_text
+    )
 demo.launch()