Spaces:

Thilak118
/

teluguCommentToxicityDetection

Sleeping

App Files Files Community

Thilak118 committed on Oct 30, 2025

Commit

66a7e42

verified ·

1 Parent(s): 6f838d2

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -32

app.py CHANGED Viewed

@@ -3,73 +3,79 @@ import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import re
 from deep_translator import GoogleTranslator
-import requests
 # Load model & tokenizer
 model_name = "Thilak118/indic-bert-toxicity-classifier"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
 translator = GoogleTranslator(source='en', target='te')
 def clean_text(text):
     text = re.sub(r'[^\u0C00-\u0C7F\s.,!?]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 def is_telugu_text(text):
     return bool(re.search(r'[\u0C00-\u0C7F]', text))
 def transliterate_to_telugu(text):
     try:
         return translator.translate(text)
     except Exception as e:
         return f"Error in transliteration: {str(e)}"
-def log_to_render(comment, transliterated, prediction, confidence):
-    url = "https://telugu-toxicity-logger.onrender.com/log"
-    payload = {
-        "comment": comment,
-        "transliterated": transliterated,
-        "prediction": prediction,
-        "confidence": confidence
-    }
-    try:
-        requests.post(url, json=payload)
-    except Exception as e:
-        print("Logging failed:", e)
 def predict_toxicity(user_input):
     try:
         original_input = user_input
         if is_telugu_text(original_input):
             telugu_text = original_input
         else:
             telugu_text = transliterate_to_telugu(original_input)
             if "Error in transliteration" in telugu_text:
                 return telugu_text
         cleaned = clean_text(telugu_text)
         inputs = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=128)
         inputs = {k: v.to(device) for k, v in inputs.items()}
         with torch.no_grad():
             outputs = model(**inputs)
         prediction = torch.argmax(outputs.logits, dim=1).item()
         prob = torch.softmax(outputs.logits, dim=1)[0]
         confidence = max(prob).item() * 100
         label = "Toxic" if prediction == 0 else "Non-Toxic"
-        # Log it to Render backend
-        log_to_render(original_input, cleaned, label, confidence)
         return f"Transliterated Telugu Text: {cleaned}\nPrediction: {label}\nConfidence: {confidence:.2f}%"
     except Exception as e:
         return f"Error: {str(e)}"
@@ -78,27 +84,31 @@ def predict_toxicity(user_input):
 with gr.Blocks() as interface:
     gr.Markdown(
         """
-        # Telugu Text Toxicity Classifier
-        Enter Telugu text in English transliteration (e.g., 'neeku' for నీకు). The app will convert it to Telugu script and predict if it's toxic or non-toxic.
-        Note: Transliteration may not always be accurate. Adjust input if needed (e.g., use 'scene' for సీన్).
         """
     )
     with gr.Row():
         english_input = gr.Textbox(
-            label="Enter Telugu Text (in English Transliteration)",
-            placeholder="e.g., chala baagundhi",
             lines=2
         )
         telugu_preview = gr.Textbox(
             label="Transliterated Telugu Text (Preview)",
-            interactive=True,
             lines=2
         )
     preview_button = gr.Button("Preview Transliteration")
-    predict_button = gr.Button("Predict Toxicity")
     output = gr.Textbox(label="Prediction Output", lines=5)
     preview_button.click(
         fn=transliterate_to_telugu,
         inputs=english_input,
@@ -111,12 +121,4 @@ with gr.Blocks() as interface:
         outputs=output
     )
-    # ✅ Admin Logs Button at Bottom
-    with gr.Row():
-        gr.Markdown(
-            "<a href='https://telugu-toxicity-logger.onrender.com/logs' target='_blank'>"
-            "<button style='padding: 10px; font-weight: bold;'>🔐 View Admin Logs</button>"
-            "</a>"
-        )
 interface.launch()

 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import re
 from deep_translator import GoogleTranslator
 # Load model & tokenizer
 model_name = "Thilak118/indic-bert-toxicity-classifier"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
+# Initialize translator
+# Note: GoogleTranslator's source defaults to 'auto'; it is set to 'en' explicitly here
+# because the input is expected to be Telugu typed in English (Latin) transliteration.
 translator = GoogleTranslator(source='en', target='te')
 def clean_text(text):
+    # Keep only Telugu characters (Unicode range \u0C00-\u0C7F), spaces, and basic punctuation
     text = re.sub(r'[^\u0C00-\u0C7F\s.,!?]', '', text)
+    # Collapse multiple spaces into a single space and strip leading/trailing spaces
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 def is_telugu_text(text):
+    # Check if the text contains any Telugu script characters
     return bool(re.search(r'[\u0C00-\u0C7F]', text))
 def transliterate_to_telugu(text):
+    """
+    Translates English transliteration (or any non-Telugu text) to Telugu script.
+    """
     try:
+        # The deep_translator's GoogleTranslator is used for this
         return translator.translate(text)
     except Exception as e:
         return f"Error in transliteration: {str(e)}"
 def predict_toxicity(user_input):
+    """
+    Processes user input, converts to Telugu if necessary, cleans it,
+    and predicts toxicity using the Hugging Face model.
+    """
     try:
         original_input = user_input
+        # Check if the input is already in Telugu
         if is_telugu_text(original_input):
             telugu_text = original_input
         else:
+            # Transliterate (translate) the English input to Telugu
             telugu_text = transliterate_to_telugu(original_input)
             if "Error in transliteration" in telugu_text:
                 return telugu_text
+        # Clean the Telugu text (remove non-Telugu, non-punctuation chars)
         cleaned = clean_text(telugu_text)
+        # Tokenize and prepare inputs for the model
         inputs = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=128)
         inputs = {k: v.to(device) for k, v in inputs.items()}
+        # Run inference
         with torch.no_grad():
             outputs = model(**inputs)
+        # Process results
         prediction = torch.argmax(outputs.logits, dim=1).item()
         prob = torch.softmax(outputs.logits, dim=1)[0]
         confidence = max(prob).item() * 100
+        # NOTE: assumes label index 0 = Toxic and 1 = Non-Toxic; confirm against the model's id2label mapping
         label = "Toxic" if prediction == 0 else "Non-Toxic"
+        # Return the prediction result
         return f"Transliterated Telugu Text: {cleaned}\nPrediction: {label}\nConfidence: {confidence:.2f}%"
     except Exception as e:
         return f"Error: {str(e)}"
 with gr.Blocks() as interface:
     gr.Markdown(
         """
+        # 🇮🇳 Telugu Text Toxicity Classifier
+        Enter Telugu text, typically in **English transliteration** (e.g., 'neeku' for నీకు).
+        The application will first attempt to convert it to the Telugu script, clean it, and then
+        predict if the resulting Telugu text is **Toxic** or **Non-Toxic**.
+        *Note: The transliteration step uses an external service and may not always be perfectly accurate. Adjust your input if necessary.*
         """
     )
     with gr.Row():
         english_input = gr.Textbox(
+            label="Enter Telugu Text (in English Transliteration or Telugu Script)",
+            placeholder="e.g., chala baagundhi or చాలా బాగుంది",
             lines=2
         )
         telugu_preview = gr.Textbox(
             label="Transliterated Telugu Text (Preview)",
+            interactive=False, # Changed to False as it's a preview/output
             lines=2
         )
     preview_button = gr.Button("Preview Transliteration")
+    predict_button = gr.Button("Predict Toxicity", variant="primary")
     output = gr.Textbox(label="Prediction Output", lines=5)
+    # Event handlers
     preview_button.click(
         fn=transliterate_to_telugu,
         inputs=english_input,
         outputs=output
     )
 interface.launch()