Thilak118 committed on
Commit
66a7e42
·
verified ·
1 Parent(s): 6f838d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -32
app.py CHANGED
@@ -3,73 +3,79 @@ import torch
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
  import re
5
  from deep_translator import GoogleTranslator
6
- import requests
7
 
8
  # Load model & tokenizer
9
  model_name = "Thilak118/indic-bert-toxicity-classifier"
10
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
 
 
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  model.to(device)
15
  model.eval()
16
 
 
 
 
17
  translator = GoogleTranslator(source='en', target='te')
18
 
19
  def clean_text(text):
 
20
  text = re.sub(r'[^\u0C00-\u0C7F\s.,!?]', '', text)
 
21
  text = re.sub(r'\s+', ' ', text).strip()
22
  return text
23
 
24
  def is_telugu_text(text):
 
25
  return bool(re.search(r'[\u0C00-\u0C7F]', text))
26
 
27
  def transliterate_to_telugu(text):
 
 
 
28
  try:
 
29
  return translator.translate(text)
30
  except Exception as e:
31
  return f"Error in transliteration: {str(e)}"
32
 
33
- def log_to_render(comment, transliterated, prediction, confidence):
34
- url = "https://telugu-toxicity-logger.onrender.com/log"
35
- payload = {
36
- "comment": comment,
37
- "transliterated": transliterated,
38
- "prediction": prediction,
39
- "confidence": confidence
40
- }
41
- try:
42
- requests.post(url, json=payload)
43
- except Exception as e:
44
- print("Logging failed:", e)
45
-
46
  def predict_toxicity(user_input):
 
 
 
 
47
  try:
48
  original_input = user_input
49
 
 
50
  if is_telugu_text(original_input):
51
  telugu_text = original_input
52
  else:
 
53
  telugu_text = transliterate_to_telugu(original_input)
54
  if "Error in transliteration" in telugu_text:
55
  return telugu_text
56
 
 
57
  cleaned = clean_text(telugu_text)
58
 
 
59
  inputs = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=128)
60
  inputs = {k: v.to(device) for k, v in inputs.items()}
61
 
 
62
  with torch.no_grad():
63
  outputs = model(**inputs)
64
 
 
65
  prediction = torch.argmax(outputs.logits, dim=1).item()
66
  prob = torch.softmax(outputs.logits, dim=1)[0]
67
  confidence = max(prob).item() * 100
 
68
  label = "Toxic" if prediction == 0 else "Non-Toxic"
69
 
70
- # Log it to Render backend
71
- log_to_render(original_input, cleaned, label, confidence)
72
-
73
  return f"Transliterated Telugu Text: {cleaned}\nPrediction: {label}\nConfidence: {confidence:.2f}%"
74
  except Exception as e:
75
  return f"Error: {str(e)}"
@@ -78,27 +84,31 @@ def predict_toxicity(user_input):
78
  with gr.Blocks() as interface:
79
  gr.Markdown(
80
  """
81
- # Telugu Text Toxicity Classifier
82
- Enter Telugu text in English transliteration (e.g., 'neeku' for నీకు). The app will convert it to Telugu script and predict if it's toxic or non-toxic.
83
- Note: Transliteration may not always be accurate. Adjust input if needed (e.g., use 'scene' for సీన్).
 
 
 
84
  """
85
  )
86
  with gr.Row():
87
  english_input = gr.Textbox(
88
- label="Enter Telugu Text (in English Transliteration)",
89
- placeholder="e.g., chala baagundhi",
90
  lines=2
91
  )
92
  telugu_preview = gr.Textbox(
93
  label="Transliterated Telugu Text (Preview)",
94
- interactive=True,
95
  lines=2
96
  )
97
 
98
  preview_button = gr.Button("Preview Transliteration")
99
- predict_button = gr.Button("Predict Toxicity")
100
  output = gr.Textbox(label="Prediction Output", lines=5)
101
 
 
102
  preview_button.click(
103
  fn=transliterate_to_telugu,
104
  inputs=english_input,
@@ -111,12 +121,4 @@ with gr.Blocks() as interface:
111
  outputs=output
112
  )
113
 
114
- # ✅ Admin Logs Button at Bottom
115
- with gr.Row():
116
- gr.Markdown(
117
- "<a href='https://telugu-toxicity-logger.onrender.com/logs' target='_blank'>"
118
- "<button style='padding: 10px; font-weight: bold;'>🔐 View Admin Logs</button>"
119
- "</a>"
120
- )
121
-
122
  interface.launch()
 
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
  import re
5
  from deep_translator import GoogleTranslator
 
6
 
# Load the sequence-classification model and its matching tokenizer by name.
model_name = "Thilak118/indic-bert-toxicity-classifier"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# The app only runs inference, so put the model in evaluation mode.
model.eval()

# Shared translator used to convert non-Telugu input into Telugu ('te').
# The source language is pinned to English ('en') rather than auto-detected.
# NOTE(review): this is a translation service; whether it transliterates
# romanized Telugu as intended should be confirmed with real inputs.
translator = GoogleTranslator(source='en', target='te')
21
 
# Compiled once at import time; both patterns are reused on every request.
_NON_TELUGU_RE = re.compile(r'[^\u0C00-\u0C7F\s.,!?]')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text):
    """Strip everything except Telugu characters, whitespace and . , ! ?

    Characters outside the Telugu Unicode block (U+0C00-U+0C7F) are removed,
    except whitespace and the punctuation marks ``. , ! ?``. Runs of
    whitespace are then collapsed to a single space and the result is
    stripped of leading/trailing whitespace.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; empty if nothing allowed remains.
    """
    text = _NON_TELUGU_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()
28
 
def is_telugu_text(text):
    """Return True if *text* contains at least one Telugu-block character.

    A single character in the range U+0C00-U+0C7F anywhere in the string is
    enough; mixed-script input therefore counts as Telugu.
    """
    return re.search(r'[\u0C00-\u0C7F]', text) is not None
32
 
def transliterate_to_telugu(text):
    """Convert *text* to Telugu using the module-level ``translator``.

    Args:
        text: The input string to convert.

    Returns:
        Whatever ``translator.translate(text)`` returns. If that call raises
        an ``Exception``, the error is not propagated; instead the string
        ``"Error in transliteration: <message>"`` is returned, and callers
        detect failure by that marker text.
    """
    try:
        return translator.translate(text)
    except Exception as e:
        # Report the failure in-band so it can be shown to the user as text.
        return f"Error in transliteration: {e}"
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def predict_toxicity(user_input):
    """Classify *user_input* as Toxic or Non-Toxic and return a text report.

    Input that already contains Telugu characters is used as-is; any other
    input is first passed through ``transliterate_to_telugu``. The Telugu
    text is cleaned with ``clean_text``, tokenized (truncated to 128 tokens)
    and run through the model without gradient tracking.

    Args:
        user_input: The text entered by the user.

    Returns:
        On success, a three-line string with the cleaned Telugu text, the
        predicted label and the confidence as a percentage. On any failure
        (empty input, conversion error, nothing left after cleaning, or an
        exception during inference) a message starting with ``"Error"``.
        Exceptions are not propagated to the caller.
    """
    try:
        # Nothing to classify: fail early with a readable message instead of
        # sending blank text to the translator or the model.
        if not user_input or not user_input.strip():
            return "Error: Please enter some text to classify."

        if is_telugu_text(user_input):
            telugu_text = user_input
        else:
            telugu_text = transliterate_to_telugu(user_input)
            # The helper reports failure in-band with this prefix.
            if telugu_text.startswith("Error in transliteration"):
                return telugu_text

        cleaned = clean_text(telugu_text)
        if not cleaned:
            # Classifying an empty string would yield a meaningless label.
            return "Error: No Telugu text left to classify after conversion and cleaning."

        inputs = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=128)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        prediction = torch.argmax(outputs.logits, dim=1).item()
        probs = torch.softmax(outputs.logits, dim=1)[0]
        confidence = probs.max().item() * 100
        # NOTE(review): assumes class index 0 is the toxic class - confirm
        # against the model's label mapping.
        label = "Toxic" if prediction == 0 else "Non-Toxic"

        return f"Transliterated Telugu Text: {cleaned}\nPrediction: {label}\nConfidence: {confidence:.2f}%"
    except Exception as e:
        return f"Error: {e}"
 
84
  with gr.Blocks() as interface:
85
  gr.Markdown(
86
  """
87
+ # 🇮🇳 Telugu Text Toxicity Classifier
88
+ Enter Telugu text, typically in **English transliteration** (e.g., 'neeku' for నీకు).
89
+ The application will first attempt to convert it to the Telugu script, clean it, and then
90
+ predict if the resulting Telugu text is **Toxic** or **Non-Toxic**.
91
+
92
+ *Note: The transliteration step uses an external service and may not always be perfectly accurate. Adjust your input if necessary.*
93
  """
94
  )
95
  with gr.Row():
96
  english_input = gr.Textbox(
97
+ label="Enter Telugu Text (in English Transliteration or Telugu Script)",
98
+ placeholder="e.g., chala baagundhi or చాలా బాగుంది",
99
  lines=2
100
  )
101
  telugu_preview = gr.Textbox(
102
  label="Transliterated Telugu Text (Preview)",
103
+ interactive=False, # Changed to False as it's a preview/output
104
  lines=2
105
  )
106
 
107
  preview_button = gr.Button("Preview Transliteration")
108
+ predict_button = gr.Button("Predict Toxicity", variant="primary")
109
  output = gr.Textbox(label="Prediction Output", lines=5)
110
 
111
+ # Event handlers
112
  preview_button.click(
113
  fn=transliterate_to_telugu,
114
  inputs=english_input,
 
121
  outputs=output
122
  )
123
 
 
 
 
 
 
 
 
 
124
  interface.launch()