alohaboy committed on
Commit · 2185d4b
Parent(s): 449e8c9
Fix indentation error in guided mitigation methods
app.py CHANGED
@@ -404,15 +404,6 @@ Mitigated sentence:"""
         if hate_tokens:
             hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
         prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nPlease remove hate speech or aggressive expressions, while maintaining the original intent (criticism, complaint, opinion, etc.).\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\n[Important] All offensive, derogatory, and explicit hate expressions (e.g., 씨발, 좆, 병신) must be deleted.\n\nMitigated sentence:"""
-        label_desc = {
-            "offensive": "Aggressive",
-            "L1_hate": "Mild Hate",
-            "L2_hate": "Severe Hate"
-        }
-        hate_tokens_str = ""
-        if hate_tokens:
-            hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
-        prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nPlease remove hate speech or aggressive expressions, while maintaining the original intent (criticism, complaint, opinion, etc.).\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\n[Important] All offensive, derogatory, and explicit hate expressions (e.g., 씨발, 좆, 병신) must be deleted.\n\nMitigated sentence:"""
         # LLM inference
         inputs = self.llm_tokenizer(prompt, return_tensors="pt").to(self.llm_model.device)
         with torch.no_grad():
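The hunk above is cut off at `with torch.no_grad():`. A minimal sketch of how such a Hugging Face generate call typically continues; only `prompt`, `self.llm_tokenizer`, and `self.llm_model` appear in the diff, so the function name and all generation parameters below are illustrative assumptions, not the app's actual code:

import torch

# Hypothetical standalone version of the inference step shown above.
# Generation parameters are assumptions for illustration.
def run_mitigation(prompt, model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=128,   # assumed budget for one rewritten sentence
            do_sample=False,      # greedy decoding for reproducible output
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the tokens generated after the prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()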
@@ -488,15 +479,6 @@ Mitigated sentence:"""
         if hate_tokens:
             hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
         initial_prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nExpressions containing offensive words (e.g., 좃, 씨발, 병신) must be deleted.\nOther aggressive or inappropriate expressions should be mitigated by expressing them more politely and inclusively.\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\nMitigated sentence:"""
-        label_desc = {
-            "offensive": "Aggressive",
-            "L1_hate": "Mild Hate",
-            "L2_hate": "Severe Hate"
-        }
-        hate_tokens_str = ""
-        if hate_tokens:
-            hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
-        initial_prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nExpressions containing offensive words (e.g., 좃, 씨발, 병신) must be deleted.\nOther aggressive or inappropriate expressions should be mitigated by expressing them more politely and inclusively.\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\nMitigated sentence:"""
         # Iterative mitigation and evaluation
         max_iter = 3  # Reduced from 5 to 3 for Space deployment
         metrics_history = []
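The context lines closing this second hunk hint at a rewrite-evaluate loop. A minimal sketch of that pattern; only `initial_prompt`, `max_iter = 3`, and `metrics_history` are visible in the diff, so `generate_mitigation`, `evaluate`, and the stopping criterion are hypothetical:

# Hypothetical sketch of the loop implied by the
# "# Iterative mitigation and evaluation" context lines.
def iterative_mitigation(initial_prompt, generate_mitigation, evaluate,
                         max_iter=3, toxicity_threshold=0.5):
    metrics_history = []
    candidate = generate_mitigation(initial_prompt)
    for _ in range(max_iter):
        metrics = evaluate(candidate)          # e.g., a residual-toxicity score
        metrics_history.append(metrics)
        if metrics["toxicity"] < toxicity_threshold:  # assumed stop condition
            break
        # Otherwise feed the candidate back for another mitigation pass.
        candidate = generate_mitigation(candidate)
    return candidate, metrics_history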