Upload folder using huggingface_hub
Browse files- hate_speech_demo.py +45 -71
hate_speech_demo.py
CHANGED
|
@@ -84,7 +84,6 @@ def process_retrieval_text(retrieval_text, user_input):
|
|
| 84 |
return "<br><br>".join(highlighted_chunks)
|
| 85 |
|
| 86 |
# API Keys - hardcoded for convenience
|
| 87 |
-
# Replace these with your actual API keys
|
| 88 |
ORACLE_API_KEY = "key-HgVH3QX0GkyPKZhS3l3QrnLAqvjR2shrPPb_WK3lmrWHPzeKU"
|
| 89 |
TOGETHER_API_KEY = "25e1acc0998143afee6b7cb3cb4a9447d39166be767a13a36a22da64234343de"
|
| 90 |
OPENAI_API_KEY = "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"
|
|
@@ -320,74 +319,9 @@ h1, h2, h3, h4, h5, h6, p, span, div, button, input, textarea, label {
|
|
| 320 |
}
|
| 321 |
"""
|
| 322 |
|
| 323 |
-
# Contextual API class - UPDATED WITH NEW MODEL ID
|
| 324 |
-
class ContextualAPIUtils:
|
| 325 |
-
def __init__(self, api_key):
|
| 326 |
-
self.api_key = api_key
|
| 327 |
-
# Updated to new model ID
|
| 328 |
-
self.model_id = "92ab273b-378f-4b52-812b-7ec21506e49b"
|
| 329 |
-
self.endpoint_url = f"https://api.contextual.ai/v1/agents/{self.model_id}/query"
|
| 330 |
-
|
| 331 |
-
def chat(self, prompt):
|
| 332 |
-
url = f"{self.endpoint_url}?retrievals_only=false&include_retrieval_content_text=true"
|
| 333 |
-
headers = {
|
| 334 |
-
"accept": "application/json",
|
| 335 |
-
"content-type": "application/json",
|
| 336 |
-
"authorization": f"Bearer {self.api_key}",
|
| 337 |
-
}
|
| 338 |
-
body = {
|
| 339 |
-
"stream": False,
|
| 340 |
-
"messages": [{"role": "user", "content": prompt}],
|
| 341 |
-
}
|
| 342 |
-
|
| 343 |
-
start_time = time.time()
|
| 344 |
-
try:
|
| 345 |
-
response = requests.post(url, headers=headers, json=body)
|
| 346 |
-
response.raise_for_status()
|
| 347 |
-
response_json = response.json()
|
| 348 |
-
|
| 349 |
-
response_content = response_json.get("message", {}).get("content", "No content received.")
|
| 350 |
-
retrieved_texts = [
|
| 351 |
-
f"Doc: {item.get('doc_name', 'Unknown')}, Page: {item.get('page', 'N/A')}\n"
|
| 352 |
-
f"Content: {item.get('content_text', 'No Content')}"
|
| 353 |
-
for item in response_json.get("retrieval_contents", [])
|
| 354 |
-
]
|
| 355 |
-
retrieval_text = "\n\n".join(retrieved_texts) if retrieved_texts else "No retrieval text found."
|
| 356 |
-
error_message = ""
|
| 357 |
-
|
| 358 |
-
except requests.exceptions.RequestException as e:
|
| 359 |
-
response_content = "API Request Failed"
|
| 360 |
-
retrieval_text = ""
|
| 361 |
-
error_message = str(e)
|
| 362 |
-
|
| 363 |
-
response_time = round(time.time() - start_time, 4)
|
| 364 |
-
return response_content, response_time, retrieval_text, error_message
|
| 365 |
-
|
| 366 |
-
# Contextual AI rating - UPDATED PROMPT FORMAT
|
| 367 |
-
def get_contextual_rating(contextual_api, user_input):
|
| 368 |
-
rating_prompt = f"Content to rate: {user_input}"
|
| 369 |
-
response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
|
| 370 |
-
if error:
|
| 371 |
-
return f"Error: {error}", "", "unsafe"
|
| 372 |
-
|
| 373 |
-
# Determine safety level based on response content
|
| 374 |
-
safety_level = "safe"
|
| 375 |
-
if "out of policy" in response_text.lower() or "unsafe" in response_text.lower():
|
| 376 |
-
safety_level = "unsafe"
|
| 377 |
-
elif "caution" in response_text.lower() or "warning" in response_text.lower():
|
| 378 |
-
safety_level = "warning"
|
| 379 |
-
|
| 380 |
-
return response_text, retrieval_text, safety_level
|
| 381 |
-
|
| 382 |
# LlamaGuard rating
|
| 383 |
-
LLAMA_HARM_CATEGORIES = {
|
| 384 |
-
"S1": "Hate Speech", "S2": "Harassment", "S3": "Violence & Threats",
|
| 385 |
-
"S4": "Sexual Content", "S5": "Self-Harm & Suicide", "S6": "Child Sexual Exploitation",
|
| 386 |
-
"S7": "Dangerous & Illegal Activities", "S8": "Misinformation", "S9": "Political Content",
|
| 387 |
-
"S10": "Spam & Scams", "S11": "Privacy Violations", "S12": "Defamation", "S13": "Other Unsafe Content"
|
| 388 |
-
}
|
| 389 |
-
|
| 390 |
def get_llama_guard_rating(together_client, user_input):
|
|
|
|
| 391 |
chat_input = [{"role": "user", "content": user_input}]
|
| 392 |
try:
|
| 393 |
response = together_client.chat.completions.create(
|
|
@@ -410,6 +344,7 @@ def get_llama_guard_rating(together_client, user_input):
|
|
| 410 |
|
| 411 |
# OpenAI moderation
|
| 412 |
def get_openai_moderation(openai_client, user_input):
|
|
|
|
| 413 |
try:
|
| 414 |
start_time = time.time()
|
| 415 |
response = openai_client.moderations.create(input=user_input, model="omni-moderation-latest")
|
|
@@ -441,6 +376,23 @@ def get_openai_moderation(openai_client, user_input):
|
|
| 441 |
return formatted_result, safety_level
|
| 442 |
except Exception as e:
|
| 443 |
return f"Safety Status: Error\nError: {str(e)}", "unsafe"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
# Perspective AI
|
| 446 |
def get_perspective_rating(user_input):
|
|
@@ -549,22 +501,43 @@ def rate_user_input(user_input):
|
|
| 549 |
""
|
| 550 |
)
|
| 551 |
|
| 552 |
-
# Get ratings
|
| 553 |
try:
|
| 554 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
except Exception as e:
|
| 556 |
llama_rating = f"Error getting LlamaGuard rating: {str(e)}"
|
| 557 |
llama_safety = "unsafe"
|
| 558 |
|
| 559 |
try:
|
| 560 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
except Exception as e:
|
| 562 |
contextual_rating = f"Error getting Contextual rating: {str(e)}"
|
| 563 |
contextual_retrieval = ""
|
| 564 |
contextual_safety = "unsafe"
|
| 565 |
|
| 566 |
try:
|
| 567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
except Exception as e:
|
| 569 |
openai_rating = f"Error getting OpenAI rating: {str(e)}"
|
| 570 |
openai_safety = "unsafe"
|
|
@@ -645,6 +618,7 @@ def rate_user_input(user_input):
|
|
| 645 |
error_message = f"An error occurred: {str(e)}"
|
| 646 |
error_html = f'<div class="rating-box empty-rating">{error_message}</div>'
|
| 647 |
return error_html, error_html, error_html, error_html, ""
|
|
|
|
| 648 |
|
| 649 |
def random_test_case():
|
| 650 |
try:
|
|
|
|
| 84 |
return "<br><br>".join(highlighted_chunks)
|
| 85 |
|
| 86 |
# API Keys - hardcoded for convenience
|
|
|
|
| 87 |
ORACLE_API_KEY = "key-HgVH3QX0GkyPKZhS3l3QrnLAqvjR2shrPPb_WK3lmrWHPzeKU"
|
| 88 |
TOGETHER_API_KEY = "25e1acc0998143afee6b7cb3cb4a9447d39166be767a13a36a22da64234343de"
|
| 89 |
OPENAI_API_KEY = "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"
|
|
|
|
| 319 |
}
|
| 320 |
"""
|
| 321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
# LlamaGuard rating
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
def get_llama_guard_rating(together_client, user_input):
|
| 324 |
+
"""Get content rating from LlamaGuard."""
|
| 325 |
chat_input = [{"role": "user", "content": user_input}]
|
| 326 |
try:
|
| 327 |
response = together_client.chat.completions.create(
|
|
|
|
| 344 |
|
| 345 |
# OpenAI moderation
|
| 346 |
def get_openai_moderation(openai_client, user_input):
|
| 347 |
+
"""Get content rating from OpenAI Moderation API."""
|
| 348 |
try:
|
| 349 |
start_time = time.time()
|
| 350 |
response = openai_client.moderations.create(input=user_input, model="omni-moderation-latest")
|
|
|
|
| 376 |
return formatted_result, safety_level
|
| 377 |
except Exception as e:
|
| 378 |
return f"Safety Status: Error\nError: {str(e)}", "unsafe"
|
| 379 |
+
|
| 380 |
+
# Contextual API rating
|
| 381 |
+
def get_contextual_rating(contextual_api, user_input):
|
| 382 |
+
"""Get content rating from Contextual Safety Oracle."""
|
| 383 |
+
rating_prompt = f"Rate this content as Within Policy or Out of Policy and provide your explanation:\n{user_input}"
|
| 384 |
+
response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
|
| 385 |
+
if error:
|
| 386 |
+
return f"Error: {error}", "", "unsafe"
|
| 387 |
+
|
| 388 |
+
# Determine safety level based on response content
|
| 389 |
+
safety_level = "safe"
|
| 390 |
+
if "out of policy" in response_text.lower():
|
| 391 |
+
safety_level = "unsafe"
|
| 392 |
+
elif "caution" in response_text.lower() or "warning" in response_text.lower():
|
| 393 |
+
safety_level = "warning"
|
| 394 |
+
|
| 395 |
+
return response_text, retrieval_text, safety_level
|
| 396 |
|
| 397 |
# Perspective AI
|
| 398 |
def get_perspective_rating(user_input):
|
|
|
|
| 501 |
""
|
| 502 |
)
|
| 503 |
|
| 504 |
+
# Get ratings - check function signatures and call correctly
|
| 505 |
try:
|
| 506 |
+
# Check the function signature before deciding how to call it
|
| 507 |
+
import inspect
|
| 508 |
+
|
| 509 |
+
# LlamaGuard rating
|
| 510 |
+
llama_params = len(inspect.signature(get_llama_guard_rating).parameters)
|
| 511 |
+
if llama_params == 2:
|
| 512 |
+
llama_rating, llama_safety = get_llama_guard_rating(together_client, user_input)
|
| 513 |
+
else:
|
| 514 |
+
# Original expected 3 params
|
| 515 |
+
llama_rating, llama_safety = get_llama_guard_rating(together_client, user_input, user_input)
|
| 516 |
except Exception as e:
|
| 517 |
llama_rating = f"Error getting LlamaGuard rating: {str(e)}"
|
| 518 |
llama_safety = "unsafe"
|
| 519 |
|
| 520 |
try:
|
| 521 |
+
# Contextual rating
|
| 522 |
+
contextual_params = len(inspect.signature(get_contextual_rating).parameters)
|
| 523 |
+
if contextual_params == 2:
|
| 524 |
+
contextual_rating, contextual_retrieval, contextual_safety = get_contextual_rating(contextual_api, user_input)
|
| 525 |
+
else:
|
| 526 |
+
# Original expected 3 params
|
| 527 |
+
contextual_rating, contextual_retrieval, contextual_safety = get_contextual_rating(contextual_api, user_input, user_input)
|
| 528 |
except Exception as e:
|
| 529 |
contextual_rating = f"Error getting Contextual rating: {str(e)}"
|
| 530 |
contextual_retrieval = ""
|
| 531 |
contextual_safety = "unsafe"
|
| 532 |
|
| 533 |
try:
|
| 534 |
+
# OpenAI moderation
|
| 535 |
+
openai_params = len(inspect.signature(get_openai_moderation).parameters)
|
| 536 |
+
if openai_params == 2:
|
| 537 |
+
openai_rating, openai_safety = get_openai_moderation(openai_client, user_input)
|
| 538 |
+
else:
|
| 539 |
+
# Original expected 3 params
|
| 540 |
+
openai_rating, openai_safety = get_openai_moderation(openai_client, user_input, user_input)
|
| 541 |
except Exception as e:
|
| 542 |
openai_rating = f"Error getting OpenAI rating: {str(e)}"
|
| 543 |
openai_safety = "unsafe"
|
|
|
|
| 618 |
error_message = f"An error occurred: {str(e)}"
|
| 619 |
error_html = f'<div class="rating-box empty-rating">{error_message}</div>'
|
| 620 |
return error_html, error_html, error_html, error_html, ""
|
| 621 |
+
|
| 622 |
|
| 623 |
def random_test_case():
|
| 624 |
try:
|