Upload folder using huggingface_hub
Browse files- hate_speech_demo.py +126 -52
hate_speech_demo.py
CHANGED
|
@@ -14,11 +14,11 @@ import json
|
|
| 14 |
# Load environment variables (for local development)
|
| 15 |
load_dotenv()
|
| 16 |
|
| 17 |
-
# Process retrieval text to highlight relevant parts
|
| 18 |
def process_retrieval_text(retrieval_text, user_input):
|
| 19 |
"""
|
| 20 |
Process the retrieval text by identifying proper document boundaries
|
| 21 |
-
and highlighting relevant keywords.
|
| 22 |
"""
|
| 23 |
if not retrieval_text or retrieval_text.strip() == "No retrieval text found.":
|
| 24 |
return retrieval_text
|
|
@@ -33,8 +33,12 @@ def process_retrieval_text(retrieval_text, user_input):
|
|
| 33 |
|
| 34 |
for i, section in enumerate(doc_sections):
|
| 35 |
if section.strip():
|
| 36 |
-
#
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
else:
|
| 39 |
# Fallback to a simpler approach - split by double newlines
|
| 40 |
# but combine any small chunks that appear to be part of the same document
|
|
@@ -61,8 +65,8 @@ def process_retrieval_text(retrieval_text, user_input):
|
|
| 61 |
if current_chunk:
|
| 62 |
chunks.append(current_chunk)
|
| 63 |
|
| 64 |
-
# Format each chunk
|
| 65 |
-
chunks = [f"<
|
| 66 |
for i, chunk in enumerate(chunks)]
|
| 67 |
|
| 68 |
# Extract keywords from user input (longer than 3 chars)
|
|
@@ -82,7 +86,7 @@ def process_retrieval_text(retrieval_text, user_input):
|
|
| 82 |
|
| 83 |
highlighted_chunks.append(highlighted_chunk)
|
| 84 |
|
| 85 |
-
return "<
|
| 86 |
|
| 87 |
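# API keys - read from environment variables (populated by load_dotenv() for local development)
|
| 88 |
# Set these in your environment or a local .env file; each falls back to "" when unset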
|
|
@@ -91,7 +95,7 @@ TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
|
|
| 91 |
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
| 92 |
PERSPECTIVE_API_KEY = os.environ.get("PERSPECTIVE_API_KEY", "")
|
| 93 |
|
| 94 |
-
# Custom CSS for styling
|
| 95 |
CUSTOM_CSS = """
|
| 96 |
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap');
|
| 97 |
|
|
@@ -277,22 +281,35 @@ textarea.svelte-1pie7s6 {
|
|
| 277 |
margin-bottom: 12px;
|
| 278 |
}
|
| 279 |
|
| 280 |
-
/* Document section formatting */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
.doc-section {
|
| 282 |
-
margin-bottom:
|
| 283 |
padding-bottom: 15px;
|
| 284 |
-
border-bottom: 1px solid #
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
}
|
| 286 |
|
| 287 |
.doc-title {
|
| 288 |
font-weight: bold;
|
| 289 |
-
margin-bottom:
|
| 290 |
-
color: #
|
|
|
|
|
|
|
| 291 |
}
|
| 292 |
|
| 293 |
.doc-content {
|
| 294 |
padding-left: 10px;
|
| 295 |
border-left: 3px solid #f0f0f0;
|
|
|
|
| 296 |
}
|
| 297 |
|
| 298 |
/* Matching text highlighting */
|
|
@@ -376,6 +393,9 @@ class ContextualAPIUtils:
|
|
| 376 |
response_json = response.json()
|
| 377 |
|
| 378 |
response_content = response_json.get("message", {}).get("content", "No content received.")
|
|
|
|
|
|
|
|
|
|
| 379 |
retrieved_texts = [
|
| 380 |
f"Doc: {item.get('doc_name', 'Unknown')}, Page: {item.get('page', 'N/A')}\n"
|
| 381 |
f"Content: {item.get('content_text', 'No Content')}"
|
|
@@ -525,7 +545,7 @@ def rate_user_input(user_input):
|
|
| 525 |
|
| 526 |
# Format responses carefully to avoid random line breaks
|
| 527 |
llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
|
| 528 |
-
|
| 529 |
|
| 530 |
# Process retrieval text to highlight keywords with better formatting
|
| 531 |
processed_retrieval = process_retrieval_text(contextual_retrieval, user_input)
|
|
@@ -547,7 +567,7 @@ def rate_user_input(user_input):
|
|
| 547 |
# Create the popup div (initially hidden)
|
| 548 |
knowledge_html = f"""
|
| 549 |
<div id="{popup_id}" class="knowledge-popup" style="display: none;">
|
| 550 |
-
<div class="knowledge-popup-header">
|
| 551 |
<button class="knowledge-popup-close"
|
| 552 |
onclick="this.parentElement.style.display='none';
|
| 553 |
document.getElementById('btn-{popup_id}').style.display='inline-block';
|
|
@@ -700,7 +720,7 @@ def create_gradio_app():
|
|
| 700 |
# Create a file component to serve the PDF (hidden from UI)
|
| 701 |
pdf_file = gr.File("Hate Speech Policy.pdf", visible=False, label="Policy PDF")
|
| 702 |
|
| 703 |
-
# Add policy popup HTML with improved PDF
|
| 704 |
policy_popup_html = """
|
| 705 |
<div id="policy-popup" class="policy-popup">
|
| 706 |
<div class="policy-popup-content">
|
|
@@ -722,57 +742,111 @@ def create_gradio_app():
|
|
| 722 |
</div>
|
| 723 |
|
| 724 |
<script>
|
| 725 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
function openPolicyPopup() {
|
| 727 |
-
//
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
// Try multiple approaches to display the PDF
|
| 731 |
-
// 1. Google PDF viewer (works in most cases)
|
| 732 |
-
const googleViewerUrl = "https://docs.google.com/viewer?embedded=true&url=";
|
| 733 |
|
| 734 |
-
|
| 735 |
-
|
|
|
|
| 736 |
|
| 737 |
-
//
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
}
|
| 744 |
}
|
| 745 |
-
|
| 746 |
-
|
|
|
|
|
|
|
| 747 |
const iframe = document.getElementById("policy-iframe");
|
| 748 |
const fallback = document.getElementById("policy-fallback");
|
| 749 |
const downloadLink = document.getElementById("policy-download-link");
|
| 750 |
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
|
| 760 |
-
//
|
| 761 |
iframe.onerror = function() {
|
| 762 |
iframe.style.display = "none";
|
| 763 |
fallback.style.display = "block";
|
| 764 |
};
|
| 765 |
-
}
|
| 766 |
-
// No direct URL found, show fallback
|
| 767 |
-
iframe.style.display = "none";
|
| 768 |
-
fallback.style.display = "block";
|
| 769 |
-
downloadLink.href = "#";
|
| 770 |
-
downloadLink.textContent = "PDF not available";
|
| 771 |
-
}
|
| 772 |
-
|
| 773 |
-
// Display the popup
|
| 774 |
-
document.getElementById('policy-popup').style.display = 'flex';
|
| 775 |
}
|
|
|
|
|
|
|
|
|
|
| 776 |
</script>
|
| 777 |
"""
|
| 778 |
|
|
|
|
| 14 |
# Load environment variables (for local development)
|
| 15 |
load_dotenv()
|
| 16 |
|
| 17 |
+
# Process retrieval text to highlight relevant parts - IMPROVED FORMATTING
|
| 18 |
def process_retrieval_text(retrieval_text, user_input):
|
| 19 |
"""
|
| 20 |
Process the retrieval text by identifying proper document boundaries
|
| 21 |
+
and highlighting relevant keywords with improved formatting.
|
| 22 |
"""
|
| 23 |
if not retrieval_text or retrieval_text.strip() == "No retrieval text found.":
|
| 24 |
return retrieval_text
|
|
|
|
| 33 |
|
| 34 |
for i, section in enumerate(doc_sections):
|
| 35 |
if section.strip():
|
| 36 |
+
# Better formatting with clear section breaks
|
| 37 |
+
formatted_section = section.strip()
|
| 38 |
+
# Split Doc and Content on separate lines
|
| 39 |
+
formatted_section = formatted_section.replace("Doc:", "<strong>Document:</strong><br>")
|
| 40 |
+
formatted_section = formatted_section.replace("Content:", "<br><strong>Content:</strong><br>")
|
| 41 |
+
chunks.append(f"<div class='doc-section'><strong>Evidence Document {i+1}</strong><br>{formatted_section}</div>")
|
| 42 |
else:
|
| 43 |
# Fallback to a simpler approach - split by double newlines
|
| 44 |
# but combine any small chunks that appear to be part of the same document
|
|
|
|
| 65 |
if current_chunk:
|
| 66 |
chunks.append(current_chunk)
|
| 67 |
|
| 68 |
+
# Format each chunk with better section styling
|
| 69 |
+
chunks = [f"<div class='doc-section'><div class='doc-title'>Evidence Document {i+1}</div><div class='doc-content'>{chunk.strip()}</div></div>"
|
| 70 |
for i, chunk in enumerate(chunks)]
|
| 71 |
|
| 72 |
# Extract keywords from user input (longer than 3 chars)
|
|
|
|
| 86 |
|
| 87 |
highlighted_chunks.append(highlighted_chunk)
|
| 88 |
|
| 89 |
+
return "<div class='knowledge-sections'>" + "".join(highlighted_chunks) + "</div>"
|
| 90 |
|
| 91 |
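# API keys - read from environment variables (populated by load_dotenv() for local development)
|
| 92 |
# Set these in your environment or a local .env file; each falls back to "" when unset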
|
|
|
|
| 95 |
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
| 96 |
PERSPECTIVE_API_KEY = os.environ.get("PERSPECTIVE_API_KEY", "")
|
| 97 |
|
| 98 |
+
# Custom CSS for styling - UPDATED CSS
|
| 99 |
CUSTOM_CSS = """
|
| 100 |
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap');
|
| 101 |
|
|
|
|
| 281 |
margin-bottom: 12px;
|
| 282 |
}
|
| 283 |
|
| 284 |
+
/* Document section formatting - IMPROVED */
|
| 285 |
+
.knowledge-sections {
|
| 286 |
+
border-radius: 5px;
|
| 287 |
+
background: #f9f9f9;
|
| 288 |
+
padding: 10px;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
.doc-section {
|
| 292 |
+
margin-bottom: 20px;
|
| 293 |
padding-bottom: 15px;
|
| 294 |
+
border-bottom: 1px solid #e0e0e0;
|
| 295 |
+
background: white;
|
| 296 |
+
padding: 15px;
|
| 297 |
+
border-radius: 5px;
|
| 298 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
| 299 |
}
|
| 300 |
|
| 301 |
.doc-title {
|
| 302 |
font-weight: bold;
|
| 303 |
+
margin-bottom: 10px;
|
| 304 |
+
color: #333;
|
| 305 |
+
border-bottom: 1px solid #eee;
|
| 306 |
+
padding-bottom: 5px;
|
| 307 |
}
|
| 308 |
|
| 309 |
.doc-content {
|
| 310 |
padding-left: 10px;
|
| 311 |
border-left: 3px solid #f0f0f0;
|
| 312 |
+
line-height: 1.5;
|
| 313 |
}
|
| 314 |
|
| 315 |
/* Matching text highlighting */
|
|
|
|
| 393 |
response_json = response.json()
|
| 394 |
|
| 395 |
response_content = response_json.get("message", {}).get("content", "No content received.")
|
| 396 |
+
# Prevent line breaks in the explanation
|
| 397 |
+
response_content = response_content.replace("\n", " ")
|
| 398 |
+
|
| 399 |
retrieved_texts = [
|
| 400 |
f"Doc: {item.get('doc_name', 'Unknown')}, Page: {item.get('page', 'N/A')}\n"
|
| 401 |
f"Content: {item.get('content_text', 'No Content')}"
|
|
|
|
| 545 |
|
| 546 |
# Format responses carefully to avoid random line breaks
|
| 547 |
llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
|
| 548 |
+
# Don't add line breaks to contextual rating
|
| 549 |
|
| 550 |
# Process retrieval text to highlight keywords with better formatting
|
| 551 |
processed_retrieval = process_retrieval_text(contextual_retrieval, user_input)
|
|
|
|
| 567 |
# Create the popup div (initially hidden)
|
| 568 |
knowledge_html = f"""
|
| 569 |
<div id="{popup_id}" class="knowledge-popup" style="display: none;">
|
| 570 |
+
<div class="knowledge-popup-header">Supporting evidence for Contextual Oracle</div>
|
| 571 |
<button class="knowledge-popup-close"
|
| 572 |
onclick="this.parentElement.style.display='none';
|
| 573 |
document.getElementById('btn-{popup_id}').style.display='inline-block';
|
|
|
|
| 720 |
# Create a file component to serve the PDF (hidden from UI)
|
| 721 |
pdf_file = gr.File("Hate Speech Policy.pdf", visible=False, label="Policy PDF")
|
| 722 |
|
| 723 |
+
# Add policy popup HTML with improved PDF loading
|
| 724 |
policy_popup_html = """
|
| 725 |
<div id="policy-popup" class="policy-popup">
|
| 726 |
<div class="policy-popup-content">
|
|
|
|
| 742 |
</div>
|
| 743 |
|
| 744 |
<script>
|
| 745 |
+
// Improved PDF loading code - will execute when page loads
|
| 746 |
+
document.addEventListener('DOMContentLoaded', function() {
|
| 747 |
+
// Preload PDF link
|
| 748 |
+
findPdfLink();
|
| 749 |
+
});
|
| 750 |
+
|
| 751 |
+
// Function to find the PDF link and cache it
|
| 752 |
+
let cachedPdfUrl = null;
|
| 753 |
+
function findPdfLink() {
|
| 754 |
+
// Only search if we haven't found it yet
|
| 755 |
+
if (!cachedPdfUrl) {
|
| 756 |
+
const links = document.querySelectorAll("a");
|
| 757 |
+
for (const link of links) {
|
| 758 |
+
if (link.href && link.href.includes("Hate%20Speech%20Policy.pdf")) {
|
| 759 |
+
cachedPdfUrl = link.href;
|
| 760 |
+
console.log("PDF link found and cached:", cachedPdfUrl);
|
| 761 |
+
break;
|
| 762 |
+
}
|
| 763 |
+
}
|
| 764 |
+
|
| 765 |
+
// If we didn't find it, set a timeout to keep trying
|
| 766 |
+
if (!cachedPdfUrl) {
|
| 767 |
+
setTimeout(findPdfLink, 1000);
|
| 768 |
+
}
|
| 769 |
+
}
|
| 770 |
+
}
|
| 771 |
+
|
| 772 |
+
// Function to handle opening the policy popup with improved reliability
|
| 773 |
function openPolicyPopup() {
|
| 774 |
+
// Display the popup right away
|
| 775 |
+
document.getElementById('policy-popup').style.display = 'flex';
|
|
|
|
|
|
|
|
|
|
|
|
|
| 776 |
|
| 777 |
+
const iframe = document.getElementById("policy-iframe");
|
| 778 |
+
const fallback = document.getElementById("policy-fallback");
|
| 779 |
+
const downloadLink = document.getElementById("policy-download-link");
|
| 780 |
|
| 781 |
+
// If we already have the PDF URL, use it
|
| 782 |
+
if (cachedPdfUrl) {
|
| 783 |
+
loadPdfIntoIframe(cachedPdfUrl);
|
| 784 |
+
} else {
|
| 785 |
+
// Otherwise, search for it again
|
| 786 |
+
const links = document.querySelectorAll("a");
|
| 787 |
+
let pdfUrl = null;
|
| 788 |
+
|
| 789 |
+
for (const link of links) {
|
| 790 |
+
if (link.href && link.href.includes("Hate%20Speech%20Policy.pdf")) {
|
| 791 |
+
pdfUrl = link.href;
|
| 792 |
+
cachedPdfUrl = pdfUrl; // Cache for future use
|
| 793 |
+
break;
|
| 794 |
+
}
|
| 795 |
+
}
|
| 796 |
+
|
| 797 |
+
if (pdfUrl) {
|
| 798 |
+
loadPdfIntoIframe(pdfUrl);
|
| 799 |
+
} else {
|
| 800 |
+
// Last resort - try to find the file component
|
| 801 |
+
const fileComponents = document.querySelectorAll("[data-testid='file']");
|
| 802 |
+
for (const comp of fileComponents) {
|
| 803 |
+
const downloadBtn = comp.querySelector("a");
|
| 804 |
+
if (downloadBtn && downloadBtn.href && downloadBtn.href.includes("file=")) {
|
| 805 |
+
pdfUrl = downloadBtn.href;
|
| 806 |
+
cachedPdfUrl = pdfUrl;
|
| 807 |
+
loadPdfIntoIframe(pdfUrl);
|
| 808 |
+
return;
|
| 809 |
+
}
|
| 810 |
+
}
|
| 811 |
+
|
| 812 |
+
// If we couldn't find the PDF, show fallback
|
| 813 |
+
iframe.style.display = "none";
|
| 814 |
+
fallback.style.display = "block";
|
| 815 |
+
downloadLink.href = "#";
|
| 816 |
+
downloadLink.textContent = "PDF not available";
|
| 817 |
}
|
| 818 |
}
|
| 819 |
+
}
|
| 820 |
+
|
| 821 |
+
// Function to load PDF into iframe with fallback
|
| 822 |
+
function loadPdfIntoIframe(pdfUrl) {
|
| 823 |
const iframe = document.getElementById("policy-iframe");
|
| 824 |
const fallback = document.getElementById("policy-fallback");
|
| 825 |
const downloadLink = document.getElementById("policy-download-link");
|
| 826 |
|
| 827 |
+
// Try direct embedding first (works in most browsers)
|
| 828 |
+
iframe.src = pdfUrl;
|
| 829 |
+
iframe.style.display = "block";
|
| 830 |
+
fallback.style.display = "none";
|
| 831 |
+
|
| 832 |
+
// Set the download link to the PDF
|
| 833 |
+
downloadLink.href = pdfUrl;
|
| 834 |
+
|
| 835 |
+
// If direct embedding fails, try Google Viewer as backup
|
| 836 |
+
iframe.onerror = function() {
|
| 837 |
+
const googleViewerUrl = "https://docs.google.com/viewer?embedded=true&url=";
|
| 838 |
+
iframe.src = googleViewerUrl + encodeURIComponent(pdfUrl);
|
| 839 |
|
| 840 |
+
// If even Google Viewer fails, show fallback
|
| 841 |
iframe.onerror = function() {
|
| 842 |
iframe.style.display = "none";
|
| 843 |
fallback.style.display = "block";
|
| 844 |
};
|
| 845 |
+
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 846 |
}
|
| 847 |
+
|
| 848 |
+
// Expose the function globally for button clicks
|
| 849 |
+
window.openPolicyPopup = openPolicyPopup;
|
| 850 |
</script>
|
| 851 |
"""
|
| 852 |
|