Spaces:

rwillats
/

Contextual-Policy-Engine-Hate-Speech-Classification

Sleeping

App Files Files Community

rwillats committed on Apr 18

Commit

a164c20

verified ·

1 Parent(s): 1f0e4c3

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

hate_speech_demo.py +126 -52

hate_speech_demo.py CHANGED Viewed

@@ -14,11 +14,11 @@ import json
 # Load environment variables (for local development)
 load_dotenv()
-# Process retrieval text to highlight relevant parts
 def process_retrieval_text(retrieval_text, user_input):
     """
     Process the retrieval text by identifying proper document boundaries
-    and highlighting relevant keywords.
     """
     if not retrieval_text or retrieval_text.strip() == "No retrieval text found.":
         return retrieval_text
@@ -33,8 +33,12 @@ def process_retrieval_text(retrieval_text, user_input):
         for i, section in enumerate(doc_sections):
             if section.strip():
-                # Format nicely with document number
-                chunks.append(f"<strong>Evidence Document {i+1}</strong><br>{section.strip()}")
     else:
         # Fallback to a simpler approach - split by double newlines
         # but combine any small chunks that appear to be part of the same document
@@ -61,8 +65,8 @@ def process_retrieval_text(retrieval_text, user_input):
         if current_chunk:
             chunks.append(current_chunk)
-        # Format each chunk
-        chunks = [f"<strong>Evidence Document {i+1}</strong><br>{chunk.strip()}"
                   for i, chunk in enumerate(chunks)]
     # Extract keywords from user input (longer than 3 chars)
@@ -82,7 +86,7 @@ def process_retrieval_text(retrieval_text, user_input):
         highlighted_chunks.append(highlighted_chunk)
-    return "<br><br>".join(highlighted_chunks)
 # API keys - read from environment variables (empty string when unset)
 # Set them in the environment or in a local .env file; do not hardcode them here
@@ -91,7 +95,7 @@ TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
 PERSPECTIVE_API_KEY = os.environ.get("PERSPECTIVE_API_KEY", "")
-# Custom CSS for styling
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap');
@@ -277,22 +281,35 @@ textarea.svelte-1pie7s6 {
     margin-bottom: 12px;
 }
-/* Document section formatting */
 .doc-section {
-    margin-bottom: 15px;
     padding-bottom: 15px;
-    border-bottom: 1px solid #eee;
 }
 .doc-title {
     font-weight: bold;
-    margin-bottom: 5px;
-    color: #444;
 }
 .doc-content {
     padding-left: 10px;
     border-left: 3px solid #f0f0f0;
 }
 /* Matching text highlighting */
@@ -376,6 +393,9 @@ class ContextualAPIUtils:
             response_json = response.json()
             response_content = response_json.get("message", {}).get("content", "No content received.")
             retrieved_texts = [
                 f"Doc: {item.get('doc_name', 'Unknown')}, Page: {item.get('page', 'N/A')}\n"
                 f"Content: {item.get('content_text', 'No Content')}"
@@ -525,7 +545,7 @@ def rate_user_input(user_input):
     # Format responses carefully to avoid random line breaks
     llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
-    contextual_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', contextual_rating)
     # Process retrieval text to highlight keywords with better formatting
     processed_retrieval = process_retrieval_text(contextual_retrieval, user_input)
@@ -547,7 +567,7 @@ def rate_user_input(user_input):
         # Create the popup div (initially hidden)
         knowledge_html = f"""
         <div id="{popup_id}" class="knowledge-popup" style="display: none;">
-            <div class="knowledge-popup-header">Retrieved Knowledge</div>
             <button class="knowledge-popup-close"
                     onclick="this.parentElement.style.display='none';
                              document.getElementById('btn-{popup_id}').style.display='inline-block';
@@ -700,7 +720,7 @@ def create_gradio_app():
         # Create a file component to serve the PDF (hidden from UI)
         pdf_file = gr.File("Hate Speech Policy.pdf", visible=False, label="Policy PDF")
-        # Add policy popup HTML with improved PDF handling
         policy_popup_html = """
         <div id="policy-popup" class="policy-popup">
             <div class="policy-popup-content">
@@ -722,57 +742,111 @@ def create_gradio_app():
         </div>
         <script>
-        // Function to handle opening the policy popup
         function openPolicyPopup() {
-            // Set PDF URL - this approach is more reliable with Gradio
-            const pdfFileName = "Hate Speech Policy.pdf";
-            // Try multiple approaches to display the PDF
-            // 1. Google PDF viewer (works in most cases)
-            const googleViewerUrl = "https://docs.google.com/viewer?embedded=true&url=";
-            // 2. Direct link as fallback
-            let directPdfUrl = "";
-            // Find the PDF link by looking for file links in the DOM
-            const links = document.querySelectorAll("a");
-            for (const link of links) {
-                if (link.href && link.href.includes(encodeURIComponent(pdfFileName))) {
-                    directPdfUrl = link.href;
-                    break;
                 }
             }
-            // Set the iframe source if we found a link
             const iframe = document.getElementById("policy-iframe");
             const fallback = document.getElementById("policy-fallback");
             const downloadLink = document.getElementById("policy-download-link");
-            if (directPdfUrl) {
-                // Try Google Viewer first
-                iframe.src = googleViewerUrl + encodeURIComponent(directPdfUrl);
-                iframe.style.display = "block";
-                fallback.style.display = "none";
-                // Set the download link
-                downloadLink.href = directPdfUrl;
-                // Provide fallback in case Google Viewer fails
                 iframe.onerror = function() {
                     iframe.style.display = "none";
                     fallback.style.display = "block";
                 };
-            } else {
-                // No direct URL found, show fallback
-                iframe.style.display = "none";
-                fallback.style.display = "block";
-                downloadLink.href = "#";
-                downloadLink.textContent = "PDF not available";
-            }
-            // Display the popup
-            document.getElementById('policy-popup').style.display = 'flex';
         }
         </script>
         """

 # Load environment variables (for local development)
 load_dotenv()
+# Process retrieval text to highlight relevant parts - IMPROVED FORMATTING
 def process_retrieval_text(retrieval_text, user_input):
     """
     Process the retrieval text by identifying proper document boundaries
+    and highlighting relevant keywords with improved formatting.
     """
     if not retrieval_text or retrieval_text.strip() == "No retrieval text found.":
         return retrieval_text
         for i, section in enumerate(doc_sections):
             if section.strip():
+                # Better formatting with clear section breaks
+                formatted_section = section.strip()
+                # Split Doc and Content on separate lines
+                formatted_section = formatted_section.replace("Doc:", "<strong>Document:</strong><br>")
+                formatted_section = formatted_section.replace("Content:", "<br><strong>Content:</strong><br>")
+                chunks.append(f"<div class='doc-section'><strong>Evidence Document {i+1}</strong><br>{formatted_section}</div>")
     else:
         # Fallback to a simpler approach - split by double newlines
         # but combine any small chunks that appear to be part of the same document
         if current_chunk:
             chunks.append(current_chunk)
+        # Format each chunk with better section styling
+        chunks = [f"<div class='doc-section'><div class='doc-title'>Evidence Document {i+1}</div><div class='doc-content'>{chunk.strip()}</div></div>"
                   for i, chunk in enumerate(chunks)]
     # Extract keywords from user input (longer than 3 chars)
         highlighted_chunks.append(highlighted_chunk)
+    return "<div class='knowledge-sections'>" + "".join(highlighted_chunks) + "</div>"
 # API keys - read from environment variables (empty string when unset)
 # Set them in the environment or in a local .env file; do not hardcode them here
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
 PERSPECTIVE_API_KEY = os.environ.get("PERSPECTIVE_API_KEY", "")
+# Custom CSS for styling - UPDATED CSS
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;600;700&display=swap');
     margin-bottom: 12px;
 }
+/* Document section formatting - IMPROVED */
+.knowledge-sections {
+    border-radius: 5px;
+    background: #f9f9f9;
+    padding: 10px;
+}
 .doc-section {
+    margin-bottom: 20px;
     padding-bottom: 15px;
+    border-bottom: 1px solid #e0e0e0;
+    background: white;
+    padding: 15px;
+    border-radius: 5px;
+    box-shadow: 0 1px 3px rgba(0,0,0,0.05);
 }
 .doc-title {
     font-weight: bold;
+    margin-bottom: 10px;
+    color: #333;
+    border-bottom: 1px solid #eee;
+    padding-bottom: 5px;
 }
 .doc-content {
     padding-left: 10px;
     border-left: 3px solid #f0f0f0;
+    line-height: 1.5;
 }
 /* Matching text highlighting */
             response_json = response.json()
             response_content = response_json.get("message", {}).get("content", "No content received.")
+            # Prevent line breaks in the explanation
+            response_content = response_content.replace("\n", " ")
             retrieved_texts = [
                 f"Doc: {item.get('doc_name', 'Unknown')}, Page: {item.get('page', 'N/A')}\n"
                 f"Content: {item.get('content_text', 'No Content')}"
     # Format responses carefully to avoid random line breaks
     llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
+    # Don't add line breaks to contextual rating
     # Process retrieval text to highlight keywords with better formatting
     processed_retrieval = process_retrieval_text(contextual_retrieval, user_input)
         # Create the popup div (initially hidden)
         knowledge_html = f"""
         <div id="{popup_id}" class="knowledge-popup" style="display: none;">
+            <div class="knowledge-popup-header">Supporting evidence for Contextual Oracle</div>
             <button class="knowledge-popup-close"
                     onclick="this.parentElement.style.display='none';
                              document.getElementById('btn-{popup_id}').style.display='inline-block';
         # Create a file component to serve the PDF (hidden from UI)
         pdf_file = gr.File("Hate Speech Policy.pdf", visible=False, label="Policy PDF")
+        # Add policy popup HTML with improved PDF loading
         policy_popup_html = """
         <div id="policy-popup" class="policy-popup">
             <div class="policy-popup-content">
         </div>
         <script>
+        // Improved PDF loading code - will execute when page loads
+        document.addEventListener('DOMContentLoaded', function() {
+            // Preload PDF link
+            findPdfLink();
+        });
+        // Function to find the PDF link and cache it
+        let cachedPdfUrl = null;
+        function findPdfLink() {
+            // Only search if we haven't found it yet
+            if (!cachedPdfUrl) {
+                const links = document.querySelectorAll("a");
+                for (const link of links) {
+                    if (link.href && link.href.includes("Hate%20Speech%20Policy.pdf")) {
+                        cachedPdfUrl = link.href;
+                        console.log("PDF link found and cached:", cachedPdfUrl);
+                        break;
+                    }
+                }
+                // If we didn't find it, set a timeout to keep trying
+                if (!cachedPdfUrl) {
+                    setTimeout(findPdfLink, 1000);
+                }
+            }
+        }
+        // Function to handle opening the policy popup with improved reliability
         function openPolicyPopup() {
+            // Display the popup right away
+            document.getElementById('policy-popup').style.display = 'flex';
+            const iframe = document.getElementById("policy-iframe");
+            const fallback = document.getElementById("policy-fallback");
+            const downloadLink = document.getElementById("policy-download-link");
+            // If we already have the PDF URL, use it
+            if (cachedPdfUrl) {
+                loadPdfIntoIframe(cachedPdfUrl);
+            } else {
+                // Otherwise, search for it again
+                const links = document.querySelectorAll("a");
+                let pdfUrl = null;
+                for (const link of links) {
+                    if (link.href && link.href.includes("Hate%20Speech%20Policy.pdf")) {
+                        pdfUrl = link.href;
+                        cachedPdfUrl = pdfUrl; // Cache for future use
+                        break;
+                    }
+                }
+                if (pdfUrl) {
+                    loadPdfIntoIframe(pdfUrl);
+                } else {
+                    // Last resort - try to find the file component
+                    const fileComponents = document.querySelectorAll("[data-testid='file']");
+                    for (const comp of fileComponents) {
+                        const downloadBtn = comp.querySelector("a");
+                        if (downloadBtn && downloadBtn.href && downloadBtn.href.includes("file=")) {
+                            pdfUrl = downloadBtn.href;
+                            cachedPdfUrl = pdfUrl;
+                            loadPdfIntoIframe(pdfUrl);
+                            return;
+                        }
+                    }
+                    // If we couldn't find the PDF, show fallback
+                    iframe.style.display = "none";
+                    fallback.style.display = "block";
+                    downloadLink.href = "#";
+                    downloadLink.textContent = "PDF not available";
                 }
             }
+        }
+        // Function to load PDF into iframe with fallback
+        function loadPdfIntoIframe(pdfUrl) {
             const iframe = document.getElementById("policy-iframe");
             const fallback = document.getElementById("policy-fallback");
             const downloadLink = document.getElementById("policy-download-link");
+            // Try direct embedding first (works in most browsers)
+            iframe.src = pdfUrl;
+            iframe.style.display = "block";
+            fallback.style.display = "none";
+            // Set the download link to the PDF
+            downloadLink.href = pdfUrl;
+            // If direct embedding fails, try Google Viewer as backup
+            iframe.onerror = function() {
+                const googleViewerUrl = "https://docs.google.com/viewer?embedded=true&url=";
+                iframe.src = googleViewerUrl + encodeURIComponent(pdfUrl);
+                // If even Google Viewer fails, show fallback
                 iframe.onerror = function() {
                     iframe.style.display = "none";
                     fallback.style.display = "block";
                 };
+            };
         }
+        // Expose the function globally for button clicks
+        window.openPolicyPopup = openPolicyPopup;
         </script>
         """