Spaces:
Running
Running
| <!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <!-- The doctype above keeps browsers in standards mode; charset must stay first in head. --> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>AI Safety Datasets Overview</title> | |
| <style> | |
| * { /* universal reset: clear default margins/padding and size every box by its border edge */ | |
| margin: 0; | |
| padding: 0; | |
| box-sizing: border-box; | |
| } | |
| body { /* page backdrop: system font stack over a diagonal red-to-orange gradient */ | |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', sans-serif; | |
| line-height: 1.6; | |
| color: #333; | |
| background: linear-gradient(135deg, #ff6b6b 0%, #ff8e53 100%); | |
| padding: 2rem 1rem; | |
| } | |
| .container { /* centered white card, at most 1200px wide */ | |
| max-width: 1200px; | |
| margin: 0 auto; | |
| background: white; | |
| border-radius: 12px; | |
| box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1); | |
| overflow: hidden; /* clips child backgrounds to the rounded corners */ | |
| } | |
| header { /* type selector, so it applies to every <header>: blue gradient banner, centered white text */ | |
| background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%); | |
| color: white; | |
| padding: 3rem 2rem; | |
| text-align: center; | |
| } | |
| header h1 { /* banner title; reduced to 1.8rem by the max-width 768px media query */ | |
| font-size: 2.5rem; | |
| margin-bottom: 0.5rem; | |
| } | |
| header p { /* banner tagline, slightly translucent */ | |
| font-size: 1.1rem; | |
| opacity: 0.95; | |
| } | |
| .content { /* padded wrapper around the page sections */ | |
| padding: 2rem; | |
| } | |
| section { /* vertical gap between consecutive sections */ | |
| margin-bottom: 3rem; | |
| } | |
| h2 { /* section title with an underline in the same blue */ | |
| color: #1e40af; | |
| font-size: 1.8rem; | |
| margin-bottom: 1rem; | |
| border-bottom: 2px solid #1e40af; | |
| padding-bottom: 0.5rem; | |
| } | |
| h3 { /* sub-section title */ | |
| color: #1e3a8a; | |
| font-size: 1.3rem; | |
| margin: 1.5rem 0 0.75rem 0; | |
| } | |
| .stats-grid { /* responsive grid: as many columns of at least 250px as fit the row */ | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); | |
| gap: 1.5rem; | |
| margin: 2rem 0; | |
| } | |
| .stat-card { /* tinted tile; the 8-digit hex colors end in alpha 15, giving a faint blue wash */ | |
| background: linear-gradient(135deg, #1e40af15 0%, #3b82f615 100%); | |
| border-radius: 8px; | |
| padding: 1.5rem; | |
| border-left: 4px solid #1e40af; | |
| } | |
| .stat-card h4 { /* small uppercase caption above the figure */ | |
| color: #1e40af; | |
| font-size: 0.9rem; | |
| text-transform: uppercase; | |
| letter-spacing: 1px; | |
| margin-bottom: 0.5rem; | |
| } | |
| .stat-card .number { /* the headline figure */ | |
| font-size: 2rem; | |
| font-weight: bold; | |
| color: #333; | |
| } | |
| .stat-card .label { /* muted description under the figure */ | |
| color: #666; | |
| font-size: 0.9rem; | |
| } | |
| table { /* full-width table with shared cell borders */ | |
| width: 100%; | |
| border-collapse: collapse; | |
| margin: 1.5rem 0; | |
| background: white; | |
| } | |
| th { /* header cells reuse the banner gradient */ | |
| background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%); | |
| color: white; | |
| padding: 1rem; | |
| text-align: left; | |
| font-weight: 600; | |
| } | |
| td { /* body cells separated by a light rule */ | |
| padding: 0.75rem 1rem; | |
| border-bottom: 1px solid #e0e0e0; | |
| } | |
| tr:hover { /* row highlight; also matches the header row, whose th cells paint their own background */ | |
| background: #f8f9fa; | |
| } | |
| .dataset-links { /* responsive grid of dataset cards, columns at least 280px wide */ | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); | |
| gap: 1.5rem; | |
| margin: 2rem 0; | |
| } | |
| .dataset-card { /* bordered card; the transition animates the hover changes below */ | |
| background: white; | |
| border: 2px solid #e0e0e0; | |
| border-radius: 8px; | |
| padding: 1.5rem; | |
| transition: all 0.3s ease; | |
| } | |
| .dataset-card:hover { /* hover: blue border, 4px lift, soft blue shadow */ | |
| border-color: #1e40af; | |
| transform: translateY(-4px); | |
| box-shadow: 0 8px 16px rgba(30, 64, 175, 0.2); | |
| } | |
| .dataset-card h4 { /* card title */ | |
| color: #1e40af; | |
| margin-bottom: 0.5rem; | |
| } | |
| .dataset-card p { /* card description, muted and slightly smaller */ | |
| color: #666; | |
| font-size: 0.9rem; | |
| margin-bottom: 1rem; | |
| } | |
| /* Call-to-action link styled as a button. */ | |
| .btn { | |
| display: inline-block; | |
| background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%); | |
| color: white; | |
| padding: 0.75rem 1.5rem; | |
| text-decoration: none; | |
| border-radius: 6px; | |
| font-weight: 600; | |
| /* Transition only the properties that change, so the focus outline is not animated. */ | |
| transition: transform 0.3s ease, box-shadow 0.3s ease; | |
| } | |
| /* Keyboard focus gets the same lift and shadow as pointer hover. */ | |
| .btn:hover, | |
| .btn:focus-visible { | |
| transform: translateY(-2px); | |
| box-shadow: 0 4px 12px rgba(30, 64, 175, 0.4); | |
| } | |
| .warning-box { /* pale yellow callout with an amber left rule */ | |
| background: #fff3cd; | |
| border-left: 4px solid #ffc107; | |
| padding: 1.5rem; | |
| margin: 1.5rem 0; | |
| border-radius: 4px; | |
| } | |
| .warning-box h4 { /* callout title in dark amber */ | |
| color: #856404; | |
| margin-bottom: 0.5rem; | |
| } | |
| .info-box { /* pale cyan callout with a dark teal left rule */ | |
| background: #d1ecf1; | |
| border-left: 4px solid #0c5460; | |
| padding: 1.5rem; | |
| margin: 1.5rem 0; | |
| border-radius: 4px; | |
| } | |
| ul { /* restores list indentation removed by the universal margin/padding reset */ | |
| margin-left: 2rem; | |
| margin-top: 0.5rem; | |
| } | |
| li { /* spacing between list items */ | |
| margin-bottom: 0.5rem; | |
| } | |
| code { /* inline code chip: grey background, monospace, slightly smaller than surrounding text */ | |
| background: #f4f4f4; | |
| padding: 0.2rem 0.4rem; | |
| border-radius: 3px; | |
| font-family: 'Courier New', monospace; | |
| font-size: 0.9em; | |
| } | |
| footer { /* type selector, applies to every <footer>: light grey strip with centered muted text */ | |
| background: #f8f9fa; | |
| padding: 2rem; | |
| text-align: center; | |
| color: #666; | |
| border-top: 1px solid #e0e0e0; | |
| } | |
| @media (max-width: 768px) { /* viewports up to 768px wide */ | |
| header h1 { /* smaller banner title */ | |
| font-size: 1.8rem; | |
| } | |
| .stats-grid { /* force a single column of stat cards */ | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <!-- Page banner: title and one-line description --> | |
| <header> | |
| <h1>🛡️ AI Safety Datasets Collection</h1> | |
| <p>Comprehensive evaluation datasets for testing AI model safety mechanisms</p> | |
| </header> | |
| <div class="content"> | |
| <!-- Overview Section: headline totals rendered as stat cards --> | |
| <section> | |
| <h2>📊 Dataset Collection Summary</h2> | |
| <div class="stats-grid"> | |
| <div class="stat-card"> | |
| <h4>Total Conversations</h4> | |
| <div class="number">849+</div> | |
| <div class="label">Across all datasets</div> | |
| </div> | |
| <div class="stat-card"> | |
| <h4>Total Turns</h4> | |
| <div class="number">6694+</div> | |
| <div class="label">Multi-turn interactions</div> | |
| </div> | |
| <div class="stat-card"> | |
| <h4>Dataset Types</h4> | |
| <div class="number">3</div> | |
| <div class="label">Complementary methodologies</div> | |
| </div> | |
| <!-- NOTE(review): 150 does not match the three dataset cards on this page, which each state "Sample: 5 conversations" (15 in total). Confirm which figure is correct. --> | |
| <div class="stat-card"> | |
| <h4>Sample Data</h4> | |
| <div class="number">150</div> | |
| <div class="label">Free conversations available</div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Full Dataset Statistics: one row per dataset; scope="col" ties each data cell to its column header for assistive technology --> | |
| <section> | |
| <h2>📈 Full Dataset Statistics</h2> | |
| <table> | |
| <thead> | |
| <tr> | |
| <th scope="col">Dataset</th> | |
| <th scope="col">Conversations</th> | |
| <th scope="col">Turns</th> | |
| <th scope="col">Avg Turns/Conv</th> | |
| <th scope="col">Focus</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <tr> | |
| <td><strong>Psychology multi-turn</strong></td> | |
| <td>184+</td> | |
| <td>1964+</td> | |
| <td>10.3</td> | |
| <td>Psychology harmfulness such as self-harm, psychosis, anthropomorphism, etc.</td> | |
| </tr> | |
| <tr> | |
| <td><strong>Illicit (bioweapon) multi-turn</strong></td> | |
| <td>84+</td> | |
| <td>822+</td> | |
| <td>9.8</td> | |
| <td>Bio-safety harmfulness such as bioweapons, pathogens, etc.</td> | |
| </tr> | |
| <tr> | |
| <td><strong>Illicit (chemical, general) multi-turn</strong></td> | |
| <td>581+</td> | |
| <td>3908+</td> | |
| <td>6.7</td> | |
| <td>Non-bio safety harmfulness such as chemical weapons, cyber threats, etc.</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </section> | |
| <!-- Dataset Links: one card per Hugging Face repository. Links open in a new tab, so rel="noopener noreferrer" keeps the opened page from reaching window.opener. --> | |
| <section> | |
| <h2>🔗 Access Datasets on Hugging Face</h2> | |
| <div class="dataset-links"> | |
| <div class="dataset-card"> | |
| <h4>Psychology Multi-turn Conversations</h4> | |
| <p>Psychology harmfulness such as self-harm, psychosis, anthropomorphism, etc.<br> | |
| <strong>Sample:</strong> 5 conversations</p> | |
| <a href="https://huggingface.co/datasets/GoJulyAI/psychology-multi-turn" class="btn" target="_blank" rel="noopener noreferrer">View Dataset →</a> | |
| </div> | |
| <div class="dataset-card"> | |
| <h4>Illicit (bioweapon) Multi-turn Conversations</h4> | |
| <p>Bio-safety harmfulness such as bioweapons, pathogens, etc.<br> | |
| <strong>Sample:</strong> 5 conversations</p> | |
| <a href="https://huggingface.co/datasets/GoJulyAI/illicit-bio-multi-turn" class="btn" target="_blank" rel="noopener noreferrer">View Dataset →</a> | |
| </div> | |
| <div class="dataset-card"> | |
| <h4>Illicit (chemical, general) Multi-turn Conversations</h4> | |
| <p>Non-bio safety harmfulness such as chemical weapons, cyber threats, etc.<br> | |
| <strong>Sample:</strong> 5 conversations</p> | |
| <a href="https://huggingface.co/datasets/GoJulyAI/illicit-general-multi-turn" class="btn" target="_blank" rel="noopener noreferrer">View Dataset →</a> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Ethical Considerations: content warning followed by intended and prohibited uses --> | |
| <section> | |
| <h2>⚠️ Ethical Considerations</h2> | |
| <div class="warning-box"> | |
| <h4>⚠️ IMPORTANT</h4> | |
| <p>These datasets contain successful adversarial attacks and harmful content.</p> | |
| </div> | |
| <h3>✅ Intended Use</h3> | |
| <ul> | |
| <li>Defensive security research</li> | |
| <li>AI safety evaluation and improvement</li> | |
| <li>Academic research on adversarial robustness</li> | |
| <li>Training safety and moderation systems</li> | |
| </ul> | |
| <h3>❌ Prohibited Use</h3> | |
| <ul> | |
| <li>Creating offensive content</li> | |
| <li>Developing attack tools for malicious purposes</li> | |
| <li>Bypassing safety systems for harm</li> | |
| <li>Any use that violates laws or ethical guidelines</li> | |
| </ul> | |
| </section> | |
| <!-- Data Selection: filters applied when building the datasets, general first and then per dataset family --> | |
| <section> | |
| <h2>🎯 Data Selection Process</h2> | |
| <div class="info-box"> | |
| <p>All datasets are derived from high-quality, validated conversations with strict quality filters including NeurIPS evaluation protocols.</p> | |
| </div> | |
| <h3>Base Criteria</h3> | |
| <ul> | |
| <li>Text-based objectives (no code execution templates)</li> | |
| <li>Verdict: <code>success</code> (harmful requests successfully fulfilled)</li> | |
| <li>Multi-turn conversations with prompt-response pairs</li> | |
| </ul> | |
| <h3>Psychology-Specific Criteria</h3> | |
| <ul> | |
| <li>Organic conversations (<code>organicity = true</code>)</li> | |
| <li>Successfully elicited harmful psychology-related content</li> | |
| </ul> | |
| <h3>Illicit-Specific Criteria</h3> | |
| <ul> | |
| <li>Contains specific instruction details</li> | |
| <li>Practically executable (not abstract)</li> | |
| <li>Successfully elicited harmful illicit-related content</li> | |
| </ul> | |
| </section> | |
| <!-- License: check marks are permitted uses, the cross marks the use that needs separate licensing --> | |
| <section> | |
| <h2>📜 License</h2> | |
| <p>Sample datasets are released under <strong>CC-BY-NC-4.0</strong> (Creative Commons Attribution-NonCommercial 4.0 International).</p> | |
| <ul> | |
| <li>✅ Use for research and evaluation</li> | |
| <li>✅ Modify and build upon the data</li> | |
| <li>✅ Share with attribution</li> | |
| <li>❌ Commercial use without separate licensing</li> | |
| </ul> | |
| </section> | |
| <!-- Contact: how to request the paid full datasets --> | |
| <section> | |
| <h2>💼 Full Dataset Access</h2> | |
| <p>The sample datasets provide representative examples. Full datasets contain thousands of additional conversations with expanded harm categories and regular updates.</p> | |
| <p style="margin-top: 1rem;"><strong>Please contact us at <a href="mailto:info@gojuly.ai" style="color: #1e40af; text-decoration: none;">info@gojuly.ai</a> to purchase any or all of the full datasets.</strong></p> | |
| <p style="margin-top: 0.5rem;">Include your research objectives, institutional affiliation, and intended use in your inquiry.</p> | |
| </section> | |
| </div> | |
| <!-- Page footer: the time element carries the last-updated date in machine-readable form --> | |
| <footer> | |
| <p><strong>Last Updated:</strong> <time datetime="2025-12-02">December 2, 2025</time></p> | |
| <p style="margin-top: 0.5rem;">For detailed documentation, visit the individual dataset repositories on Hugging Face.</p> | |
| </footer> | |
| </div> | |
| </body> | |
| </html> | |