<!doctype html>
<!-- The doctype must be the first thing in the file so browsers render in standards mode. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>AI Safety Datasets Overview</title>
<style>
  /* ---------- Reset and page frame ---------- */
  * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
  }

  body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', sans-serif;
    line-height: 1.6;
    color: #333;
    background: linear-gradient(135deg, #ff6b6b 0%, #ff8e53 100%);
    padding: 2rem 1rem;
  }

  /* White card that holds the whole page; overflow is clipped so the
     header gradient follows the rounded corners. */
  .container {
    max-width: 1200px;
    margin: 0 auto;
    background: white;
    border-radius: 12px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    overflow: hidden;
  }

  /* ---------- Page header ---------- */
  header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 3rem 2rem;
    text-align: center;
  }

  header h1 {
    font-size: 2.5rem;
    margin-bottom: 0.5rem;
  }

  header p {
    font-size: 1.1rem;
    opacity: 0.95;
  }

  /* ---------- Content area and headings ---------- */
  .content {
    padding: 2rem;
  }

  section {
    margin-bottom: 3rem;
  }

  h2 {
    color: #667eea;
    font-size: 1.8rem;
    margin-bottom: 1rem;
    border-bottom: 2px solid #667eea;
    padding-bottom: 0.5rem;
  }

  h3 {
    color: #764ba2;
    font-size: 1.3rem;
    margin: 1.5rem 0 0.75rem 0;
  }

  /* ---------- Summary stat cards ---------- */
  .stats-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
    gap: 1.5rem;
    margin: 2rem 0;
  }

  .stat-card {
    /* 8-digit hex: the trailing "15" is the alpha channel. */
    background: linear-gradient(135deg, #667eea15 0%, #764ba215 100%);
    border-radius: 8px;
    padding: 1.5rem;
    border-left: 4px solid #667eea;
  }

  .stat-card h4 {
    color: #667eea;
    font-size: 0.9rem;
    text-transform: uppercase;
    letter-spacing: 1px;
    margin-bottom: 0.5rem;
  }

  .stat-card .number {
    font-size: 2rem;
    font-weight: bold;
    color: #333;
  }

  .stat-card .label {
    color: #666;
    font-size: 0.9rem;
  }

  /* ---------- Statistics table ---------- */
  table {
    width: 100%;
    border-collapse: collapse;
    margin: 1.5rem 0;
    background: white;
  }

  th {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 1rem;
    text-align: left;
    font-weight: 600;
  }

  td {
    padding: 0.75rem 1rem;
    border-bottom: 1px solid #e0e0e0;
  }

  tr:hover {
    background: #f8f9fa;
  }

  /* ---------- Dataset link cards ---------- */
  .dataset-links {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
    gap: 1.5rem;
    margin: 2rem 0;
  }

  .dataset-card {
    background: white;
    border: 2px solid #e0e0e0;
    border-radius: 8px;
    padding: 1.5rem;
    transition: all 0.3s ease;
  }

  .dataset-card:hover {
    border-color: #667eea;
    transform: translateY(-4px);
    box-shadow: 0 8px 16px rgba(102, 126, 234, 0.2);
  }

  .dataset-card h4 {
    color: #667eea;
    margin-bottom: 0.5rem;
  }

  .dataset-card p {
    color: #666;
    font-size: 0.9rem;
    margin-bottom: 1rem;
  }

  /* Link styled as a button. */
  .btn {
    display: inline-block;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 0.75rem 1.5rem;
    text-decoration: none;
    border-radius: 6px;
    font-weight: 600;
    transition: all 0.3s ease;
  }

  .btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
  }

  /* ---------- Callout boxes ---------- */
  .warning-box {
    background: #fff3cd;
    border-left: 4px solid #ffc107;
    padding: 1.5rem;
    margin: 1.5rem 0;
    border-radius: 4px;
  }

  .warning-box h4 {
    color: #856404;
    margin-bottom: 0.5rem;
  }

  .info-box {
    background: #d1ecf1;
    border-left: 4px solid #0c5460;
    padding: 1.5rem;
    margin: 1.5rem 0;
    border-radius: 4px;
  }

  /* ---------- Lists and inline code ---------- */
  ul {
    /* The universal reset above zeroes list padding, so the indent is restored here. */
    margin-left: 2rem;
    margin-top: 0.5rem;
  }

  li {
    margin-bottom: 0.5rem;
  }

  code {
    background: #f4f4f4;
    padding: 0.2rem 0.4rem;
    border-radius: 3px;
    font-family: 'Courier New', monospace;
    font-size: 0.9em;
  }

  /* ---------- Page footer ---------- */
  footer {
    background: #f8f9fa;
    padding: 2rem;
    text-align: center;
    color: #666;
    border-top: 1px solid #e0e0e0;
  }

  /* ---------- Narrow viewports ---------- */
  @media (max-width: 768px) {
    header h1 {
      font-size: 1.8rem;
    }

    .stats-grid {
      grid-template-columns: 1fr;
    }
  }
</style>
</head>
<body>
<div class="container">
<!-- Page banner: title and one-line description -->
<header>
  <h1><span aria-hidden="true">🛡️</span> AI Safety Datasets Collection</h1>
  <p>Comprehensive evaluation datasets for testing AI model safety mechanisms</p>
</header>
<div class="content">
<!-- Overview Section: headline totals shown as a grid of stat cards -->
<section>
  <!-- NOTE(review): heading icon reconstructed from mis-encoded text; confirm the intended glyph. -->
  <h2><span aria-hidden="true">📊</span> Dataset Collection Summary</h2>
  <div class="stats-grid">
    <div class="stat-card">
      <h4>Total Conversations</h4>
      <div class="number">10,321+</div>
      <div class="label">Across all datasets</div>
    </div>
    <div class="stat-card">
      <h4>Total Turns</h4>
      <div class="number">73,258+</div>
      <div class="label">Multi-turn interactions</div>
    </div>
    <div class="stat-card">
      <h4>Dataset Types</h4>
      <div class="number">4</div>
      <div class="label">Complementary methodologies</div>
    </div>
    <div class="stat-card">
      <h4>Sample Data</h4>
      <div class="number">200</div>
      <div class="label">Free conversations available</div>
    </div>
  </div>
</section>
<!-- Full Dataset Statistics: per-dataset conversation and turn counts -->
<section>
  <!-- NOTE(review): heading icon reconstructed from mis-encoded text; confirm the intended glyph. -->
  <h2 id="full-dataset-statistics"><span aria-hidden="true">📈</span> Full Dataset Statistics</h2>
  <!-- The table takes its accessible name from the section heading. -->
  <table aria-labelledby="full-dataset-statistics">
    <thead>
      <tr>
        <th scope="col">Dataset</th>
        <th scope="col">Conversations</th>
        <th scope="col">Turns</th>
        <th scope="col">Avg Turns/Conv</th>
        <th scope="col">Focus</th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td><strong>Original Multi-turn</strong></td>
        <td>594+</td>
        <td>4,642+</td>
        <td>7.8</td>
        <td>Baseline organic conversations</td>
      </tr>
      <!-- Psychology and Illicit are the two parts of Original Multi-turn;
           their counts add up to the row above.
           NOTE(review): the tree glyph is reconstructed from mis-encoded text; confirm. -->
      <tr>
        <td>└ Psychology</td>
        <td>158+</td>
        <td>1,583+</td>
        <td>10.0</td>
        <td>Psychology harm category</td>
      </tr>
      <tr>
        <td>└ Illicit</td>
        <td>436+</td>
        <td>3,059+</td>
        <td>7.0</td>
        <td>Illicit harm category</td>
      </tr>
      <tr>
        <td><strong>Bio-transformed V1</strong></td>
        <td>1,309+</td>
        <td>6,784+</td>
        <td>5.2</td>
        <td>Direct bio-safety attacks</td>
      </tr>
      <tr>
        <td><strong>Bio-transformed V2</strong></td>
        <td>1,308+</td>
        <td>8,127+</td>
        <td>6.2</td>
        <td>Adaptive bio-safety attacks</td>
      </tr>
      <tr>
        <td><strong>Keyword-transformed</strong></td>
        <td>7,110+</td>
        <td>53,705+</td>
        <td>7.6</td>
        <td>Cross-domain harm transfer</td>
      </tr>
    </tbody>
  </table>
</section>
<!-- Dataset Links: one card per dataset, each linking to its Hugging Face repository.
     Every link opens in a new tab, so it carries rel="noopener noreferrer", and has an
     aria-label naming its dataset because the visible link text is the same on all four cards. -->
<section>
  <!-- NOTE(review): heading icon and link arrows reconstructed from mis-encoded text; confirm. -->
  <h2><span aria-hidden="true">🔗</span> Access Datasets on Hugging Face</h2>
  <div class="dataset-links">
    <div class="dataset-card">
      <h4>Original Multi-turn Conversations</h4>
      <p>Psychology + Illicit baseline conversations<br>
      <strong>Sample:</strong> 50 conversations, 390 turns</p>
      <a class="btn" href="https://huggingface.co/datasets/julyai7/multi-turn-conversations" target="_blank" rel="noopener noreferrer" aria-label="View Dataset: Original Multi-turn Conversations (opens in new tab)">View Dataset →</a>
    </div>
    <div class="dataset-card">
      <h4>Bio-transformed Synthetic V1</h4>
      <p>Direct bio-topic transformation methodology<br>
      <strong>Sample:</strong> 50 conversations, 449 turns</p>
      <a class="btn" href="https://huggingface.co/datasets/julyai7/multi-turn-bio-transformed-synth-conversations-v1" target="_blank" rel="noopener noreferrer" aria-label="View Dataset: Bio-transformed Synthetic V1 (opens in new tab)">View Dataset →</a>
    </div>
    <div class="dataset-card">
      <h4>Bio-transformed Synthetic V2</h4>
      <p>Adaptive bio-topic transformation methodology<br>
      <strong>Sample:</strong> 50 conversations, 459 turns</p>
      <a class="btn" href="https://huggingface.co/datasets/julyai7/multi-turn-bio-transformed-synth-conversations-v2" target="_blank" rel="noopener noreferrer" aria-label="View Dataset: Bio-transformed Synthetic V2 (opens in new tab)">View Dataset →</a>
    </div>
    <div class="dataset-card">
      <h4>Keyword-transformed Synthetic</h4>
      <p>Cross-domain keyword substitution methodology<br>
      <strong>Sample:</strong> 50 conversations, 659 turns</p>
      <a class="btn" href="https://huggingface.co/datasets/julyai7/multi-turn-keyword-transformed-synth-conversations" target="_blank" rel="noopener noreferrer" aria-label="View Dataset: Keyword-transformed Synthetic (opens in new tab)">View Dataset →</a>
    </div>
  </div>
</section>
<!-- Research Applications: four use-case groups laid out in an auto-fitting grid -->
<section>
  <h2><span aria-hidden="true">🧪</span> Research Applications</h2>
  <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1.5rem;">
    <div>
      <h3>Safety Evaluation</h3>
      <ul>
        <li>Benchmark model safety</li>
        <li>Measure robustness</li>
        <li>Evaluate mechanisms</li>
      </ul>
    </div>
    <div>
      <h3>Red Teaming</h3>
      <ul>
        <li>Discover adversarial patterns</li>
        <li>Test safety guardrails</li>
        <li>Identify blind spots</li>
      </ul>
    </div>
    <div>
      <h3>Model Training</h3>
      <ul>
        <li>Fine-tune safety classifiers</li>
        <li>Train attack detectors</li>
        <li>Develop harm detection</li>
      </ul>
    </div>
    <div>
      <h3>Safety Research</h3>
      <ul>
        <li>Study harm transfer</li>
        <li>Analyze attack patterns</li>
        <li>Understand dynamics</li>
      </ul>
    </div>
  </div>
</section>
<!-- Ethical Considerations: content warning, then intended and prohibited uses -->
<section>
  <h2><span aria-hidden="true">⚠️</span> Ethical Considerations</h2>
  <div class="warning-box">
    <h4><span aria-hidden="true">⚠️</span> IMPORTANT</h4>
    <p>These datasets contain successful adversarial attacks and harmful content.</p>
  </div>
  <!-- NOTE(review): the check and cross icons below are reconstructed from mis-encoded text; confirm. -->
  <h3><span aria-hidden="true">✅</span> Intended Use</h3>
  <ul>
    <li>Defensive security research</li>
    <li>AI safety evaluation and improvement</li>
    <li>Academic research on adversarial robustness</li>
    <li>Training safety and moderation systems</li>
  </ul>
  <h3><span aria-hidden="true">❌</span> Prohibited Use</h3>
  <ul>
    <li>Creating offensive content</li>
    <li>Developing attack tools for malicious purposes</li>
    <li>Bypassing safety systems for harm</li>
    <li>Any use that violates laws or ethical guidelines</li>
  </ul>
</section>
<!-- Data Selection: the filters a conversation had to pass to be included -->
<section>
  <h2><span aria-hidden="true">🎯</span> Data Selection Process</h2>
  <div class="info-box">
    <p>All datasets are derived from high-quality, validated conversations with strict quality filters including NeurIPS evaluation protocols.</p>
  </div>
  <h3>Base Criteria</h3>
  <ul>
    <li>Text-based objectives (no code execution templates)</li>
    <li>NeurIPS evaluation metadata present</li>
    <li>Verdict: <code>success</code> (harmful requests successfully fulfilled)</li>
    <li>Multi-turn conversations with prompt-response pairs</li>
  </ul>
  <h3>Psychology-Specific Criteria</h3>
  <ul>
    <li>Organic conversations (<code>organicity = true</code>)</li>
    <li>No disclaimer in responses</li>
    <li>Successfully elicited harmful psychology-related content</li>
  </ul>
  <h3>Illicit-Specific Criteria</h3>
  <ul>
    <li>Contains specific instruction details</li>
    <li>Practically executable (not abstract)</li>
    <li>Successfully elicited harmful illicit-related content</li>
  </ul>
</section>
<!-- License: what CC-BY-NC-4.0 permits and what it does not.
     The list icons carry meaning (permitted vs. not permitted), so each one has a text
     alternative instead of being hidden from assistive technology.
     NOTE(review): icons are reconstructed from mis-encoded text; the first three items are
     read as permitted and the last as not permitted, in line with the NonCommercial
     licence named in the paragraph. Confirm. -->
<section>
  <h2><span aria-hidden="true">📄</span> License</h2>
  <p>All datasets are released under <strong>CC-BY-NC-4.0</strong> (Creative Commons Attribution-NonCommercial 4.0 International).</p>
  <ul>
    <li><span role="img" aria-label="Permitted:">✅</span> Use for research and evaluation</li>
    <li><span role="img" aria-label="Permitted:">✅</span> Modify and build upon the data</li>
    <li><span role="img" aria-label="Permitted:">✅</span> Share with attribution</li>
    <li><span role="img" aria-label="Not permitted:">❌</span> Commercial use without separate licensing</li>
  </ul>
</section>
<!-- Contact: how to request the full (non-sample) datasets -->
<section>
  <h2><span aria-hidden="true">💼</span> Full Dataset Access</h2>
  <p>The sample datasets provide representative examples. Full datasets contain thousands of additional conversations with expanded harm categories and regular updates.</p>
  <p style="margin-top: 1rem;"><strong>For academic research or commercial licensing, please contact us with your research objectives, institutional affiliation, and intended use.</strong></p>
</section>
</div><!-- /.content -->
<!-- Page footer: last-updated date (machine-readable via <time>) and a pointer to the dataset docs -->
<footer>
  <p><strong>Last Updated:</strong> <time datetime="2025-11-24">November 24, 2025</time></p>
  <p style="margin-top: 0.5rem;">For detailed documentation, visit the individual dataset repositories on Hugging Face.</p>
</footer>
</div><!-- /.container -->
</body>
</html>