File size: 2,560 Bytes
c146f6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import React from 'react';

const Datasets = () => {
  const datasets = [
    {
      name: "Web Text Corpus",
      size: "2.5TB",
      languages: "50+",
      description: "Diverse web content from reputable sources across multiple domains"
    },
    {
      name: "Academic Papers",
      size: "800GB",
      languages: "English",
      description: "Scientific papers and research documents from top conferences"
    },
    {
      name: "Code Repository",
      size: "1.2TB",
      languages: "10+",
      description: "Open source code across multiple programming languages"
    },
    {
      name: "Multilingual News",
      size: "1.8TB",
      languages: "25+",
      description: "Global news articles with temporal metadata"
    },
    {
      name: "Conversational Data",
      size: "500GB",
      languages: "15+",
      description: "Dialogues and conversational exchanges from various platforms"
    },
    {
      name: "Technical Documentation",
      size: "300GB",
      languages: "8+",
      description: "API docs, manuals, and technical guides"
    }
  ];

  return (
    <section id="datasets" className="py-20 bg-gray-50">
      <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
        <div className="text-center mb-16">
          <h2 className="text-3xl md:text-4xl font-bold text-gray-900 mb-4">
            Featured Datasets
          </h2>
          <p className="text-xl text-gray-600 max-w-2xl mx-auto">
            Comprehensive collections tailored for different LLM training scenarios
          </p>
        </div>
        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
          {datasets.map((dataset, index) => (
            <div key={index} className="bg-white rounded-xl shadow-sm border p-6 hover:shadow-md transition-shadow">
              <h3 className="text-xl font-semibold text-gray-900 mb-2">{dataset.name}</h3>
              <div className="flex gap-4 text-sm text-gray-600 mb-3">
                <span className="bg-primary-50 text-primary-700 px-2 py-1 rounded">Size: {dataset.size}</span>
                <span className="bg-green-50 text-green-700 px-2 py-1 rounded">{dataset.languages} languages</span>
              </div>
              <p className="text-gray-600">{dataset.description}</p>
              <button className="mt-4 text-primary-600 hover:text-primary-700 font-semibold transition-colors">
                Learn More →
              </button>
            </div>
          ))}
        </div>
      </div>
    </section>
  );
};

export default Datasets;