File size: 4,846 Bytes
309320b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env node
import fs from 'fs/promises'
import path from 'path'

/**
 * Print the command-line help text (synopsis followed by the option list)
 * to stdout with a single console.log call.
 */
function usage() {
  const helpLines = [
    'Usage:',
    '  node scripts/count_column.js --file <file.csv> --column <columnName|index> [--out <out.txt>]',
    '',
    'Options:',
    '  --file     CSV 文件路径(必需)。',
    '  --column   指定列名或列序号(1 表示第一列)。如果为数字则按 1-based 处理。',
    '  --out      可选,将统计结果写入指定文件(默认:打印到控制台)。',
    '  --help     显示帮助。',
    // Trailing empty entry keeps the final newline of the help text.
    '',
  ]
  console.log(helpLines.join('\n'))
}

/**
 * Parse command-line arguments into an options object.
 *
 * Recognized flags: `--file <path>`, `--column <name|index>`, `--out <path>`,
 * and `--help` / `-h` (which stops parsing immediately). The first token that
 * is not a recognized flag is used as the file path if none has been set yet;
 * further unrecognized tokens are ignored.
 *
 * @param {string[]} argv - Full argument vector; the first two entries are skipped.
 * @returns {{file: ?string, column: ?string, out: ?string, help?: boolean}}
 *   Parsed options. `help` is present only when help was requested. A flag
 *   given as the last token gets the value `undefined`.
 */
function parseArgs(argv) {
  const tokens = argv.slice(2)
  const parsed = { file: null, column: null, out: null }
  let pos = 0
  while (pos < tokens.length) {
    const token = tokens[pos]
    pos += 1
    switch (token) {
      case '--help':
      case '-h':
        // Help short-circuits: nothing after it is examined.
        parsed.help = true
        return parsed
      case '--file':
        parsed.file = tokens[pos]
        pos += 1
        break
      case '--column':
        parsed.column = tokens[pos]
        pos += 1
        break
      case '--out':
        parsed.out = tokens[pos]
        pos += 1
        break
      default:
        // Positional file path: only taken while no file has been set.
        if (!parsed.file) parsed.file = token
    }
  }
  return parsed
}

// Minimal single-line CSV parser (supports double-quoted fields and "" as an escaped quote).
/**
 * Split one CSV line into its field values.
 *
 * Outside quotes a comma ends the current field and a double quote opens a
 * quoted section. Inside quotes, `""` yields a literal `"` and a lone `"`
 * closes the section; every other character is copied verbatim.
 *
 * @param {string} line - One line of CSV text without its line terminator.
 * @returns {string[]} Field values; always at least one element.
 */
function parseCSVLine(line) {
  const fields = []
  let field = ''
  let quoted = false
  let pos = 0
  while (pos < line.length) {
    const c = line[pos]
    pos += 1
    if (quoted) {
      if (c !== '"') {
        field += c
        continue
      }
      // A quote inside a quoted section: doubled means literal, single means close.
      if (line[pos] === '"') {
        field += '"'
        pos += 1
      } else {
        quoted = false
      }
      continue
    }
    switch (c) {
      case ',':
        fields.push(field)
        field = ''
        break
      case '"':
        quoted = true
        break
      default:
        field += c
    }
  }
  // The last field has no trailing comma, so flush it explicitly.
  fields.push(field)
  return fields
}

/**
 * Read a CSV file and split it into a header row and data rows.
 *
 * The file is decoded as UTF-8. A leading byte-order mark is removed so it
 * does not become part of the first header name. The first non-blank line is
 * the header; every later non-blank line is parsed into one row. Parsing is
 * line-based, so quoted fields that span several lines are not supported.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @returns {Promise<{headers: string[], rows: string[][]}>} Parsed content;
 *   both arrays are empty when the file has no non-blank line.
 * @throws Rejects with the error from `fs.readFile` when the file cannot be read.
 */
async function readCSV(filePath) {
  const raw = await fs.readFile(filePath, 'utf8')
  // readFile keeps a UTF-8 BOM as U+FEFF; left in place it would prefix the
  // first header name and break column lookup by name.
  const txt = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw
  const lines = txt.split(/\r?\n/)
  const headerLineIndex = lines.findIndex(l => l.trim().length > 0)
  if (headerLineIndex === -1) return { headers: [], rows: [] }
  const headers = parseCSVLine(lines[headerLineIndex])
  const rows = []
  for (const l of lines.slice(headerLineIndex + 1)) {
    // Blank lines (including the one after a trailing newline) carry no data.
    if (l.trim() === '') continue
    rows.push(parseCSVLine(l))
  }
  return { headers, rows }
}

/**
 * Turn a raw cell value into the key used for counting.
 *
 * @param {*} k - Cell value; may be null or undefined (e.g. a short row).
 * @returns {string} Empty string for null/undefined, otherwise the value
 *   converted to a string with surrounding whitespace removed.
 */
function normalizeKey(k) {
  return k == null ? '' : String(k).trim()
}

/**
 * Quote a value for the "Value,Count" section when it contains a comma, a
 * double quote, or a line break; embedded double quotes are doubled.
 * Values without those characters are returned unchanged.
 *
 * @param {*} value - Value to render as one CSV field.
 * @returns {string} The CSV-safe field text.
 */
function escapeCsvField(value) {
  const text = String(value)
  if (!/[",\r\n]/.test(text)) return text
  return `"${text.replace(/"/g, '""')}"`
}

/**
 * CLI entry point: count how often each distinct value occurs in one column
 * of a CSV file and print the result, or write it to `--out`.
 *
 * Exit codes used on failure: 1 missing --file/--column, 2 file cannot be
 * accessed, 3 path is not a regular file, 4 no header row, 5 numeric column
 * index out of range, 6 column name not found.
 *
 * @returns {Promise<void>} Resolves once the report has been printed or written.
 */
async function main() {
  const opts = parseArgs(process.argv)
  if (opts.help) { usage(); return }
  if (!opts.file || !opts.column) { console.error('Missing --file or --column'); usage(); process.exit(1) }

  const filePath = path.isAbsolute(opts.file) ? opts.file : path.join(process.cwd(), opts.file)
  let stat
  try {
    stat = await fs.stat(filePath)
  } catch (e) {
    // Only ENOENT really means "not found"; report other failures (e.g.
    // permission problems) with their own message instead of hiding them.
    if (e && e.code === 'ENOENT') {
      console.error('File not found:', filePath)
    } else {
      console.error(`Cannot access file: ${filePath} (${e && e.message ? e.message : e})`)
    }
    process.exit(2)
  }
  if (!stat.isFile()) { console.error('Not a file:', filePath); process.exit(3) }

  const { headers, rows } = await readCSV(filePath)
  if (!headers || headers.length === 0) { console.error('No header found in CSV'); process.exit(4) }

  // Resolve the column index: an all-digit --column is a 1-based index,
  // anything else is a header name (exact match first, then case-insensitive).
  let colIndex = -1
  if (/^\d+$/.test(opts.column)) {
    colIndex = parseInt(opts.column, 10) - 1
    if (colIndex < 0 || colIndex >= headers.length) { console.error('Column index out of range'); process.exit(5) }
  } else {
    colIndex = headers.indexOf(opts.column)
    if (colIndex === -1) {
      const lower = opts.column.toLowerCase()
      colIndex = headers.findIndex(h => String(h).toLowerCase() === lower)
      if (colIndex === -1) { console.error(`Column name not found: ${opts.column}`); process.exit(6) }
    }
  }

  // Tally normalized values; rows that are too short count under the empty key.
  const counts = new Map()
  for (const vals of rows) {
    const v = normalizeKey(vals[colIndex])
    counts.set(v, (counts.get(v) || 0) + 1)
  }

  // Sort by count descending, then by value ascending.
  const items = Array.from(counts.entries()).sort((a, b) => {
    if (b[1] !== a[1]) return b[1] - a[1]
    return String(a[0]).localeCompare(String(b[0]))
  })

  const outLines = []
  outLines.push(`File: ${filePath}`)
  outLines.push(`Column: ${headers[colIndex]} (index ${colIndex + 1})`)
  outLines.push(`Total distinct classes: ${items.length}`)
  outLines.push('')
  outLines.push('Value,Count')
  // Values may contain commas or quotes (the reader unquotes fields), so they
  // are re-quoted here to keep each row unambiguous.
  for (const [val, cnt] of items) outLines.push(`${escapeCsvField(val)},${cnt}`)

  if (opts.out) {
    const outPath = path.isAbsolute(opts.out) ? opts.out : path.join(process.cwd(), opts.out)
    await fs.mkdir(path.dirname(outPath), { recursive: true })
    await fs.writeFile(outPath, outLines.join('\n'), 'utf8')
    console.log(`Wrote counts to ${outPath} (${items.length} distinct)`)
  } else {
    console.log(outLines.join('\n'))
  }
}

// Script entry: run main(); if it rejects, log the error (its stack when one
// is available) and exit with status 10.
main().catch((failure) => {
  const detail = failure && failure.stack ? failure.stack : failure
  console.error('Error:', detail)
  process.exit(10)
})