Hoooong committed on
Commit
8fa1c80
·
1 Parent(s): b0768ab

Use a module to parse csv files

Browse files
Files changed (3) hide show
  1. package-lock.json +7 -0
  2. package.json +1 -0
  3. src/lib/csvUtils.ts +20 -85
package-lock.json CHANGED
@@ -25,6 +25,7 @@
25
  "ai": "^5.0.1",
26
  "class-variance-authority": "^0.7.1",
27
  "clsx": "^2.1.1",
 
28
  "docx": "^9.5.1",
29
  "lucide-react": "^0.539.0",
30
  "next": "15.4.5",
@@ -3611,6 +3612,12 @@
3611
  "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
3612
  "license": "MIT"
3613
  },
 
 
 
 
 
 
3614
  "node_modules/debug": {
3615
  "version": "4.4.3",
3616
  "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
 
25
  "ai": "^5.0.1",
26
  "class-variance-authority": "^0.7.1",
27
  "clsx": "^2.1.1",
28
+ "csv-parse": "^6.1.0",
29
  "docx": "^9.5.1",
30
  "lucide-react": "^0.539.0",
31
  "next": "15.4.5",
 
3612
  "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
3613
  "license": "MIT"
3614
  },
3615
+ "node_modules/csv-parse": {
3616
+ "version": "6.1.0",
3617
+ "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-6.1.0.tgz",
3618
+ "integrity": "sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw==",
3619
+ "license": "MIT"
3620
+ },
3621
  "node_modules/debug": {
3622
  "version": "4.4.3",
3623
  "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
package.json CHANGED
@@ -26,6 +26,7 @@
26
  "ai": "^5.0.1",
27
  "class-variance-authority": "^0.7.1",
28
  "clsx": "^2.1.1",
 
29
  "docx": "^9.5.1",
30
  "lucide-react": "^0.539.0",
31
  "next": "15.4.5",
 
26
  "ai": "^5.0.1",
27
  "class-variance-authority": "^0.7.1",
28
  "clsx": "^2.1.1",
29
+ "csv-parse": "^6.1.0",
30
  "docx": "^9.5.1",
31
  "lucide-react": "^0.539.0",
32
  "next": "15.4.5",
src/lib/csvUtils.ts CHANGED
@@ -1,5 +1,6 @@
1
  import fs from 'fs';
2
  import path from 'path';
 
3
  import type { PromptExample } from '@/types/example';
4
 
5
  /**
@@ -12,29 +13,31 @@ export async function loadCsvExamples(filename: string): Promise<PromptExample[]
12
  const csvPath = path.join(process.cwd(), 'data', filename);
13
  const csvContent = fs.readFileSync(csvPath, 'utf-8');
14
 
 
 
 
 
 
 
 
 
15
  const examples: PromptExample[] = [];
16
- const lines = csvContent.split('\n');
17
- let i = 1; // Skip header
18
 
19
- while (i < lines.length) {
20
- if (!lines[i] || !lines[i].trim()) {
21
- i++;
22
- continue;
23
- }
24
 
25
- // Parse CSV entry that might span multiple lines
26
- const entry = parseCSVEntry(lines, i);
27
- if (entry.values.length >= 5) {
28
  examples.push({
29
- 年度: entry.values[0],
30
- 題號: entry.values[1],
31
- 子題型: entry.values[2],
32
- 題幹: entry.values[3],
33
- 選項: entry.values[4],
34
  });
35
- console.log(`Loaded example: ${entry.values[2]} (${entry.values[0]}-${entry.values[1]})`);
36
  }
37
- i = entry.nextIndex;
38
  }
39
 
40
  console.log(`Total examples loaded from ${filename}: ${examples.length}`);
@@ -44,71 +47,3 @@ export async function loadCsvExamples(filename: string): Promise<PromptExample[]
44
  return [];
45
  }
46
  }
47
-
48
-
49
- // Parse a CSV entry that might span multiple lines
50
- function parseCSVEntry(lines: string[], startIndex: number): { values: string[], nextIndex: number } {
51
- let currentLine = startIndex;
52
- let fullLine = '';
53
- let openQuotes = 0;
54
-
55
- // Combine lines until we have a complete CSV record
56
- while (currentLine < lines.length) {
57
- const line = lines[currentLine];
58
- fullLine += (fullLine ? '\n' : '') + line;
59
-
60
- // Count quotes to determine if we're inside a quoted field
61
- for (const char of line) {
62
- if (char === '"') {
63
- openQuotes++;
64
- }
65
- }
66
-
67
- // If quotes are balanced (even number), we have a complete record
68
- if (openQuotes % 2 === 0) {
69
- break;
70
- }
71
-
72
- currentLine++;
73
- }
74
-
75
- const values = parseCSVLine(fullLine);
76
- return {
77
- values: values,
78
- nextIndex: currentLine + 1
79
- };
80
- }
81
-
82
-
83
- // Helper function to parse CSV line with proper quote handling
84
- function parseCSVLine(line: string): string[] {
85
- const result: string[] = [];
86
- let current = '';
87
- let inQuotes = false;
88
- let i = 0;
89
-
90
- while (i < line.length) {
91
- const char = line[i];
92
-
93
- if (char === '"') {
94
- if (inQuotes && line[i + 1] === '"') {
95
- // Handle escaped quotes
96
- current += '"';
97
- i += 2;
98
- continue;
99
- } else {
100
- inQuotes = !inQuotes;
101
- }
102
- } else if (char === ',' && !inQuotes) {
103
- result.push(current.trim());
104
- current = '';
105
- } else {
106
- current += char;
107
- }
108
-
109
- i++;
110
- }
111
-
112
- result.push(current.trim());
113
- return result;
114
- }
 
1
  import fs from 'fs';
2
  import path from 'path';
3
+ import { parse } from 'csv-parse/sync';
4
  import type { PromptExample } from '@/types/example';
5
 
6
  /**
 
13
  const csvPath = path.join(process.cwd(), 'data', filename);
14
  const csvContent = fs.readFileSync(csvPath, 'utf-8');
15
 
16
+ // Parse CSV with csv-parse library (handles multi-line entries, escaped quotes, etc.)
17
+ const records = parse(csvContent, {
18
+ columns: false, // Return as array of arrays
19
+ skip_empty_lines: true,
20
+ relax_column_count: true, // Allow inconsistent column counts
21
+ trim: true, // Trim whitespace from values
22
+ }) as string[][];
23
+
24
  const examples: PromptExample[] = [];
 
 
25
 
26
+ // Skip header row (index 0) and process data rows
27
+ for (let i = 1; i < records.length; i++) {
28
+ const row = records[i];
 
 
29
 
30
+ // Ensure we have at least 5 columns
31
+ if (row && row.length >= 5) {
 
32
  examples.push({
33
+ 年度: row[0] || '',
34
+ 題號: row[1] || '',
35
+ 子題型: row[2] || '',
36
+ 題幹: row[3] || '',
37
+ 選項: row[4] || '',
38
  });
39
+ console.log(`Loaded example: ${row[2]} (${row[0]}-${row[1]})`);
40
  }
 
41
  }
42
 
43
  console.log(`Total examples loaded from ${filename}: ${examples.length}`);
 
47
  return [];
48
  }
49
  }