Spaces:
Paused
Paused
Use a module to parse csv files
Browse files
- package-lock.json +7 -0
- package.json +1 -0
- src/lib/csvUtils.ts +20 -85
package-lock.json
CHANGED
|
@@ -25,6 +25,7 @@
|
|
| 25 |
"ai": "^5.0.1",
|
| 26 |
"class-variance-authority": "^0.7.1",
|
| 27 |
"clsx": "^2.1.1",
|
|
|
|
| 28 |
"docx": "^9.5.1",
|
| 29 |
"lucide-react": "^0.539.0",
|
| 30 |
"next": "15.4.5",
|
|
@@ -3611,6 +3612,12 @@
|
|
| 3611 |
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
|
| 3612 |
"license": "MIT"
|
| 3613 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3614 |
"node_modules/debug": {
|
| 3615 |
"version": "4.4.3",
|
| 3616 |
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
|
|
|
|
| 25 |
"ai": "^5.0.1",
|
| 26 |
"class-variance-authority": "^0.7.1",
|
| 27 |
"clsx": "^2.1.1",
|
| 28 |
+
"csv-parse": "^6.1.0",
|
| 29 |
"docx": "^9.5.1",
|
| 30 |
"lucide-react": "^0.539.0",
|
| 31 |
"next": "15.4.5",
|
|
|
|
| 3612 |
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
|
| 3613 |
"license": "MIT"
|
| 3614 |
},
|
| 3615 |
+
"node_modules/csv-parse": {
|
| 3616 |
+
"version": "6.1.0",
|
| 3617 |
+
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-6.1.0.tgz",
|
| 3618 |
+
"integrity": "sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw==",
|
| 3619 |
+
"license": "MIT"
|
| 3620 |
+
},
|
| 3621 |
"node_modules/debug": {
|
| 3622 |
"version": "4.4.3",
|
| 3623 |
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
|
package.json
CHANGED
|
@@ -26,6 +26,7 @@
|
|
| 26 |
"ai": "^5.0.1",
|
| 27 |
"class-variance-authority": "^0.7.1",
|
| 28 |
"clsx": "^2.1.1",
|
|
|
|
| 29 |
"docx": "^9.5.1",
|
| 30 |
"lucide-react": "^0.539.0",
|
| 31 |
"next": "15.4.5",
|
|
|
|
| 26 |
"ai": "^5.0.1",
|
| 27 |
"class-variance-authority": "^0.7.1",
|
| 28 |
"clsx": "^2.1.1",
|
| 29 |
+
"csv-parse": "^6.1.0",
|
| 30 |
"docx": "^9.5.1",
|
| 31 |
"lucide-react": "^0.539.0",
|
| 32 |
"next": "15.4.5",
|
src/lib/csvUtils.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import fs from 'fs';
|
| 2 |
import path from 'path';
|
|
|
|
| 3 |
import type { PromptExample } from '@/types/example';
|
| 4 |
|
| 5 |
/**
|
|
@@ -12,29 +13,31 @@ export async function loadCsvExamples(filename: string): Promise<PromptExample[]
|
|
| 12 |
const csvPath = path.join(process.cwd(), 'data', filename);
|
| 13 |
const csvContent = fs.readFileSync(csvPath, 'utf-8');
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
const examples: PromptExample[] = [];
|
| 16 |
-
const lines = csvContent.split('\n');
|
| 17 |
-
let i = 1; // Skip header
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
continue;
|
| 23 |
-
}
|
| 24 |
|
| 25 |
-
//
|
| 26 |
-
|
| 27 |
-
if (entry.values.length >= 5) {
|
| 28 |
examples.push({
|
| 29 |
-
年度:
|
| 30 |
-
題號:
|
| 31 |
-
子題型:
|
| 32 |
-
題幹:
|
| 33 |
-
選項:
|
| 34 |
});
|
| 35 |
-
console.log(`Loaded example: ${
|
| 36 |
}
|
| 37 |
-
i = entry.nextIndex;
|
| 38 |
}
|
| 39 |
|
| 40 |
console.log(`Total examples loaded from ${filename}: ${examples.length}`);
|
|
@@ -44,71 +47,3 @@ export async function loadCsvExamples(filename: string): Promise<PromptExample[]
|
|
| 44 |
return [];
|
| 45 |
}
|
| 46 |
}
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
// Parse a CSV entry that might span multiple lines
|
| 50 |
-
function parseCSVEntry(lines: string[], startIndex: number): { values: string[], nextIndex: number } {
|
| 51 |
-
let currentLine = startIndex;
|
| 52 |
-
let fullLine = '';
|
| 53 |
-
let openQuotes = 0;
|
| 54 |
-
|
| 55 |
-
// Combine lines until we have a complete CSV record
|
| 56 |
-
while (currentLine < lines.length) {
|
| 57 |
-
const line = lines[currentLine];
|
| 58 |
-
fullLine += (fullLine ? '\n' : '') + line;
|
| 59 |
-
|
| 60 |
-
// Count quotes to determine if we're inside a quoted field
|
| 61 |
-
for (const char of line) {
|
| 62 |
-
if (char === '"') {
|
| 63 |
-
openQuotes++;
|
| 64 |
-
}
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
// If quotes are balanced (even number), we have a complete record
|
| 68 |
-
if (openQuotes % 2 === 0) {
|
| 69 |
-
break;
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
currentLine++;
|
| 73 |
-
}
|
| 74 |
-
|
| 75 |
-
const values = parseCSVLine(fullLine);
|
| 76 |
-
return {
|
| 77 |
-
values: values,
|
| 78 |
-
nextIndex: currentLine + 1
|
| 79 |
-
};
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
// Helper function to parse CSV line with proper quote handling
|
| 84 |
-
function parseCSVLine(line: string): string[] {
|
| 85 |
-
const result: string[] = [];
|
| 86 |
-
let current = '';
|
| 87 |
-
let inQuotes = false;
|
| 88 |
-
let i = 0;
|
| 89 |
-
|
| 90 |
-
while (i < line.length) {
|
| 91 |
-
const char = line[i];
|
| 92 |
-
|
| 93 |
-
if (char === '"') {
|
| 94 |
-
if (inQuotes && line[i + 1] === '"') {
|
| 95 |
-
// Handle escaped quotes
|
| 96 |
-
current += '"';
|
| 97 |
-
i += 2;
|
| 98 |
-
continue;
|
| 99 |
-
} else {
|
| 100 |
-
inQuotes = !inQuotes;
|
| 101 |
-
}
|
| 102 |
-
} else if (char === ',' && !inQuotes) {
|
| 103 |
-
result.push(current.trim());
|
| 104 |
-
current = '';
|
| 105 |
-
} else {
|
| 106 |
-
current += char;
|
| 107 |
-
}
|
| 108 |
-
|
| 109 |
-
i++;
|
| 110 |
-
}
|
| 111 |
-
|
| 112 |
-
result.push(current.trim());
|
| 113 |
-
return result;
|
| 114 |
-
}
|
|
|
|
| 1 |
import fs from 'fs';
|
| 2 |
import path from 'path';
|
| 3 |
+
import { parse } from 'csv-parse/sync';
|
| 4 |
import type { PromptExample } from '@/types/example';
|
| 5 |
|
| 6 |
/**
|
|
|
|
| 13 |
const csvPath = path.join(process.cwd(), 'data', filename);
|
| 14 |
const csvContent = fs.readFileSync(csvPath, 'utf-8');
|
| 15 |
|
| 16 |
+
// Parse CSV with csv-parse library (handles multi-line entries, escaped quotes, etc.)
|
| 17 |
+
const records = parse(csvContent, {
|
| 18 |
+
columns: false, // Return as array of arrays
|
| 19 |
+
skip_empty_lines: true,
|
| 20 |
+
relax_column_count: true, // Allow inconsistent column counts
|
| 21 |
+
trim: true, // Trim whitespace from values
|
| 22 |
+
}) as string[][];
|
| 23 |
+
|
| 24 |
const examples: PromptExample[] = [];
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
// Skip header row (index 0) and process data rows
|
| 27 |
+
for (let i = 1; i < records.length; i++) {
|
| 28 |
+
const row = records[i];
|
|
|
|
|
|
|
| 29 |
|
| 30 |
+
// Ensure we have at least 5 columns
|
| 31 |
+
if (row && row.length >= 5) {
|
|
|
|
| 32 |
examples.push({
|
| 33 |
+
年度: row[0] || '',
|
| 34 |
+
題號: row[1] || '',
|
| 35 |
+
子題型: row[2] || '',
|
| 36 |
+
題幹: row[3] || '',
|
| 37 |
+
選項: row[4] || '',
|
| 38 |
});
|
| 39 |
+
console.log(`Loaded example: ${row[2]} (${row[0]}-${row[1]})`);
|
| 40 |
}
|
|
|
|
| 41 |
}
|
| 42 |
|
| 43 |
console.log(`Total examples loaded from ${filename}: ${examples.length}`);
|
|
|
|
| 47 |
return [];
|
| 48 |
}
|
| 49 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|