proteinea / src /lib /csv-utils.ts
Mahmoud Eljendy
feat: Antibody Studio — AI-native antibody design workspace by Proteinea
30cc31a
// csv-utils.ts — Shared CSV parsing utilities extracted from CSVViewer.
// Used by both CSVViewer and DesignIterationPanel.
/**
 * Result of parsing a block of delimited text.
 *
 * The first parsed record is exposed as `header`; every following record is
 * kept in `rows` in its original order.
 */
export interface ParsedCsv {
  /** Field values of the first record (empty when nothing was parsed). */
  header: string[];
  /** Remaining records, one `string[]` of field values per record. */
  rows: string[][];
}
/**
 * Split text into logical lines, respecting double-quoted fields that may
 * contain embedded newlines. Only breaks on \n (or \r\n) when outside quotes.
 *
 * Quote handling mirrors `parseLine`: when `delim` is supplied, a `"` opens a
 * quoted region only while nothing has been accumulated for the current field,
 * so a stray quote inside an unquoted field (e.g. `5"`) stays literal and
 * cannot swallow the line breaks that follow it. When `delim` is omitted the
 * field boundaries are unknown, so any `"` seen outside quotes opens a quoted
 * region.
 *
 * @param text  Raw text to split.
 * @param delim Single-character field delimiter, if known.
 * @returns The logical lines without their terminators. Empty lines between
 *          terminators are kept; text after the last terminator is kept only
 *          when it is non-empty.
 */
function splitLines(text: string, delim?: string): string[] {
  const lines: string[] = [];
  let current = "";
  let inQuotes = false;
  // Tracks parseLine's `field === ""` condition for the field being scanned.
  let fieldEmpty = true;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inQuotes) {
      if (ch === '"' && text[i + 1] === '"') {
        // Escaped quote: keep both characters so parseLine can unescape them.
        current += '""';
        fieldEmpty = false;
        i++;
      } else if (ch === '"') {
        inQuotes = false;
        current += ch;
      } else {
        current += ch;
        fieldEmpty = false;
      }
    } else if (ch === '"' && (fieldEmpty || delim === undefined)) {
      inQuotes = true;
      current += ch;
    } else if (ch === delim) {
      current += ch;
      fieldEmpty = true;
    } else if (ch === "\n" || (ch === "\r" && text[i + 1] === "\n")) {
      if (ch === "\r") i++; // skip the \n of a \r\n pair
      lines.push(current);
      current = "";
      fieldEmpty = true;
    } else {
      current += ch;
      fieldEmpty = false;
    }
  }
  if (current.length > 0) lines.push(current);
  return lines;
}

/**
 * Parse one logical line into its field values.
 *
 * A `"` starts a quoted section only when the current field is still empty.
 * Inside a quoted section `""` yields a literal quote and a single `"` ends
 * the section. Characters that follow a closed quoted section are appended to
 * the same field until the next delimiter.
 *
 * @param line  One logical line, without its line terminator.
 * @param delim Single-character field delimiter.
 * @returns The field values; always contains at least one element.
 */
function parseLine(line: string, delim: string): string[] {
  const out: string[] = [];
  let field = "";
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (line[i + 1] === '"') {
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"' && field === "") {
      inQuotes = true;
    } else if (ch === delim) {
      out.push(field);
      field = "";
    } else {
      field += ch;
    }
  }
  out.push(field);
  return out;
}

/**
 * Parse delimited text into a header row and data rows.
 *
 * A leading byte-order mark is removed, empty lines are skipped, and trailing
 * rows whose cells are all empty strings are dropped. The first remaining row
 * becomes `header`; the rest become `rows`. Row lengths are not normalized.
 *
 * @param input Raw file contents.
 * @param delim Single-character field delimiter (e.g. "," or "\t").
 * @returns The parsed header and rows; both are empty when no row remains.
 */
export function parseCsv(input: string, delim: string): ParsedCsv {
  const text = input.charCodeAt(0) === 0xfeff ? input.slice(1) : input;

  const parsed: string[][] = [];
  // Passing the delimiter keeps line splitting consistent with parseLine's
  // quoting rule, so a literal quote in an unquoted field cannot merge rows.
  for (const line of splitLines(text, delim)) {
    if (line.length > 0) parsed.push(parseLine(line, delim));
  }

  for (;;) {
    const last = parsed[parsed.length - 1];
    if (last === undefined || !last.every((c) => c === "")) break;
    parsed.pop();
  }

  const [header, ...rows] = parsed;
  if (header === undefined) return { header: [], rows: [] };
  return { header, rows };
}
/**
 * Report whether a cell value is a finite decimal number.
 *
 * Surrounding whitespace is ignored. Accepted shapes are an optional leading
 * minus sign, digits with an optional decimal point (a leading-dot form such
 * as ".5" is allowed), and an optional exponent. Values that overflow to a
 * non-finite number are rejected.
 *
 * @param v Raw cell text.
 * @returns `true` when the trimmed text matches the decimal pattern and
 *          converts to a finite number; `false` otherwise.
 */
export function isNumericValue(v: string): boolean {
  // Loose null check: guards callers that bypass the declared string type.
  if (v == null) return false;

  const candidate = v.trim();
  if (candidate.length === 0) return false;

  const decimalShape = /^-?\d*\.?\d+(?:[eE][-+]?\d+)?$/;
  return decimalShape.test(candidate) && Number.isFinite(Number(candidate));
}
/**
 * Decide, per header column, whether the column holds numeric data.
 *
 * A column is numeric when at least 80% of its non-blank cells pass
 * `isNumericValue`. Cells that are missing (row shorter than the header),
 * empty, or whitespace-only are treated as absent and do not count toward the
 * ratio. A column with no non-blank cells is not numeric.
 *
 * @param header Column names; only its length and order are used.
 * @param rows   Data rows; rows may be shorter or longer than `header`.
 * @returns One boolean per header column, in header order.
 */
export function detectNumericColumns(header: string[], rows: string[][]): boolean[] {
  return header.map((_, col) => {
    let total = 0;
    let numeric = 0;
    for (const row of rows) {
      const v = row[col];
      // Whitespace-only cells are blank too: isNumericValue trims its input,
      // so counting them as non-numeric values would understate the ratio.
      if (v === undefined || v.trim() === "") continue;
      total++;
      if (isNumericValue(v)) numeric++;
    }
    return total > 0 && numeric / total >= 0.8;
  });
}
/** Column names that indicate a CSV is a design results file. */
const DESIGN_COLUMNS: ReadonlySet<string> = new Set([
  "ipae",
  "interaction_pae",
  "plddt",
  "pred_lddt",
  "affinity",
  "affinity_pm",
  "consensus_score",
  "design_id",
  "tag",
  "sequence",
]);

/**
 * Returns true if the CSV header contains at least 2 design-related columns.
 *
 * Each header name is normalized before lookup: surrounding whitespace is
 * trimmed, the text is lower-cased, and every character outside `[a-z0-9_]`
 * is replaced with `_` (so "Design ID" and "design-id" both match
 * "design_id").
 *
 * @param header Column names as read from the file.
 * @returns `true` when two or more header entries normalize to a known design column.
 */
export function isDesignCsv(header: string[]): boolean {
  let matches = 0;
  for (const raw of header) {
    // Trim first: padded names such as " plddt" would otherwise become
    // "_plddt" and never match.
    const normalized = raw.trim().toLowerCase().replace(/[^a-z0-9_]/g, "_");
    if (DESIGN_COLUMNS.has(normalized)) matches++;
  }
  return matches >= 2;
}
/**
 * Choose the field delimiter for a file from its name or URL.
 *
 * `name` is used when it is non-empty; otherwise `url` is used. The chosen
 * text is lower-cased and a tab is returned when it contains "tab-separated"
 * or ends with ".tsv". The ".tsv" check is also applied to the text with any
 * query string or fragment removed, so a URL such as
 * "https://host/a.tsv?sig=1" is still recognized.
 *
 * @param name File name; may be empty.
 * @param url  Source URL, consulted only when `name` is empty.
 * @returns "\t" for tab-separated input, "," otherwise.
 */
export function detectDelimiter(name: string, url: string): string {
  const source = (name || url || "").toLowerCase();
  if (source.includes("tab-separated")) return "\t";

  // Drop everything from the first "?" or "#" so the extension is visible.
  const cut = source.search(/[?#]/);
  const pathOnly = cut === -1 ? source : source.slice(0, cut);

  return source.endsWith(".tsv") || pathOnly.endsWith(".tsv") ? "\t" : ",";
}