HellApp / src /lib /anonymizer.ts
aarnal80's picture
Upload 17 files
2a40140 verified
/**
 * Normalizes raw report text before any anonymization step runs.
 *
 * Steps, in order:
 * - nullish/empty input is coerced to an empty string;
 * - carriage returns become line feeds;
 * - the literal tokens "letras blanco", "blancobl" and "blanco" are blanked
 *   (case-insensitive, whole words only);
 * - runs of spaces/tabs collapse to one space, runs of line feeds to one;
 * - the result is trimmed.
 *
 * @param rawText Text to normalize; falsy values are treated as "".
 * @returns The normalized text.
 */
export function normalizeInputText(rawText: string): string {
  return String(rawText || "")
    .replace(/\r/g, "\n")
    // The two-word token has to be handled before the bare "blanco" rule:
    // otherwise "blanco" is consumed first, this rule can never match, and a
    // stray "letras" is left in the output. Whitespace is not collapsed yet
    // at this point, so several spaces/tabs between the words are tolerated.
    .replace(/\bletras[ \t]+blanco\b/gi, " ")
    .replace(/\bblancobl\b/gi, " ")
    .replace(/\bblanco\b/gi, " ")
    .replace(/[ \t]+/g, " ")
    .replace(/\n{2,}/g, "\n")
    .trim();
}
/**
 * Deletes administrative fragments (labelled header fields, page counters,
 * hospital/report banners, underscore rulers) from the text.
 *
 * Rules ending in `.*$` erase from the label to the end of that line; the
 * line break itself is kept, so an emptied line stays as a blank line.
 * Matching is case-insensitive and not anchored to the start of a line or
 * to a word boundary, so a label is also removed when it appears mid-line
 * or as the tail of a longer word.
 *
 * @param text Text to clean.
 * @returns The text with every matching fragment replaced by "".
 */
export function removeAdministrativePatterns(text: string): string {
  const administrativeRules: RegExp[] = [
    /Nombre:.*$/gim,
    /Número:.*$/gim,
    /Historia:.*$/gim,
    /T\.\s*Sanitaria:.*$/gim,
    /Solicitante:.*$/gim,
    /Servicio:.*$/gim,
    /Destino[s]?:.*$/gim,
    /Destinos:.*$/gim,
    /Centro:.*$/gim,
    /Sexo:.*$/gim,
    /Edad:.*$/gim,
    /Habitación:.*$/gim,
    /Cama:.*$/gim,
    /Recep\.?Muestra.*$/gim,
    /Fch\.?Informe.*$/gim,
    /Fecha de análisis:.*$/gim,
    /Resultados validados por:.*$/gim,
    /Tipo de Muestra:.*$/gim,
    /Tipo de informe:.*$/gim,
    /Página\s*\d+\/\d+/gim,
    /Hospital Ernest Lluch.*$/gim,
    /H\.\s*ERNEST LLUCH.*$/gim,
    /INFORME DE RESULTADOS.*$/gim,
    /Calatayud,.*$/gim,
    /Tecnica:.*$/gim,
    /T[eé]cnica:.*$/gim,
    /_{5,}/g
  ];

  // Apply the rules one after another, each on the output of the previous one.
  return administrativeRules.reduce(
    (remaining, rule) => remaining.replace(rule, ""),
    text
  );
}
/**
 * Replaces tokens that look like direct identifiers with a single space.
 *
 * Removed, in order:
 * - two capital letters followed by 9+ digits and an optional capital letter;
 * - standalone runs of 6 or more digits;
 * - slash-separated dates (`d/m/yy` … `dd/mm/yyyy`);
 * - `h:mm:ss` times;
 * - hyphenated all-capital codes (`AB-CD`);
 * - all-capital "SURNAME(S), GIVEN NAMES" sequences.
 *
 * @param text Text to scrub.
 * @returns The text with each match replaced by " ".
 */
export function removeDirectIdentifiers(text: string): string {
  // Capitalised full name: one or more capital words, a comma, then the
  // given names.
  //  - Several words are accepted before the comma so that compound surnames
  //    ("GARCIA LOPEZ, JUAN") are removed whole instead of leaking the first
  //    surname.
  //  - The given-name part only accepts capitals and horizontal whitespace
  //    (`[^\S\r\n]`), so the match stops at the end of its line instead of
  //    swallowing following uppercase lines (e.g. section headers) and the
  //    first capital of the next mixed-case line.
  //  - `\s*` after the comma is kept as-is, so a name wrapped right after
  //    the comma is still caught.
  const upperCaseFullName =
    /[A-ZÁÉÍÓÚÑ]{2,}(?:[^\S\r\n]+[A-ZÁÉÍÓÚÑ]{2,})*,\s*(?:[A-ZÁÉÍÓÚÑ]|[^\S\r\n]){2,}/g;

  return text
    .replace(/\b[A-Z]{2}\d{9,}[A-Z]?\b/g, " ")
    .replace(/\b\d{6,}\b/g, " ")
    .replace(/\b\d{1,2}\/\d{1,2}\/\d{2,4}\b/g, " ")
    .replace(/\b\d{1,2}:\d{2}:\d{2}\b/g, " ")
    .replace(/\b[A-Z]{2,}-[A-Z]{2,}\b/g, " ")
    .replace(upperCaseFullName, " ");
}
/**
 * Filters a list of lines down to those that look like lab content.
 *
 * A line is dropped when it is empty, shorter than two characters, or matches
 * one of the noise patterns (table header, report metadata, dot/dash fillers).
 * Of the remaining lines, one is kept when it is a known section header or
 * shows at least one content signal: the token "PEND", a numeric range, a
 * known unit, any number, or a label-like shape (4+ characters made only of
 * letters, digits, whitespace and `. - ( ) /`, starting with a letter).
 *
 * @param lines Candidate lines.
 * @returns The kept lines, in their original order.
 */
export function keepUsefulLabLines(lines: string[]): string[] {
  const sectionHeaders: RegExp[] = [
    /^BIOQUIMICA$/i,
    /^BIOQUIMICA GENERAL$/i,
    /^HEMATOLOGIA$/i,
    /^HEMATIMETRIA$/i,
    /^HEMOSTASIA$/i,
    /^GASOMETRIA$/i,
    /^ORINA$/i,
    /^COAGULACION$/i,
    /^OTRAS PRUEBAS:?$/i
  ];

  const noise: RegExp[] = [
    /^Magnitud Resultado Unidades Intervalo de Referencia Biológico$/i,
    /^Tipo de informe:.*$/i,
    /^Resultados validados por:.*$/i,
    /^Tipo de Muestra:.*$/i,
    /^\.+$/,
    /^-$/,
    /^Hospital .*$/i,
    /^INFORME DE RESULTADOS$/i,
    /^Nombre:.*$/i,
    /^Historia:.*$/i,
    /^Sexo:.*$/i,
    /^Edad:.*$/i,
    /^Recep\.Muestra.*$/i,
    /^Fch\.Informe.*$/i
  ];

  // Any single one of these is enough to keep a line.
  const contentSignals: RegExp[] = [
    // pending marker
    /\bPEND\b/i,
    // numeric range such as "70 - 110"
    /\b\d+[.,]?\d*\s*-\s*\d+[.,]?\d*\b/,
    // known unit
    /\b(mg\/dL|g\/dL|mmol\/L|mEq\/L|mil\/mm3|mill\/mm3|fl|pg|%|u\/L|ui\/L|ng\/mL|mL\/min\/1\.73m\^?2|mL\/min|seg|s|mill|mil|pg|fl)\b/i,
    // any number
    /\b\d+[.,]?\d*\b/,
    // label-like line
    /^[A-ZÁÉÍÓÚÑ][A-ZÁÉÍÓÚÑ0-9\s\.\-\(\)\/]{3,}$/i
  ];

  const matchesAny = (patterns: RegExp[], candidate: string): boolean =>
    patterns.some((rx) => rx.test(candidate));

  const kept: string[] = [];
  for (const line of lines) {
    if (!line || line.length < 2) continue;
    if (matchesAny(noise, line)) continue;
    if (matchesAny(sectionHeaders, line) || matchesAny(contentSignals, line)) {
      kept.push(line);
    }
  }
  return kept;
}
/**
 * Splits text on line feeds and returns the non-empty lines, each trimmed and
 * with internal runs of spaces/tabs collapsed to a single space.
 *
 * @param text Text to split.
 * @returns The cleaned lines, in order; blank lines are omitted.
 */
export function finalizeCleanLines(text: string): string[] {
  const cleaned: string[] = [];
  for (const rawLine of text.split("\n")) {
    // Trimming first means the collapse below only ever touches interior
    // whitespace, so no second trim is needed.
    const line = rawLine.trim().replace(/[ \t]+/g, " ");
    if (line) {
      cleaned.push(line);
    }
  }
  return cleaned;
}
/**
 * Runs the full cleaning pipeline on lab-report text: normalization,
 * administrative-pattern removal, direct-identifier removal, line cleanup and
 * lab-line filtering. The kept lines are joined with line feeds.
 *
 * Logs the length of the result (not its content) to the console.
 *
 * @param rawText Raw lab-report text.
 * @returns The filtered text, trimmed.
 */
export function anonymizeLabsText(rawText: string): string {
  const scrubbed = removeDirectIdentifiers(
    removeAdministrativePatterns(normalizeInputText(rawText))
  );
  const keptLines = keepUsefulLabLines(finalizeCleanLines(scrubbed));
  const anonymized = keptLines.join("\n").trim();

  console.log("Anonymized Labs Text length:", anonymized.length);
  return anonymized;
}
/**
 * Runs the cleaning pipeline on treatment text: normalization,
 * administrative-pattern removal, direct-identifier removal and line cleanup,
 * then keeps only lines that pass at least one of three checks:
 * - the whole line consists of letters, digits, whitespace and `/ . -`,
 *   starting with a letter;
 * - it contains a number followed by a dose unit (mg, mcg, g, ui, ml, ug);
 * - it contains a frequency expression (e.g. "cada 8 h", "al día", "q8h").
 *
 * Logs the length of the result (not its content) to the console.
 *
 * @param rawText Raw treatment text.
 * @returns The kept lines joined with line feeds, trimmed.
 */
export function anonymizeTreatmentsText(rawText: string): string {
  const dosePattern = /\b\d+[.,]?\d*\s?(mg|mcg|g|ui|ml|ug)\b/i;
  const frequencyPattern =
    /(cada\s+\d+\s*h|cada\s+\d+\s*horas|al d[ií]a|si precisa|por la noche|por la ma[ñn]ana|semanal|q\d+h)/i;
  const drugLinePattern =
    /^[A-ZÁÉÍÓÚÑa-záéíóúñ][A-ZÁÉÍÓÚÑa-záéíóúñ0-9\s\/\.\-]+$/i;

  const isTreatmentLine = (line: string): boolean =>
    drugLinePattern.test(line) ||
    dosePattern.test(line) ||
    frequencyPattern.test(line);

  const scrubbed = removeDirectIdentifiers(
    removeAdministrativePatterns(normalizeInputText(rawText))
  );
  const anonymized = finalizeCleanLines(scrubbed)
    .filter((line) => isTreatmentLine(line))
    .join("\n")
    .trim();

  console.log("Anonymized Treatments Text length:", anonymized.length);
  return anonymized;
}