|
|
/**
 * Normalizes raw pasted text before the anonymization steps run.
 *
 * Steps, in order:
 * 1. Carriage returns become line feeds.
 * 2. The whole-word artifacts "letras blanco", "blancobl" and "blanco"
 *    (case-insensitive) are replaced by a space.
 * 3. Runs of spaces/tabs collapse to a single space.
 * 4. Runs of consecutive newlines collapse to a single newline.
 * 5. The result is trimmed.
 *
 * @param rawText Text to normalize. Falsy values (e.g. `null`, `undefined`
 *   passed from untyped callers) are treated as an empty string.
 * @returns The normalized text.
 */
export function normalizeInputText(rawText: string): string {
  return String(rawText || "")
    .replace(/\r/g, "\n")
    // Most specific artifact first: if the bare "blanco" rule ran before this
    // one, the phrase could never match and "letras" would be left behind.
    .replace(/\bletras[ \t]+blanco\b/gi, " ")
    .replace(/\bblancobl\b/gi, " ")
    .replace(/\bblanco\b/gi, " ")
    .replace(/[ \t]+/g, " ")
    .replace(/\n{2,}/g, "\n")
    .trim();
}
|
|
/**
 * Strips administrative header/footer fragments from report text.
 *
 * Most patterns match a label and everything after it up to the end of that
 * line (`.*$` with the `m` flag), so several labels on one line are removed
 * together once the first one matches. Matching is case-insensitive, and
 * labels that carry an accent also match when the accent is missing
 * (e.g. "Número:" / "Numero:").
 *
 * NOTE(review): labels are not anchored to a word boundary, so a label such
 * as "Edad:" also matches at the end of a longer word ("Enfermedad:").
 * Left as-is because over-removal is the safer failure mode here — confirm.
 *
 * @param text Text to clean; line structure (newlines) is preserved.
 * @returns The text with every matched fragment replaced by an empty string.
 */
export function removeAdministrativePatterns(text: string): string {
  const patternsToRemove: readonly RegExp[] = [
    /Nombre:.*$/gim,
    /N[uú]mero:.*$/gim,
    /Historia:.*$/gim,
    /T\.\s*Sanitaria:.*$/gim,
    /Solicitante:.*$/gim,
    /Servicio:.*$/gim,
    /Destinos?:.*$/gim,
    /Centro:.*$/gim,
    /Sexo:.*$/gim,
    /Edad:.*$/gim,
    /Habitaci[oó]n:.*$/gim,
    /Cama:.*$/gim,
    /Recep\.?Muestra.*$/gim,
    /Fch\.?Informe.*$/gim,
    /Fecha de an[aá]lisis:.*$/gim,
    /Resultados validados por:.*$/gim,
    /Tipo de Muestra:.*$/gim,
    /Tipo de informe:.*$/gim,
    // Page counters are removed in place; the rest of the line is kept.
    /P[aá]gina\s*\d+\/\d+/gim,
    /Hospital Ernest Lluch.*$/gim,
    /H\.\s*ERNEST LLUCH.*$/gim,
    /INFORME DE RESULTADOS.*$/gim,
    /Calatayud,.*$/gim,
    /T[eé]cnica:.*$/gim,
    // Separator rules made of underscores.
    /_{5,}/g
  ];

  // String.prototype.replace resets lastIndex on global regexes, so the
  // patterns can be applied one after another without extra bookkeeping.
  return patternsToRemove.reduce(
    (cleaned, pattern) => cleaned.replace(pattern, ""),
    text
  );
}
|
|
/**
 * Replaces direct identifiers with a single space.
 *
 * Patterns removed, in order:
 * 1. Two uppercase letters + nine or more digits + optional uppercase letter.
 * 2. Any standalone run of six or more digits.
 * 3. Dates written as d/m/y (1-2 digits / 1-2 digits / 2-4 digits).
 * 4. Times written as h:mm:ss.
 * 5. Hyphenated uppercase codes (two or more letters on each side).
 * 6. "SURNAME, NAME" written in capitals.
 *
 * Rule 6 only consumes spaces and tabs, never newlines: a name must not
 * swallow the all-caps section headers or analyte names on following lines.
 *
 * NOTE(review): rule 2 also removes large numeric results (six or more
 * digits) and rule 5 also matches hyphenated analyte names such as "CK-MB".
 * NOTE(review): rule 6 only removes the uppercase word directly before the
 * comma, so the first surname of "GARCIA LOPEZ, JUAN" is left in place.
 * Both kept for backward compatibility — confirm the intended trade-off.
 *
 * @param text Text to scrub; newlines are preserved.
 * @returns The text with each matched identifier replaced by one space.
 */
export function removeDirectIdentifiers(text: string): string {
  return text
    .replace(/\b[A-Z]{2}\d{9,}[A-Z]?\b/g, " ")
    .replace(/\b\d{6,}\b/g, " ")
    .replace(/\b\d{1,2}\/\d{1,2}\/\d{2,4}\b/g, " ")
    .replace(/\b\d{1,2}:\d{2}:\d{2}\b/g, " ")
    .replace(/\b[A-Z]{2,}-[A-Z]{2,}\b/g, " ")
    // Horizontal whitespace only ([ \t], not \s) keeps the match on one line.
    // Ü is included so names such as "AGÜERO" are matched as a whole.
    .replace(/[A-ZÁÉÍÓÚÑÜ]{2,},[ \t]*[A-ZÁÉÍÓÚÑÜ \t]{2,}/g, " ");
}
|
|
/**
 * Filters already-trimmed report lines, keeping those that look like lab
 * content.
 *
 * A line is dropped when it is empty, shorter than two characters, or matches
 * one of the noise patterns (table header, administrative labels, dot-only
 * lines). A line is kept when it is one of the known section headings, or
 * when it contains at least one of: the word "PEND", a known unit, a numeric
 * value, or a parameter-like text of letters, digits, whitespace and
 * `. - ( ) /`.
 *
 * @param lines Lines to filter; the array is not modified.
 * @returns A new array with the kept lines in their original order.
 */
export function keepUsefulLabLines(lines: string[]): string[] {
  const allowedSectionPatterns: readonly RegExp[] = [
    /^BIOQUIMICA$/i,
    /^BIOQUIMICA GENERAL$/i,
    /^HEMATOLOGIA$/i,
    /^HEMATIMETRIA$/i,
    /^HEMOSTASIA$/i,
    /^GASOMETRIA$/i,
    /^ORINA$/i,
    /^COAGULACION$/i,
    /^OTRAS PRUEBAS:?$/i
  ];

  const obviousNoisePatterns: readonly RegExp[] = [
    /^Magnitud Resultado Unidades Intervalo de Referencia Biológico$/i,
    /^Tipo de informe:.*$/i,
    /^Resultados validados por:.*$/i,
    /^Tipo de Muestra:.*$/i,
    /^\.+$/,
    /^-$/,
    /^Hospital .*$/i,
    /^INFORME DE RESULTADOS$/i,
    /^Nombre:.*$/i,
    /^Historia:.*$/i,
    /^Sexo:.*$/i,
    /^Edad:.*$/i,
    // The dot is optional, matching the spelling accepted by
    // removeAdministrativePatterns ("Recep.Muestra" / "RecepMuestra").
    /^Recep\.?Muestra.*$/i,
    /^Fch\.?Informe.*$/i
  ];

  // None of these regexes is global/sticky, so test() keeps no state and they
  // can be built once per call instead of once per line.
  const pendPattern = /\bPEND\b/i;
  // Any number also covers reference ranges ("70 - 110"), so no separate
  // range check is needed.
  const valuePattern = /\b\d+[.,]?\d*\b/;
  // "%" is matched outside the \b...\b group: "%" is not a word character, so
  // a trailing \b after it would fail in "45 %" or "45%" at end of line.
  const unitsPattern =
    /\b(?:mg\/dL|g\/dL|mmol\/L|mEq\/L|mil\/mm3|mill\/mm3|fl|pg|u\/L|ui\/L|ng\/mL|mL\/min\/1\.73m\^?2|mL\/min|seg|s|mill|mil)\b|%/i;
  const parameterPattern = /^[A-ZÁÉÍÓÚÑ][A-ZÁÉÍÓÚÑ0-9\s\.\-\(\)\/]{3,}$/i;

  return lines.filter((line) => {
    if (!line || line.length < 2) return false;
    // Noise wins over everything else, including lines that contain numbers.
    if (obviousNoisePatterns.some((rx) => rx.test(line))) return false;
    if (allowedSectionPatterns.some((rx) => rx.test(line))) return true;

    return (
      pendPattern.test(line) ||
      unitsPattern.test(line) ||
      valuePattern.test(line) ||
      parameterPattern.test(line)
    );
  });
}
|
|
/**
 * Splits text into clean, non-empty lines.
 *
 * Each line has its runs of spaces/tabs collapsed to one space and is
 * trimmed; lines that end up empty are discarded.
 *
 * @param text Text to split. `\n`, `\r\n` and a lone `\r` are all treated as
 *   line breaks, so the function also works on text that was not normalized
 *   beforehand.
 * @returns The cleaned lines in their original order.
 */
export function finalizeCleanLines(text: string): string[] {
  return text
    .split(/\r\n|\r|\n/)
    // Collapsing before trimming gives the same result as trim-collapse-trim
    // in a single pass.
    .map((line) => line.replace(/[ \t]+/g, " ").trim())
    .filter((line) => line.length > 0);
}
|
|
/**
 * Runs the full anonymization pipeline for lab-report text.
 *
 * The text is normalized, stripped of administrative fragments and direct
 * identifiers, split into clean lines, and filtered with
 * `keepUsefulLabLines`. The surviving lines are joined with newlines.
 *
 * Side effect: logs the length (not the content) of the result to the console.
 *
 * @param rawText Raw lab-report text.
 * @returns The anonymized text, trimmed.
 */
export function anonymizeLabsText(rawText: string): string {
  const normalized = normalizeInputText(rawText);
  const withoutAdminData = removeAdministrativePatterns(normalized);
  const withoutIdentifiers = removeDirectIdentifiers(withoutAdminData);

  const result = keepUsefulLabLines(finalizeCleanLines(withoutIdentifiers))
    .join("\n")
    .trim();

  console.log("Anonymized Labs Text length:", result.length);
  return result;
}
|
|
/**
 * Runs the anonymization pipeline for treatment/medication text.
 *
 * The text is normalized, stripped of administrative fragments and direct
 * identifiers, and split into clean lines. A line is kept when it matches at
 * least one of: a drug-name-like shape (letters, digits, whitespace and
 * `/ . -` only), a dose (number followed by mg, mcg, g, ui, ml or ug), or a
 * frequency expression (e.g. "cada 8 h", "al día", "si precisa", "q8h").
 *
 * Side effect: logs the length (not the content) of the result to the console.
 *
 * @param rawText Raw treatment text.
 * @returns The kept lines joined with newlines, trimmed.
 */
export function anonymizeTreatmentsText(rawText: string): string {
  // Non-global regexes: test() keeps no state, so one instance serves all lines.
  const dosePattern = /\b\d+[.,]?\d*\s?(mg|mcg|g|ui|ml|ug)\b/i;
  const frequencyPattern =
    /(cada\s+\d+\s*h|cada\s+\d+\s*horas|al d[ií]a|si precisa|por la noche|por la ma[ñn]ana|semanal|q\d+h)/i;
  const drugNamePattern =
    /^[A-ZÁÉÍÓÚÑa-záéíóúñ][A-ZÁÉÍÓÚÑa-záéíóúñ0-9\s\/\.\-]+$/i;

  const sanitized = removeDirectIdentifiers(
    removeAdministrativePatterns(normalizeInputText(rawText))
  );

  const keptLines: string[] = [];
  for (const line of finalizeCleanLines(sanitized)) {
    const isRelevant =
      drugNamePattern.test(line) ||
      dosePattern.test(line) ||
      frequencyPattern.test(line);
    if (isRelevant) {
      keptLines.push(line);
    }
  }

  const result = keptLines.join("\n").trim();
  console.log("Anonymized Treatments Text length:", result.length);
  return result;
}
|
|