import fs from "node:fs/promises"; import crypto from "node:crypto"; import { FileBlob, SpreadsheetFile } from "@oai/artifact-tool"; const [inputPath, outputPath, password = "20302030", supervisorPassword = "1448"] = process.argv.slice(2); const MAP_URLS = [ ["أماني مصطفى عبدالله الطيب", "https://stat2025-map.static.hf.space/Rahn/01.html"], ["اسعد بن ماجد بن", "https://stat2025-map.static.hf.space/Rahn/02.html"], ["ريم بنت محمد بن عبدالعزيز الملحم", "https://stat2025-map.static.hf.space/Rahn/03.html"], ["زكي بن عيسى بن", "https://stat2025-map.static.hf.space/Rahn/04.html"], ["ساره حسين بن عبدالهادي بوخمسين", "https://stat2025-map.static.hf.space/Rahn/05.html"], ["ساره خالد سليمان المحيسن", "https://stat2025-map.static.hf.space/Rahn/06.html"], ["صالح عبدالله صالح الدخيل", "https://stat2025-map.static.hf.space/Rahn/07.html"], ["طيبه فالح بن عبدالله الرويشد", "https://stat2025-map.static.hf.space/Rahn/08.html"], ["عبدالرحمن عبدالله سعد الحماد", "https://stat2025-map.static.hf.space/Rahn/09.html"], ["عبدالعزيز فهد عبدالعزيز العبلان", "https://stat2025-map.static.hf.space/Rahn/10.html"], ["علي جابر بن علي", "https://stat2025-map.static.hf.space/Rahn/11.html"], ["عماد بن عيسى بن", "https://stat2025-map.static.hf.space/Rahn/12.html"], ["غادة سعد عبدالرحمن الراحله", "https://stat2025-map.static.hf.space/Rahn/13.html"], ["فارس سمير سليماني", "https://stat2025-map.static.hf.space/Rahn/14.html"], ["فاطمة حميدي هيف القحطاني", "https://stat2025-map.static.hf.space/Rahn/15.html"], ["فوز عائد نومان المطيري", "https://stat2025-map.static.hf.space/Rahn/16.html"], ["قنوت محمد بن عبدالله آل حماد", "https://stat2025-map.static.hf.space/Rahn/17.html"], ["لين أحمد بن عبدالعزيز القصير", "https://stat2025-map.static.hf.space/Rahn/18.html"], ["ماجد سعد ناصر السبيعي", "https://stat2025-map.static.hf.space/Rahn/19.html"], ["مرتضى عبدالجليل بن عيسى", "https://stat2025-map.static.hf.space/Rahn/20.html"], ["ناصر منصور علي الرويس", "https://stat2025-map.static.hf.space/Rahn/21.html"], ["نبأ عادل بن عبدالكريم", "https://stat2025-map.static.hf.space/Rahn/22.html"], ["نوار عوض مقبل العنزى", "https://stat2025-map.static.hf.space/Rahn/23.html"], ["نوف سعود بن سالم الخثعمي", "https://stat2025-map.static.hf.space/Rahn/24.html"], ["نيللي حسين عبدالله الجعص", "https://stat2025-map.static.hf.space/Rahn/25.html"], ["هيه عبدالعزيز المزيني", "https://stat2025-map.static.hf.space/Rahn/26.html"], ]; function clean(value) { if (value === null || value === undefined || value === 0) return ""; return String(value).replace(/\u00a0/g, " ").replace(/\s+/g, " ").trim(); } function normalize(value) { return clean(value) .normalize("NFKD") .replace(/[\u064B-\u065F\u0670]/g, "") .replace(/[أإآ]/g, "ا") .replace(/ى/g, "ي") .replace(/ة/g, "ه") .toLowerCase(); } function numeric(value) { const parsed = Number(value); return Number.isFinite(parsed) ? parsed : null; } function coordinateFromText(value) { const text = clean(value); const match = text.match(/(-?\d{1,2}(?:\.\d+)?)\s*[,،]\s*(-?\d{1,3}(?:\.\d+)?)/); if (!match) return ""; const first = Number(match[1]); const second = Number(match[2]); if (first >= 15 && first <= 35 && second >= 35 && second <= 60) return `${first}, ${second}`; if (second >= 15 && second <= 35 && first >= 35 && first <= 60) return `${second}, ${first}`; return ""; } function canonicalCity(primary, fallback) { const raw = clean(primary) || clean(fallback) || "غير محدد"; const city = normalize(raw); if (city.includes("الصناعيه الثانيه") && city.includes("الدمام")) return "المدينة الصناعية الثانية بالدمام"; if ((city.includes("الصناعيه الاولي") || city === "الصناعيه الاولي") && !city.includes("الاحساء")) { return "المدينة الصناعية الأولى بالدمام"; } if (city.includes("الصناعيه الثالثه") && city.includes("الدمام")) return "المدينة الصناعية الثالثة بالدمام"; if (city.includes("الاحساء") || city === "العيون" || city.includes("واحه مدن")) return "المدينة الصناعية الأولى بالأحساء"; if (city.includes("حفر الباطن")) return "المدينة الصناعية بحفر الباطن"; if (city.includes("الملك سلمان") || city.includes("سكيكو") || city.includes("ارامكو")) return "مدينة الملك سلمان (سبارك)"; if (city.includes("الصناعيه الثانيه") && city.includes("الرياض")) return "المدينة الصناعية الثانية بالرياض"; return raw; } function mapUrlFor(researcher) { const target = normalize(researcher); const exact = MAP_URLS.find(([name]) => normalize(name) === target); if (exact) return exact[1]; const partial = MAP_URLS.find(([name]) => target.includes(normalize(name)) || normalize(name).includes(target)); return partial?.[1] || ""; } function columnIndex(headers, names, fallback) { const targets = names.map(normalize); const index = headers.findIndex((header) => targets.includes(normalize(header))); return index >= 0 ? index : fallback; } function sampleKey(row) { const parts = [ row.commercialRecord, row.contractNumber, row.establishmentName, ].map(normalize); return crypto.createHash("sha256").update(parts.join("|")).digest("hex").slice(0, 24); } function encryptPayload(plainPayload, secret, iterations) { const salt = crypto.randomBytes(16); const iv = crypto.randomBytes(12); const key = crypto.pbkdf2Sync(secret, salt, iterations, 32, "sha256"); const cipher = crypto.createCipheriv("aes-256-gcm", key, iv); const encrypted = Buffer.concat([cipher.update(plainPayload, "utf8"), cipher.final()]); return { iterations, salt: salt.toString("base64"), iv: iv.toString("base64"), payload: Buffer.concat([encrypted, cipher.getAuthTag()]).toString("base64"), }; } function encryptedBlock(name, encrypted) { return [ `const ${name} = Object.freeze({`, ` iterations: ${encrypted.iterations},`, ` salt: "${encrypted.salt}",`, ` iv: "${encrypted.iv}",`, ` payload: "${encrypted.payload}",`, "});", ].join("\n"); } if (!inputPath || !outputPath) { console.error("Usage: node generate-data.mjs [password]"); process.exitCode = 1; } else { const workbook = await SpreadsheetFile.importXlsx(await FileBlob.load(inputPath)); const sheet = workbook.worksheets.getItemAt(0); const values = sheet.getUsedRange(true).values; const headers = (values[0] || []).map(clean); const columns = { commercialRecord: columnIndex(headers, ["السجل التجاري"], 11), researcher: columnIndex(headers, ["اسم الباحث/ة", "اسم الباحث", "الباحث"], 0), establishmentName: columnIndex(headers, ["إسم المنشأة", "اسم المنشأة", "المنشأة"], 1), alternateName: 2, contractNumber: columnIndex(headers, ["رقم العقد"], 3), city: columnIndex(headers, ["المدينة الصناعية", "توضيح المدينة"], 5), fallbackCity: 4, status: columnIndex(headers, ["حالة الاستيفاء"], 6), madonStatement: columnIndex(headers, ["افادة مدن", "إفادة مدن"], 7), madonNote: columnIndex(headers, ["ملاحظة مدن", "ملاحظات مدن"], 8), x: columnIndex(headers, ["اكس", "x"], 9), y: columnIndex(headers, ["واي", "y"], 10), unifiedNumber: columnIndex(headers, ["الرقم الموحد"], 12), activityCode: columnIndex(headers, ["ترميز النشاط"], 13), activity: columnIndex(headers, ["النشاط"], 14), }; const rows = []; for (const source of values.slice(1)) { const researcher = clean(source[columns.researcher]); const establishmentName = clean(source[columns.establishmentName]) || clean(source[columns.alternateName]); if (!researcher || !establishmentName) continue; const madonStatement = clean(source[columns.madonStatement]); const madonNote = clean(source[columns.madonNote]); const madonCoordinates = coordinateFromText(madonNote); const x = numeric(source[columns.x]); const y = numeric(source[columns.y]); const baseCoordinates = x !== null && y !== null && x >= 35 && x <= 60 && y >= 15 && y <= 35 ? `${y}, ${x}` : ""; const coordinates = madonCoordinates || baseCoordinates; const locationType = madonCoordinates ? "madon" : baseCoordinates ? "base" : madonStatement || madonNote ? "statement" : "none"; const row = { researcher, establishmentName, contractNumber: clean(source[columns.contractNumber]), city: canonicalCity(source[columns.city], source[columns.fallbackCity]), sourceCity: clean(source[columns.city]) || clean(source[columns.fallbackCity]), representativeCity: canonicalCity("", source[columns.fallbackCity] || source[columns.city]), status: clean(source[columns.status]), madonStatement, madonNoteText: madonCoordinates ? "" : madonNote, coordinates, locationType, commercialRecord: clean(source[columns.commercialRecord]), unifiedNumber: clean(source[columns.unifiedNumber]), activityCode: clean(source[columns.activityCode]), activity: clean(source[columns.activity]), }; row.sampleKey = sampleKey(row); rows.push(row); } const counts = new Map(); rows.forEach((row) => counts.set(row.researcher, (counts.get(row.researcher) || 0) + 1)); const researchers = [...counts.entries()] .map(([name, count]) => ({ name, count, mapUrl: mapUrlFor(name) })) .sort((a, b) => a.name.localeCompare(b.name, "ar", { sensitivity: "base" })); const missingMaps = researchers.filter((item) => !item.mapUrl).map((item) => item.name); if (missingMaps.length) console.warn(`Missing map URLs: ${missingMaps.join(", ")}`); const plainPayload = JSON.stringify({ version: 3, generatedAt: new Date().toISOString(), researchers, rows, }); const iterations = 310000; const researcherEncrypted = encryptPayload(plainPayload, password, iterations); const supervisorEncrypted = encryptPayload(plainPayload, supervisorPassword, iterations); const output = [ "/* Generated encrypted data. Rebuild this file from the source workbook. */", encryptedBlock("ENCRYPTED_DATA", researcherEncrypted), "", encryptedBlock("SUPERVISOR_ENCRYPTED_DATA", supervisorEncrypted), "", ].join("\n"); await fs.writeFile(outputPath, output, "utf8"); console.log(JSON.stringify({ rows: rows.length, researchers: researchers.length, madonCoordinates: rows.filter((row) => row.locationType === "madon").length, baseCoordinates: rows.filter((row) => row.locationType === "base").length, statements: rows.filter((row) => row.locationType === "statement").length, noLocation: rows.filter((row) => row.locationType === "none").length, missingMaps, })); }