ICS2 / generate-data.mjs
stat2025's picture
Upload 3 files
96dd2e4 verified
Raw
History Blame Contribute Delete
11.7 kB
import fs from "node:fs/promises";
import crypto from "node:crypto";
import { FileBlob, SpreadsheetFile } from "@oai/artifact-tool";
const [inputPath, outputPath, password = "20302030", supervisorPassword = "1448"] = process.argv.slice(2);
const MAP_URLS = [
["أماني مصطفى عبدالله الطيب", "https://stat2025-map.static.hf.space/Rahn/01.html"],
["اسعد بن ماجد بن", "https://stat2025-map.static.hf.space/Rahn/02.html"],
["ريم بنت محمد بن عبدالعزيز الملحم", "https://stat2025-map.static.hf.space/Rahn/03.html"],
["زكي بن عيسى بن", "https://stat2025-map.static.hf.space/Rahn/04.html"],
["ساره حسين بن عبدالهادي بوخمسين", "https://stat2025-map.static.hf.space/Rahn/05.html"],
["ساره خالد سليمان المحيسن", "https://stat2025-map.static.hf.space/Rahn/06.html"],
["صالح عبدالله صالح الدخيل", "https://stat2025-map.static.hf.space/Rahn/07.html"],
["طيبه فالح بن عبدالله الرويشد", "https://stat2025-map.static.hf.space/Rahn/08.html"],
["عبدالرحمن عبدالله سعد الحماد", "https://stat2025-map.static.hf.space/Rahn/09.html"],
["عبدالعزيز فهد عبدالعزيز العبلان", "https://stat2025-map.static.hf.space/Rahn/10.html"],
["علي جابر بن علي", "https://stat2025-map.static.hf.space/Rahn/11.html"],
["عماد بن عيسى بن", "https://stat2025-map.static.hf.space/Rahn/12.html"],
["غادة سعد عبدالرحمن الراحله", "https://stat2025-map.static.hf.space/Rahn/13.html"],
["فارس سمير سليماني", "https://stat2025-map.static.hf.space/Rahn/14.html"],
["فاطمة حميدي هيف القحطاني", "https://stat2025-map.static.hf.space/Rahn/15.html"],
["فوز عائد نومان المطيري", "https://stat2025-map.static.hf.space/Rahn/16.html"],
["قنوت محمد بن عبدالله آل حماد", "https://stat2025-map.static.hf.space/Rahn/17.html"],
["لين أحمد بن عبدالعزيز القصير", "https://stat2025-map.static.hf.space/Rahn/18.html"],
["ماجد سعد ناصر السبيعي", "https://stat2025-map.static.hf.space/Rahn/19.html"],
["مرتضى عبدالجليل بن عيسى", "https://stat2025-map.static.hf.space/Rahn/20.html"],
["ناصر منصور علي الرويس", "https://stat2025-map.static.hf.space/Rahn/21.html"],
["نبأ عادل بن عبدالكريم", "https://stat2025-map.static.hf.space/Rahn/22.html"],
["نوار عوض مقبل العنزى", "https://stat2025-map.static.hf.space/Rahn/23.html"],
["نوف سعود بن سالم الخثعمي", "https://stat2025-map.static.hf.space/Rahn/24.html"],
["نيللي حسين عبدالله الجعص", "https://stat2025-map.static.hf.space/Rahn/25.html"],
["هيه عبدالعزيز المزيني", "https://stat2025-map.static.hf.space/Rahn/26.html"],
];
function clean(value) {
if (value === null || value === undefined || value === 0) return "";
return String(value).replace(/\u00a0/g, " ").replace(/\s+/g, " ").trim();
}
function normalize(value) {
return clean(value)
.normalize("NFKD")
.replace(/[\u064B-\u065F\u0670]/g, "")
.replace(/[أإآ]/g, "ا")
.replace(/ى/g, "ي")
.replace(/ة/g, "ه")
.toLowerCase();
}
function numeric(value) {
const parsed = Number(value);
return Number.isFinite(parsed) ? parsed : null;
}
function coordinateFromText(value) {
const text = clean(value);
const match = text.match(/(-?\d{1,2}(?:\.\d+)?)\s*[,،]\s*(-?\d{1,3}(?:\.\d+)?)/);
if (!match) return "";
const first = Number(match[1]);
const second = Number(match[2]);
if (first >= 15 && first <= 35 && second >= 35 && second <= 60) return `${first}, ${second}`;
if (second >= 15 && second <= 35 && first >= 35 && first <= 60) return `${second}, ${first}`;
return "";
}
function canonicalCity(primary, fallback) {
const raw = clean(primary) || clean(fallback) || "غير محدد";
const city = normalize(raw);
if (city.includes("الصناعيه الثانيه") && city.includes("الدمام")) return "المدينة الصناعية الثانية بالدمام";
if ((city.includes("الصناعيه الاولي") || city === "الصناعيه الاولي") && !city.includes("الاحساء")) {
return "المدينة الصناعية الأولى بالدمام";
}
if (city.includes("الصناعيه الثالثه") && city.includes("الدمام")) return "المدينة الصناعية الثالثة بالدمام";
if (city.includes("الاحساء") || city === "العيون" || city.includes("واحه مدن")) return "المدينة الصناعية الأولى بالأحساء";
if (city.includes("حفر الباطن")) return "المدينة الصناعية بحفر الباطن";
if (city.includes("الملك سلمان") || city.includes("سكيكو") || city.includes("ارامكو")) return "مدينة الملك سلمان (سبارك)";
if (city.includes("الصناعيه الثانيه") && city.includes("الرياض")) return "المدينة الصناعية الثانية بالرياض";
return raw;
}
function mapUrlFor(researcher) {
const target = normalize(researcher);
const exact = MAP_URLS.find(([name]) => normalize(name) === target);
if (exact) return exact[1];
const partial = MAP_URLS.find(([name]) => target.includes(normalize(name)) || normalize(name).includes(target));
return partial?.[1] || "";
}
function columnIndex(headers, names, fallback) {
const targets = names.map(normalize);
const index = headers.findIndex((header) => targets.includes(normalize(header)));
return index >= 0 ? index : fallback;
}
function sampleKey(row) {
const parts = [
row.commercialRecord,
row.contractNumber,
row.establishmentName,
].map(normalize);
return crypto.createHash("sha256").update(parts.join("|")).digest("hex").slice(0, 24);
}
function encryptPayload(plainPayload, secret, iterations) {
const salt = crypto.randomBytes(16);
const iv = crypto.randomBytes(12);
const key = crypto.pbkdf2Sync(secret, salt, iterations, 32, "sha256");
const cipher = crypto.createCipheriv("aes-256-gcm", key, iv);
const encrypted = Buffer.concat([cipher.update(plainPayload, "utf8"), cipher.final()]);
return {
iterations,
salt: salt.toString("base64"),
iv: iv.toString("base64"),
payload: Buffer.concat([encrypted, cipher.getAuthTag()]).toString("base64"),
};
}
function encryptedBlock(name, encrypted) {
return [
`const ${name} = Object.freeze({`,
` iterations: ${encrypted.iterations},`,
` salt: "${encrypted.salt}",`,
` iv: "${encrypted.iv}",`,
` payload: "${encrypted.payload}",`,
"});",
].join("\n");
}
if (!inputPath || !outputPath) {
console.error("Usage: node generate-data.mjs <input.xlsx> <output.js> [password]");
process.exitCode = 1;
} else {
const workbook = await SpreadsheetFile.importXlsx(await FileBlob.load(inputPath));
const sheet = workbook.worksheets.getItemAt(0);
const values = sheet.getUsedRange(true).values;
const headers = (values[0] || []).map(clean);
const columns = {
commercialRecord: columnIndex(headers, ["السجل التجاري"], 11),
researcher: columnIndex(headers, ["اسم الباحث/ة", "اسم الباحث", "الباحث"], 0),
establishmentName: columnIndex(headers, ["إسم المنشأة", "اسم المنشأة", "المنشأة"], 1),
alternateName: 2,
contractNumber: columnIndex(headers, ["رقم العقد"], 3),
city: columnIndex(headers, ["المدينة الصناعية", "توضيح المدينة"], 5),
fallbackCity: 4,
status: columnIndex(headers, ["حالة الاستيفاء"], 6),
madonStatement: columnIndex(headers, ["افادة مدن", "إفادة مدن"], 7),
madonNote: columnIndex(headers, ["ملاحظة مدن", "ملاحظات مدن"], 8),
x: columnIndex(headers, ["اكس", "x"], 9),
y: columnIndex(headers, ["واي", "y"], 10),
unifiedNumber: columnIndex(headers, ["الرقم الموحد"], 12),
activityCode: columnIndex(headers, ["ترميز النشاط"], 13),
activity: columnIndex(headers, ["النشاط"], 14),
};
const rows = [];
for (const source of values.slice(1)) {
const researcher = clean(source[columns.researcher]);
const establishmentName = clean(source[columns.establishmentName]) || clean(source[columns.alternateName]);
if (!researcher || !establishmentName) continue;
const madonStatement = clean(source[columns.madonStatement]);
const madonNote = clean(source[columns.madonNote]);
const madonCoordinates = coordinateFromText(madonNote);
const x = numeric(source[columns.x]);
const y = numeric(source[columns.y]);
const baseCoordinates =
x !== null && y !== null && x >= 35 && x <= 60 && y >= 15 && y <= 35 ? `${y}, ${x}` : "";
const coordinates = madonCoordinates || baseCoordinates;
const locationType = madonCoordinates
? "madon"
: baseCoordinates
? "base"
: madonStatement || madonNote
? "statement"
: "none";
const row = {
researcher,
establishmentName,
contractNumber: clean(source[columns.contractNumber]),
city: canonicalCity(source[columns.city], source[columns.fallbackCity]),
sourceCity: clean(source[columns.city]) || clean(source[columns.fallbackCity]),
representativeCity: canonicalCity("", source[columns.fallbackCity] || source[columns.city]),
status: clean(source[columns.status]),
madonStatement,
madonNoteText: madonCoordinates ? "" : madonNote,
coordinates,
locationType,
commercialRecord: clean(source[columns.commercialRecord]),
unifiedNumber: clean(source[columns.unifiedNumber]),
activityCode: clean(source[columns.activityCode]),
activity: clean(source[columns.activity]),
};
row.sampleKey = sampleKey(row);
rows.push(row);
}
const counts = new Map();
rows.forEach((row) => counts.set(row.researcher, (counts.get(row.researcher) || 0) + 1));
const researchers = [...counts.entries()]
.map(([name, count]) => ({ name, count, mapUrl: mapUrlFor(name) }))
.sort((a, b) => a.name.localeCompare(b.name, "ar", { sensitivity: "base" }));
const missingMaps = researchers.filter((item) => !item.mapUrl).map((item) => item.name);
if (missingMaps.length) console.warn(`Missing map URLs: ${missingMaps.join(", ")}`);
const plainPayload = JSON.stringify({
version: 3,
generatedAt: new Date().toISOString(),
researchers,
rows,
});
const iterations = 310000;
const researcherEncrypted = encryptPayload(plainPayload, password, iterations);
const supervisorEncrypted = encryptPayload(plainPayload, supervisorPassword, iterations);
const output = [
"/* Generated encrypted data. Rebuild this file from the source workbook. */",
encryptedBlock("ENCRYPTED_DATA", researcherEncrypted),
"",
encryptedBlock("SUPERVISOR_ENCRYPTED_DATA", supervisorEncrypted),
"",
].join("\n");
await fs.writeFile(outputPath, output, "utf8");
console.log(JSON.stringify({
rows: rows.length,
researchers: researchers.length,
madonCoordinates: rows.filter((row) => row.locationType === "madon").length,
baseCoordinates: rows.filter((row) => row.locationType === "base").length,
statements: rows.filter((row) => row.locationType === "statement").length,
noLocation: rows.filter((row) => row.locationType === "none").length,
missingMaps,
}));
}