Chart / dataset /web /amCharts /supplement_from_weblink.js
Pekku's picture
Upload folder using huggingface_hub
79dc9c8 verified
const fs = require("fs");
const path = require("path");
const cp = require("child_process");
const vm = require("vm");
// Directory containing this script; each category folder is resolved beneath it.
const ROOT = __dirname;
// Names of the category sub-directories that main() iterates over.
const CATEGORIES = ["bar", "box", "heatmap", "line", "other", "pie", "scatter"];
// User-Agent string handed to curl (via -A) for every request.
const UA =
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
// Highest numbered "<n>.html" output that main() will write in a category directory.
const MAX_PER_CATEGORY = 20;
// In-process caches of downloaded responses keyed by URL: decoded text and raw bytes.
const textCache = new Map();
const bufferCache = new Map();
/**
 * Make sure a directory exists, creating any missing parent directories.
 * Calling it for a directory that already exists is not an error.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {void}
 */
function ensureDir(dirPath) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(dirPath, mkdirOptions);
}
/**
 * Read a UTF-8 text file and return its non-blank lines, each trimmed.
 *
 * @param {string} filePath - File to read.
 * @returns {string[]} Trimmed, non-empty lines in file order; an empty array
 *   when the file does not exist.
 */
function readLines(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }
  const content = fs.readFileSync(filePath, "utf8");
  const kept = [];
  for (const rawLine of content.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (trimmed) {
      kept.push(trimmed);
    }
  }
  return kept;
}
/**
 * Download a URL with the curl command-line tool and return the body decoded
 * as UTF-8. Results are memoised in `textCache`, so each URL is fetched at
 * most once per process.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {string} Response body as text.
 * @throws {Error} When curl cannot be started, exits non-zero, or the output
 *   exceeds the 64 MiB buffer limit.
 */
function fetchText(url) {
  if (textCache.has(url)) return textCache.get(url);
  // The executable is only called "curl.exe" on Windows; elsewhere that name
  // is not found on PATH, so choose the binary name per platform.
  const curlBin = process.platform === "win32" ? "curl.exe" : "curl";
  // -L follows redirects; --noproxy "*" bypasses any configured proxy.
  // NOTE(review): without --fail curl exits 0 on HTTP error statuses, so an
  // error page body is returned (and cached) like any other content.
  const args = ["-L", "--silent", "--show-error", "--noproxy", "*", "-A", UA, url];
  const out = cp.execFileSync(curlBin, args, { maxBuffer: 64 * 1024 * 1024 });
  const text = out.toString("utf8");
  textCache.set(url, text);
  return text;
}
/**
 * Download a URL with the curl command-line tool and return the raw bytes.
 * Results are memoised in `bufferCache`, so each URL is fetched at most once
 * per process.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {Buffer} Response body bytes.
 * @throws {Error} When curl cannot be started, exits non-zero, or the output
 *   exceeds the 128 MiB buffer limit.
 */
function fetchBuffer(url) {
  if (bufferCache.has(url)) return bufferCache.get(url);
  // The executable is only called "curl.exe" on Windows; elsewhere that name
  // is not found on PATH, so choose the binary name per platform.
  const curlBin = process.platform === "win32" ? "curl.exe" : "curl";
  // -L follows redirects; --noproxy "*" bypasses any configured proxy.
  // NOTE(review): without --fail curl exits 0 on HTTP error statuses, so an
  // error page body is returned (and cached) like any other content.
  const args = ["-L", "--silent", "--show-error", "--noproxy", "*", "-A", UA, url];
  const out = cp.execFileSync(curlBin, args, { maxBuffer: 128 * 1024 * 1024 });
  bufferCache.set(url, out);
  return out;
}
/**
 * Extract the last path segment of a URL, ignoring one leading and one
 * trailing slash of the pathname.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The final path segment, or "" when the URL cannot be
 *   parsed or the segment is empty.
 */
function toSlug(url) {
  let pathname;
  try {
    pathname = new URL(url).pathname;
  } catch (_) {
    return "";
  }
  const segments = pathname.replace(/^\/|\/$/g, "").split("/");
  const last = segments[segments.length - 1];
  return last || "";
}
/**
 * Derive a non-zero unsigned 32-bit seed from a string using an FNV-1a style
 * xor-then-multiply fold.
 *
 * The input is walked by code point, but only the first UTF-16 unit of each
 * code point is mixed in.
 *
 * @param {string} input - Text to hash.
 * @returns {number} Integer in [1, 2^32 - 1]; a zero hash is mapped to 1.
 */
function hashSeed(input) {
  const OFFSET_BASIS = 2166136261;
  const PRIME = 16777619;
  const folded = Array.from(input).reduce(
    (acc, ch) => Math.imul(acc ^ ch.charCodeAt(0), PRIME),
    OFFSET_BASIS
  );
  const unsigned = folded >>> 0;
  return unsigned === 0 ? 1 : unsigned;
}
/**
 * Make demo code deterministic by swapping every `Math.random()` call for a
 * seeded linear congruential generator that is prepended to the code.
 *
 * @param {string} jsCode - Demo JavaScript source.
 * @param {number} seed - Seed value; coerced to an unsigned 32-bit integer.
 * @returns {{code: string, fixed: boolean}} The (possibly rewritten) code and
 *   whether any `Math.random()` call was replaced.
 */
function patchRandom(jsCode, seed) {
  const randomCall = /\bMath\.random\s*\(\s*\)/g;
  // search() ignores the global flag, so it is a stateless presence check.
  if (jsCode.search(randomCall) === -1) {
    return { code: jsCode, fixed: false };
  }
  const prelude =
    `let __seed = ${seed >>> 0};\n` +
    "function __seededRandom() {\n" +
    " __seed = (__seed * 1664525 + 1013904223) >>> 0;\n" +
    " return __seed / 4294967296;\n" +
    "}\n";
  const body = jsCode.replace(randomCall, "__seededRandom()");
  return { code: prelude + body, fixed: true };
}
/**
 * Extract the `var demoData = {...}` object literal from a demo page and
 * evaluate it into an object.
 *
 * The text between the marker and the next `</script>` is taken as the
 * assignment; everything after its first "=" (minus a trailing ";") is
 * evaluated as an expression in a fresh `vm` context with a 2 s timeout.
 *
 * NOTE(review): the evaluated text comes from a downloaded page. Node's `vm`
 * module is not a security boundary, so only run this against trusted sites.
 *
 * @param {string} pageHtml - Full HTML of the demo page.
 * @param {string} sourceUrl - URL of the page; used only in error messages.
 * @returns {object} The evaluated demoData object.
 * @throws {Error} When the marker or closing tag is missing, the expression
 *   fails to evaluate, or the result is not a non-null object.
 */
function parseDemoData(pageHtml, sourceUrl) {
  const marker = "var demoData =";
  const start = pageHtml.indexOf(marker);
  if (start === -1) {
    throw new Error(`demoData not found: ${sourceUrl}`);
  }
  // Search for the closing tag from the marker onwards.
  const close = pageHtml.indexOf("</script>", start);
  if (close === -1) {
    throw new Error(`demoData script not terminated: ${sourceUrl}`);
  }
  const assignment = pageHtml.slice(start, close);
  const eqPos = assignment.indexOf("=");
  if (eqPos === -1) {
    throw new Error(`demoData assignment malformed: ${sourceUrl}`);
  }
  const literal = assignment
    .slice(eqPos + 1)
    .trim()
    .replace(/;?\s*$/, "");
  // Parentheses force the literal to parse as an expression, not a block.
  const parsed = vm.runInNewContext(`(${literal})`, {}, { timeout: 2000 });
  const isObject = Boolean(parsed) && typeof parsed === "object";
  if (!isObject) {
    throw new Error(`demoData parse failed: ${sourceUrl}`);
  }
  return parsed;
}
/**
 * Guess a MIME type from the file extension of a URL. The query string and
 * fragment are ignored and matching is case-insensitive.
 *
 * @param {string} url - URL or path to inspect.
 * @returns {string} The matching MIME type, or "application/octet-stream"
 *   when the extension is not recognised.
 */
function mimeFromUrl(url) {
  const bare = url.toLowerCase().split("?")[0].split("#")[0];
  const bySuffix = [
    [".json", "application/json"],
    [".geojson", "application/json"],
    [".csv", "text/csv"],
    [".tsv", "text/tab-separated-values"],
    [".txt", "text/plain"],
    [".js", "text/javascript"],
    [".css", "text/css"],
    [".svg", "image/svg+xml"],
    [".png", "image/png"],
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".gif", "image/gif"],
    [".webp", "image/webp"]
  ];
  const hit = bySuffix.find(([suffix]) => bare.endsWith(suffix));
  return hit ? hit[1] : "application/octet-stream";
}
/**
 * Backslash-escape every regular-expression metacharacter in a string so it
 * can be embedded in a RegExp and matched literally.
 *
 * @param {string} text - Literal text.
 * @returns {string} Text with each of . * + ? ^ $ { } ( ) | [ ] \ escaped.
 */
function escapeRegex(text) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return text.replace(metaChars, (ch) => `\\${ch}`);
}
/**
 * Replace external http(s) URLs that appear as quoted strings in demo code
 * with base64 `data:` URIs holding the downloaded content.
 *
 * URLs listed in `resourceUrls` are left untouched. Every other distinct
 * quoted URL is downloaded once with `fetchBuffer`, and all occurrences of
 * its text in the code are replaced.
 *
 * @param {string} jsCode - Demo JavaScript source.
 * @param {string[]|null|undefined} resourceUrls - URLs to skip.
 * @param {{externalLocalized: number}} stats - Counter object; incremented
 *   once per distinct URL that was replaced.
 * @returns {string} The code with external URLs replaced by data URIs.
 * @throws {Error} Whatever `fetchBuffer` throws when a download fails.
 */
function localizeUrls(jsCode, resourceUrls, stats) {
  const resources = new Set(resourceUrls || []);
  const urlRegex = /(['"`])(https?:\/\/[^'"`\s)]+)\1/g;
  const urls = new Set();
  let match;
  while ((match = urlRegex.exec(jsCode)) !== null) {
    urls.add(match[2]);
  }
  // Handle longer URLs first: if one URL is a prefix of another (".../a" and
  // ".../a.json"), replacing the shorter one first would corrupt the longer.
  const pending = [...urls]
    .filter((url) => !resources.has(url))
    .sort((a, b) => b.length - a.length);
  let localized = jsCode;
  for (const url of pending) {
    const buf = fetchBuffer(url);
    const mime = mimeFromUrl(url);
    const dataUri = `data:${mime};base64,${buf.toString("base64")}`;
    // split/join substitutes the literal text, with no regex or "$" handling.
    localized = localized.split(url).join(dataUri);
    stats.externalLocalized += 1;
  }
  return localized;
}
/**
 * Download a CSS or JavaScript resource and wrap it in an inline `<style>`
 * or `<script>` element.
 *
 * A URL whose path (query and fragment stripped, lower-cased) ends in ".css"
 * is treated as a stylesheet; anything else is treated as JavaScript.
 *
 * @param {string} resourceUrl - URL of the resource.
 * @param {{resourcesInlined: number}} stats - Counter object; incremented once
 *   per resource inlined.
 * @returns {string} HTML element containing the resource body.
 * @throws {Error} Whatever `fetchText` throws when the download fails.
 */
function inlineResource(resourceUrl, stats) {
  const barePath = resourceUrl.toLowerCase().split("?")[0].split("#")[0];
  let content = fetchText(resourceUrl);
  let tag = "style";
  if (!barePath.endsWith(".css")) {
    tag = "script";
    // amCharts CDN index.js expects currentScript.src; inline mode has empty src.
    // Guard the path-extracting regex so a failed match falls back to "./".
    if (/\/lib\/5\/index\.js$/i.test(barePath)) {
      content = content.replace(
        "/(.*\\/)[^\\/]*$/.exec(_)[1]",
        "((/(.*\\/)[^\\/]*$/.exec(_)||[])[1]||\"./\")"
      );
    }
    // A literal closing script tag inside the code would end the inline
    // element early, so break it up.
    content = content.replace(/<\/script/gi, "<\\/script");
  }
  stats.resourcesInlined += 1;
  return `<${tag}>\n${content}\n</${tag}>`;
}
/**
 * Assemble a self-contained HTML page for one demo.
 *
 * The page holds the demo's own CSS, its HTML snippet (or a default
 * `#chartdiv` container when the snippet is missing or blank), every distinct
 * entry of `demoData.resources` inlined via `inlineResource`, and finally the
 * demo script.
 *
 * @param {object} demoData - Parsed demo description; `title`, `html`, `css`
 *   and `resources` are read.
 * @param {string} demoJs - Demo JavaScript to embed.
 * @returns {string} Complete HTML document ending with a newline.
 * @throws {Error} Whatever `inlineResource` throws for a failed download.
 */
function buildHtml(demoData, demoJs) {
  const hasSnippet = typeof demoData.html === "string" && demoData.html.trim();
  const snippet = hasSnippet ? demoData.html : "<div id=\"chartdiv\"></div>";
  const css = typeof demoData.css === "string" ? demoData.css : "";
  const title = (demoData.title || "amCharts Demo").toString();

  // Inline each distinct resource once, in first-seen order.
  const uniqueResources = Array.isArray(demoData.resources)
    ? Array.from(new Set(demoData.resources))
    : [];
  const resourceBlocks = uniqueResources
    .map((url) => inlineResource(url, buildHtml.stats))
    .join("\n");

  // Keep a literal closing script tag in the demo from ending the element.
  const escapedDemoJs = demoJs.replace(/<\/script/gi, "<\\/script");

  const headLines = [
    "<!doctype html>",
    "<html lang=\"en\">",
    "<head>",
    " <meta charset=\"utf-8\" />",
    " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />",
    ` <title>${title}</title>`,
    " <style>",
    " html, body { width: 100%; height: 100%; margin: 0; padding: 0; }",
    " #chartdiv { width: 100%; height: 100vh; }",
    css,
    " </style>",
    "</head>"
  ];
  const bodyLines = [
    "<body>",
    snippet,
    resourceBlocks,
    "<script>",
    escapedDemoJs,
    "</script>",
    "</body>",
    "</html>",
    ""
  ];
  return headLines.concat(bodyLines).join("\n");
}
// Counter passed to inlineResource; callers reset it before each buildHtml call.
buildHtml.stats = { resourcesInlined: 0 };
/**
 * List the numeric names of the "<n>.html" files in a directory.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {number[]} Positive finite numbers taken from matching file names,
 *   sorted ascending; an empty array when the directory does not exist.
 */
function getExistingNumbers(dirPath) {
  if (!fs.existsSync(dirPath)) {
    return [];
  }
  const found = [];
  for (const name of fs.readdirSync(dirPath)) {
    if (!/^\d+\.html$/i.test(name)) {
      continue;
    }
    const value = Number(name.replace(/\.html$/i, ""));
    if (Number.isFinite(value) && value > 0) {
      found.push(value);
    }
  }
  found.sort((a, b) => a - b);
  return found;
}
/**
 * Generate self-contained demo pages for every category that has a
 * `weblink.txt` file.
 *
 * For each category directory under ROOT:
 *   - read the demo page URLs from `weblink.txt`;
 *   - read the existing "<file>\t<url>" mappings from `sources.txt`;
 *   - for each URL not yet recorded (or every URL when the environment
 *     variable FORCE_REWRITE_WEBLINK is "1"), download the page, extract its
 *     demoData, make the script deterministic, inline external content and
 *     write "<n>.html";
 *   - rewrite `sources.txt` with any new mappings appended.
 *
 * Per-URL errors are collected in `stats.failures` rather than aborting the
 * run. The final stats are written to `supplement_report.json` in ROOT and
 * printed to stdout.
 *
 * @returns {void}
 */
function main() {
  const forceRewrite = process.env.FORCE_REWRITE_WEBLINK === "1";
  const stats = {
    generated: 0,
    rewritten: 0,
    skippedExisting: 0,
    skippedLimit: 0,
    fixedRandom: 0,
    externalLocalized: 0,
    resourcesInlined: 0,
    failures: []
  };
  for (const category of CATEGORIES) {
    const categoryDir = path.join(ROOT, category);
    const linksPath = path.join(categoryDir, "weblink.txt");
    if (!fs.existsSync(linksPath)) continue;
    ensureDir(categoryDir);
    const links = readLines(linksPath);
    const sourcePath = path.join(categoryDir, "sources.txt");
    const sourceLines = readLines(sourcePath);
    // URLs already recorded in sources.txt (the part after the tab).
    const existingUrls = new Set(
      sourceLines.map((line) => {
        const tab = line.indexOf("\t");
        return tab >= 0 ? line.slice(tab + 1).trim() : "";
      }).filter(Boolean)
    );
    // URL -> output file name, taken from the same sources.txt lines.
    const existingMap = new Map(
      sourceLines.map((line) => {
        const tab = line.indexOf("\t");
        if (tab < 0) return [line.trim(), ""];
        return [line.slice(tab + 1).trim(), line.slice(0, tab).trim()];
      }).filter((pair) => pair[0])
    );
    // New files are numbered after the highest "<n>.html" already on disk.
    const numbers = getExistingNumbers(categoryDir);
    let next = numbers.length ? numbers[numbers.length - 1] + 1 : 1;
    const updatedSource = [...sourceLines];
    for (const url of links) {
      if (!forceRewrite && existingUrls.has(url)) {
        stats.skippedExisting += 1;
        continue;
      }
      let outName = existingMap.get(url) || "";
      if (!outName) {
        // No free number left under the cap: stop processing this category.
        if (next > MAX_PER_CATEGORY) {
          stats.skippedLimit += 1;
          break;
        }
        outName = `${next}.html`;
      }
      if (!/^\d+\.html$/i.test(outName)) {
        stats.failures.push({
          category,
          url,
          error: `Invalid source mapping filename: ${outName}`
        });
        continue;
      }
      const outNumber = Number(outName.replace(/\.html$/i, ""));
      if (outNumber > MAX_PER_CATEGORY) {
        stats.skippedLimit += 1;
        continue;
      }
      try {
        const html = fetchText(url);
        const demoData = parseDemoData(html, url);
        const resources = Array.isArray(demoData.resources) ? demoData.resources : [];
        let demoJs = typeof demoData.javascript === "string" ? demoData.javascript : "";
        // Seed from the URL slug so the same demo always gets the same data.
        const patched = patchRandom(demoJs, hashSeed(toSlug(url) || url));
        demoJs = patched.code;
        if (patched.fixed) stats.fixedRandom += 1;
        demoJs = localizeUrls(demoJs, resources, stats);
        // buildHtml counts inlined resources on its own counter; reset it
        // first, then fold the per-page count into the run totals.
        buildHtml.stats.resourcesInlined = 0;
        const finalHtml = buildHtml(demoData, demoJs);
        stats.resourcesInlined += buildHtml.stats.resourcesInlined;
        fs.writeFileSync(path.join(categoryDir, outName), finalHtml, "utf8");
        if (!existingUrls.has(url)) {
          updatedSource.push(`${outName}\t${url}`);
          stats.generated += 1;
          next += 1;
          // Record the mapping so that, in force-rewrite mode, a link listed
          // twice in weblink.txt rewrites this same file instead of being
          // written to the next (unreserved) number.
          existingMap.set(url, outName);
        } else {
          stats.rewritten += 1;
        }
        existingUrls.add(url);
      } catch (error) {
        // Record the failure and carry on with the remaining links.
        stats.failures.push({
          category,
          url,
          error: String(error && error.message ? error.message : error)
        });
      }
    }
    fs.writeFileSync(sourcePath, updatedSource.length ? `${updatedSource.join("\n")}\n` : "", "utf8");
  }
  fs.writeFileSync(path.join(ROOT, "supplement_report.json"), JSON.stringify(stats, null, 2), "utf8");
  console.log(JSON.stringify(stats, null, 2));
}
main();