Spaces:
Configuration error
Configuration error
File size: 4,183 Bytes
53f8186 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | import { chromium } from "@playwright/test";
import { existsSync } from "node:fs";
// Run configuration, resolved once at module load from the CLI and environment.

// URL of the page under test: the first CLI argument wins, otherwise the local
// dev server with the model/device query string below.
const [, , cliUrl] = process.argv;
const baseUrl = cliUrl
  ? cliUrl
  : "http://localhost:5173/?model=qwen25coder&device=wasm";

/**
 * Chooses the browser binary handed to Playwright.
 * Precedence: CHROMIUM_PATH env var, then Google Chrome at /usr/bin if that
 * file exists, then the snap Chromium path.
 * @returns {string} path to the browser executable
 */
function resolveChromiumPath() {
  const override = process.env.CHROMIUM_PATH;
  if (override) {
    return override;
  }
  if (existsSync("/usr/bin/google-chrome")) {
    return "/usr/bin/google-chrome";
  }
  return "/snap/bin/chromium";
}
const executablePath = resolveChromiumPath();

// Kept as a string because it is typed into a form field verbatim; an unset or
// empty MAX_NEW_TOKENS falls back to "256".
const { MAX_NEW_TOKENS: tokenOverride } = process.env;
const maxNewTokens = tokenOverride ? tokenOverride : "256";
// Task specifications keyed by task id. Each spec carries:
//   prompt         - text submitted to the agent
//   expectedOutput - substring that must appear in the transcript (case-insensitive)
//   expectedFiles  - names that must appear in the file listing (case-insensitive)
const taskSpecs = {
  "simple-calc": {
    prompt:
      "Create hello.js containing JavaScript that computes 21 * 2 and prints result: 42, then run node hello.js.",
    expectedOutput: "42",
    expectedFiles: ["hello.js"],
  },
  "npm-dependency": {
    prompt:
      "Install npm package is-number@7.0.0, create check-package.mjs that imports it and prints dependency check: true, then run it with Node.",
    expectedOutput: "dependency check: true",
    expectedFiles: ["check-package.mjs", "node_modules"],
  },
  "multi-file-module": {
    prompt:
      "Create src/math.mjs exporting multiply(a,b), create test.mjs importing it and printing multi result: 42 for multiply(6,7), then run node test.mjs.",
    expectedOutput: "multi result: 42",
    expectedFiles: ["test.mjs", "src"],
  },
};

// Flatten the map into the ordered list of task records ({ id, prompt,
// expectedOutput, expectedFiles }) that the runner iterates over.
const tasks = Object.entries(taskSpecs).map(([id, spec]) => ({ id, ...spec }));
/**
 * Runs one agent task end-to-end in its own headless browser instance.
 *
 * Flow: open the page, wait for it to report "Ready", configure and load the
 * model, submit the task prompt, wait for the agent to start and finish, then
 * check the transcript, file listing and event log against the task's
 * expectations.
 *
 * @param {{id: string, prompt: string, expectedOutput: string, expectedFiles: string[]}} task
 * @returns {Promise<object>} summary with id, ok, maxNewTokens, modelStatus,
 *   runMs, files, transcriptChars and the last three console warnings
 * @throws {Error} if the page is not cross-origin isolated, or if the expected
 *   output, an expected file, or a finished run_command event is missing;
 *   Playwright waits reject on their own timeouts
 */
async function runTask(task) {
  const browser = await chromium.launch({
    executablePath,
    headless: true,
    args: ["--no-sandbox", "--disable-dev-shm-usage"],
  });

  try {
    const page = await browser.newPage();
    page.setDefaultTimeout(1200000);

    // Capture page console output and uncaught page errors as "<type>: <text>" lines.
    const consoleLines = [];
    page.on("console", (message) => {
      consoleLines.push(`${message.type()}: ${message.text()}`);
    });
    page.on("pageerror", (error) => {
      consoleLines.push(`pageerror: ${error.stack || error.message}`);
    });

    // Resolves once the #status element's text equals `label` exactly.
    // Without `options` the page-level default timeout set above applies.
    const waitForStatus = (label, options) =>
      page.waitForFunction(
        (expected) => document.querySelector("#status")?.textContent === expected,
        label,
        options,
      );

    await page.goto(baseUrl, { waitUntil: "networkidle" });
    await waitForStatus("Ready");

    const isolated = await page.evaluate(() => globalThis.crossOriginIsolated);
    if (!isolated) {
      throw new Error("Page is not cross-origin isolated.");
    }

    // Configure generation (temperature "0"), pick the wasm device, and load the model.
    await page.fill("#max-new-tokens", maxNewTokens);
    await page.fill("#temperature", "0");
    await page.selectOption("#gate-device", "wasm");
    await page.click("#confirm-load-model");
    await page.waitForFunction(() => window.__piWebAgent?.modelReady === true, null, {
      timeout: 600000,
    });

    // Timing starts only after the model is ready, so runMs covers the agent run alone.
    const started = Date.now();
    await page.fill("#prompt", task.prompt);
    await page.click("#send");
    await waitForStatus("Agent running", { timeout: 120000 });
    await waitForStatus("Ready", { timeout: 1200000 });

    // Collect what the page exposes about the finished run.
    const transcript = await page.evaluate(() => window.__piWebAgent?.transcript || "");
    const files = await page.textContent("#files");
    const events = await page.textContent("#event-log");
    const modelStatus = await page.textContent("#model-status");

    // All comparisons are case-insensitive; missing elements count as empty text.
    const transcriptLc = transcript.toLowerCase();
    const eventsLc = (events ?? "").toLowerCase();
    const filesLc = (files ?? "").toLowerCase();

    // NOTE(review): each task prompt itself contains its expectedOutput text; if the
    // transcript echoes the prompt, this check would pass vacuously — confirm.
    const sawExpectedOutput = transcriptLc.includes(task.expectedOutput.toLowerCase());
    if (!sawExpectedOutput) {
      throw new Error(`Expected output ${task.expectedOutput}.\n\nTranscript:\n${transcript}\n\nEvents:\n${events}`);
    }

    // Report the first expected file that is absent from the listing.
    const missingFile = task.expectedFiles.find((name) => !filesLc.includes(name.toLowerCase()));
    if (missingFile !== undefined) {
      throw new Error(`Expected ${missingFile} in file listing.\n\nFiles:\n${files}\n\nTranscript:\n${transcript}`);
    }

    const ranCommand = eventsLc.includes("tool: run_command finished");
    if (!ranCommand) {
      throw new Error(`Expected run_command tool execution.\n\nEvents:\n${events}`);
    }

    const runMs = Date.now() - started;
    const warningLines = consoleLines.filter((line) => line.startsWith("warning:"));
    return {
      id: task.id,
      ok: true,
      maxNewTokens,
      modelStatus,
      runMs,
      files,
      transcriptChars: transcript.length,
      warnings: warningLines.slice(-3),
    };
  } finally {
    // Always release the browser, whether the task passed or threw.
    await browser.close();
  }
}
/**
 * Runs every task strictly one after another and gathers the summaries in
 * task order. The first failing task rejects and aborts the remaining ones.
 * @returns {Promise<object[]>} one summary per task
 */
async function runAllTasks() {
  const summaries = [];
  for (const task of tasks) {
    const summary = await runTask(task);
    summaries.push(summary);
  }
  return summaries;
}

// Emit the collected summaries as pretty-printed JSON on stdout.
const results = await runAllTasks();
console.log(JSON.stringify(results, null, 2));
|