// Spaces: Configuration error
import { existsSync } from "node:fs";

import { chromium } from "@playwright/test";
// URL of the page under test. The first CLI argument overrides the default.
const baseUrl = process.argv[2] || "http://localhost:5173/?model=qwen25coder&device=wasm";

// Browser binary passed to Playwright. CHROMIUM_PATH wins when it is set;
// otherwise Google Chrome is used if it exists at the path below, and the
// snap Chromium path is the last resort (its existence is not checked here).
const googleChromePath = "/usr/bin/google-chrome";
const executablePath =
  process.env.CHROMIUM_PATH || (existsSync(googleChromePath) ? googleChromePath : "/snap/bin/chromium");

// Generation budget, kept as a string. `||` (rather than `??`) means an empty
// MAX_NEW_TOKENS value also falls back to the default.
const maxNewTokens = process.env.MAX_NEW_TOKENS || "256";
/**
 * Scenarios given to the agent, in the order they are run.
 *
 * Each entry has:
 *   - id:             short identifier copied into the result record.
 *   - prompt:         text typed into the page's prompt field.
 *   - expectedOutput: text that must appear in the agent transcript.
 *   - expectedFiles:  names that must appear in the page's file listing.
 */
const tasks = [
  {
    id: "simple-calc",
    prompt:
      "Create hello.js containing JavaScript that computes 21 * 2 and prints result: 42, then run node hello.js.",
    expectedOutput: "42",
    expectedFiles: ["hello.js"],
  },
  {
    id: "npm-dependency",
    prompt:
      "Install npm package is-number@7.0.0, create check-package.mjs that imports it and prints dependency check: true, then run it with Node.",
    expectedOutput: "dependency check: true",
    expectedFiles: ["check-package.mjs", "node_modules"],
  },
  {
    id: "multi-file-module",
    prompt:
      "Create src/math.mjs exporting multiply(a,b), create test.mjs importing it and printing multi result: 42 for multiply(6,7), then run node test.mjs.",
    expectedOutput: "multi result: 42",
    expectedFiles: ["test.mjs", "src"],
  },
];
// Time budgets, in milliseconds, for the slow steps of a task run.
const PAGE_DEFAULT_TIMEOUT_MS = 20 * 60 * 1000; // 1200000
const MODEL_LOAD_TIMEOUT_MS = 10 * 60 * 1000; // 600000
const AGENT_START_TIMEOUT_MS = 2 * 60 * 1000; // 120000
const AGENT_FINISH_TIMEOUT_MS = 20 * 60 * 1000; // 1200000

// Number of trailing console/pageerror lines printed when a task fails.
const FAILURE_CONSOLE_TAIL = 20;

/**
 * Waits until the page's #status element text is exactly `text`.
 *
 * @param {object} page - Playwright page.
 * @param {string} text - Status text to wait for.
 * @param {object} [options] - Options forwarded to `page.waitForFunction`
 *   (for example `{ timeout }`); the page default timeout applies when omitted.
 * @returns {Promise<unknown>} Settles when the status matches or the wait times out.
 */
function waitForStatus(page, text, options = {}) {
  return page.waitForFunction(
    (expected) => document.querySelector("#status")?.textContent === expected,
    text,
    options,
  );
}

/**
 * Checks what the page reported after a run against the task's expectations.
 * All comparisons are case-insensitive substring matches.
 *
 * @param {{expectedOutput: string, expectedFiles: string[]}} task
 * @param {{transcript: string, files: ?string, events: ?string}} observed
 * @throws {Error} When the expected output, an expected file, or the
 *   "tool: run_command finished" event is missing.
 */
function assertTaskOutcome(task, { transcript, files, events }) {
  const lowerTranscript = transcript.toLowerCase();
  const lowerEvents = events?.toLowerCase() || "";
  const lowerFiles = files?.toLowerCase() || "";

  // NOTE(review): every task prompt contains its own expectedOutput text, so
  // if the transcript echoes the prompt this check can never fail — confirm
  // what `window.__piWebAgent.transcript` actually contains.
  if (!lowerTranscript.includes(task.expectedOutput.toLowerCase())) {
    throw new Error(`Expected output ${task.expectedOutput}.\n\nTranscript:\n${transcript}\n\nEvents:\n${events}`);
  }

  const missingFile = task.expectedFiles.find((file) => !lowerFiles.includes(file.toLowerCase()));
  if (missingFile !== undefined) {
    throw new Error(`Expected ${missingFile} in file listing.\n\nFiles:\n${files}\n\nTranscript:\n${transcript}`);
  }

  if (!lowerEvents.includes("tool: run_command finished")) {
    throw new Error(`Expected run_command tool execution.\n\nEvents:\n${events}`);
  }
}

/**
 * Runs one task end to end in a freshly launched headless browser: opens the
 * page, loads the model, submits the prompt, waits for the agent to finish and
 * verifies the outcome.
 *
 * @param {{id: string, prompt: string, expectedOutput: string, expectedFiles: string[]}} task
 * @returns {Promise<object>} Result record: id, ok, maxNewTokens, modelStatus,
 *   runMs, files, transcriptChars and the last three console warnings.
 * @throws {Error} When the page is not cross-origin isolated, a wait times
 *   out, or the outcome does not match the task's expectations. Captured page
 *   console output is written to stderr before the error propagates.
 */
async function runTask(task) {
  const browser = await chromium.launch({
    executablePath,
    headless: true,
    args: ["--no-sandbox", "--disable-dev-shm-usage"],
  });
  // Declared outside `try` so the failure path can still report it.
  const consoleLines = [];
  try {
    const page = await browser.newPage();
    page.setDefaultTimeout(PAGE_DEFAULT_TIMEOUT_MS);
    page.on("console", (message) => {
      consoleLines.push(`${message.type()}: ${message.text()}`);
    });
    page.on("pageerror", (error) => {
      consoleLines.push(`pageerror: ${error.stack || error.message}`);
    });

    await page.goto(baseUrl, { waitUntil: "networkidle" });
    await waitForStatus(page, "Ready");
    const isolated = await page.evaluate(() => globalThis.crossOriginIsolated);
    if (!isolated) throw new Error("Page is not cross-origin isolated.");

    // Configure generation and load the model.
    await page.fill("#max-new-tokens", maxNewTokens);
    await page.fill("#temperature", "0");
    await page.selectOption("#gate-device", "wasm");
    await page.click("#confirm-load-model");
    await page.waitForFunction(() => window.__piWebAgent?.modelReady === true, null, {
      timeout: MODEL_LOAD_TIMEOUT_MS,
    });

    // Timing starts after the model is ready, so runMs covers only the agent run.
    const started = Date.now();
    await page.fill("#prompt", task.prompt);
    await page.click("#send");
    await waitForStatus(page, "Agent running", { timeout: AGENT_START_TIMEOUT_MS });
    await waitForStatus(page, "Ready", { timeout: AGENT_FINISH_TIMEOUT_MS });

    const transcript = await page.evaluate(() => window.__piWebAgent?.transcript || "");
    const files = await page.textContent("#files");
    const events = await page.textContent("#event-log");
    const modelStatus = await page.textContent("#model-status");

    assertTaskOutcome(task, { transcript, files, events });

    return {
      id: task.id,
      ok: true,
      maxNewTokens,
      modelStatus,
      runMs: Date.now() - started,
      files,
      transcriptChars: transcript.length,
      warnings: consoleLines.filter((line) => line.startsWith("warning:")).slice(-3),
    };
  } catch (error) {
    // The page's console and pageerror output is usually the best clue to a
    // failure; print its tail to stderr and rethrow the original error as is.
    if (consoleLines.length > 0) {
      const tail = consoleLines.slice(-FAILURE_CONSOLE_TAIL).join("\n");
      console.error(`[${task.id}] last page console output:\n${tail}`);
    }
    throw error;
  } finally {
    await browser.close();
  }
}
// Run the tasks one after another (each runTask call launches its own
// browser). A failing task is recorded as `ok: false` instead of aborting the
// run, so the remaining tasks still execute and the JSON report is always
// printed; the process still exits non-zero if anything failed.
const results = [];
for (const task of tasks) {
  try {
    results.push(await runTask(task));
  } catch (error) {
    // Full error (with stack) goes to stderr; stdout stays pure JSON.
    console.error(`Task ${task.id} failed:`, error);
    results.push({
      id: task.id,
      ok: false,
      error: error instanceof Error ? error.message : String(error),
    });
    process.exitCode = 1;
  }
}
console.log(JSON.stringify(results, null, 2));