| #!/usr/bin/env node |
| import fs from "fs/promises"; |
| import path from "path"; |
| import { Command } from "commander"; |
| import { runCollect, compareItems, exportCsv } from "./core/pipeline.js"; |
| import { startServer } from "./server-start.js"; |
| import type { CollectOptions, DedupeMode, Platform, RawProductItem } from "./types.js"; |
|
|
// Root CLI command; the subcommands registered on it are dispatched by the
// parseAsync call at the bottom of the file.
const program = new Command()
  .name("pricebot")
  .description("电商商品价格采集与对比工具")
  .version("0.1.0");
|
|
/**
 * Parses the comma-separated `--platform` value into a list of platforms.
 *
 * Entries are trimmed and empty entries are ignored. A platform named more
 * than once is kept only once (first occurrence wins) so that the same
 * platform is never requested twice.
 *
 * @param v Raw option value, e.g. `"jd, tb"`.
 * @returns The selected platforms in the order they were first given, or all
 *   supported platforms when `v` names none.
 * @throws Error if an entry is not one of `jd`, `tb`, `pdd`.
 */
function parsePlatforms(v: string): Platform[] {
  const supported: readonly Platform[] = ["jd", "tb", "pdd"];
  const supportedNames: ReadonlySet<string> = new Set<string>(supported);
  const isPlatform = (name: string): name is Platform => supportedNames.has(name);

  // A Set keeps insertion order, which gives order-preserving de-duplication.
  const selected = new Set<Platform>();
  for (const piece of v.split(",")) {
    const name = piece.trim();
    if (!name) continue;
    if (!isPlatform(name)) throw new Error(`未知平台: ${name}`);
    selected.add(name);
  }

  return selected.size > 0 ? Array.from(selected) : Array.from(supported);
}
|
|
/**
 * Writes `data` to `p` as pretty-printed JSON (2-space indent, trailing
 * newline, UTF-8), creating any missing parent directories first.
 *
 * @param p Destination file path; a relative path is resolved against the
 *   current working directory.
 * @param data Value to serialize with `JSON.stringify`.
 */
async function writeJson(p: string, data: unknown): Promise<void> {
  const target = path.resolve(process.cwd(), p);
  const parentDir = path.dirname(target);
  await fs.mkdir(parentDir, { recursive: true });

  const serialized = `${JSON.stringify(data, null, 2)}\n`;
  await fs.writeFile(target, serialized, "utf-8");
}
|
|
// `collect` subcommand: validates the CLI options, runs the collection
// pipeline, writes the result as JSON to --out, prints the output path on
// stdout and any pipeline warnings on stderr.
program
  .command("collect")
  .requiredOption("--q <keyword>", "关键词")
  .option("--platform <list>", "平台(逗号分隔:jd,tb,pdd)", "jd,tb,pdd")
  .option("--limit <n>", "每个平台最多抓取条数", "30")
  .option("--concurrency <n>", "平台并发数", "3")
  .option("--qps <n>", "单进程节流(请求/秒,0 为不限制)", "")
  .option("--retries <n>", "请求失败重试次数", "")
  .option("--log", "打印采集日志")
  .option("--dedupe <mode>", "去重模式:platform|cross", "platform")
  .option("--mode <mode>", "采集模式:mock|live", "mock")
  .option("--out <file>", "输出 JSON 路径", "out/collect.json")
  .action(async (opts) => {
    // Converts a numeric flag value. Blank, non-numeric, infinite and negative
    // values are rejected here so that NaN never reaches the pipeline.
    const toNumber = (flag: string, raw: unknown): number => {
      const text = String(raw).trim();
      const n = Number(text);
      if (text === "" || !Number.isFinite(n) || n < 0) {
        throw new Error(`无效的 --${flag} 取值: ${String(raw)}`);
      }
      return n;
    };
    // --qps and --retries default to "" meaning "not given"; that maps to
    // undefined so the pipeline can apply its own default.
    const toOptionalNumber = (flag: string, raw: unknown): number | undefined =>
      raw === "" ? undefined : toNumber(flag, raw);

    // Accepted values mirror the option help texts above.
    const dedupe = String(opts.dedupe);
    if (!["platform", "cross"].includes(dedupe)) {
      throw new Error(`未知去重模式: ${dedupe}`);
    }
    const mode = String(opts.mode);
    if (!["mock", "live"].includes(mode)) {
      throw new Error(`未知采集模式: ${mode}`);
    }

    const options: CollectOptions = {
      q: String(opts.q),
      platforms: parsePlatforms(String(opts.platform)),
      limit: toNumber("limit", opts.limit),
      concurrency: toNumber("concurrency", opts.concurrency),
      qps: toOptionalNumber("qps", opts.qps),
      retries: toOptionalNumber("retries", opts.retries),
      // Casts are applied only after the runtime checks above.
      dedupe: dedupe as DedupeMode,
      mode: mode as CollectOptions["mode"]
    };

    const res = await runCollect(options, {
      // With --log, pipeline events are echoed to stderr so that stdout
      // carries only the output path.
      onEvent: opts.log
        ? (e) => {
            const p = e.platform ? `[${e.platform}] ` : "";
            process.stderr.write(`${e.t} ${e.level.toUpperCase()} ${p}${e.msg}\n`);
          }
        : undefined
    });

    await writeJson(String(opts.out), res);
    process.stdout.write(`${opts.out}\n`);
    if (res.warnings.length) process.stderr.write(`${res.warnings.join("\n")}\n`);
  });
|
|
// `compare` subcommand: reads raw items from --in (either a bare array or an
// object with a `rawItems` array), runs the comparison, writes `{ items }` as
// JSON to --out and optionally exports a CSV next to it. Written paths are
// printed on stdout, one per line.
program
  .command("compare")
  .requiredOption("--in <file>", "输入 JSON(collect 输出或 rawItems 数组)")
  .option("--dedupe <mode>", "去重模式:platform|cross", "platform")
  .option("--out <file>", "输出 JSON 路径", "out/compare.json")
  .option("--export <fmt>", "额外导出:csv", "")
  .action(async (opts) => {
    // Validate all options before touching the file system so that a typo
    // never leaves partial output behind.
    const dedupe = String(opts.dedupe);
    if (!["platform", "cross"].includes(dedupe)) {
      throw new Error(`未知去重模式: ${dedupe}`);
    }
    const exportFormat = String(opts.export).toLowerCase();
    if (exportFormat !== "" && exportFormat !== "csv") {
      throw new Error(`未知导出格式: ${String(opts.export)}`);
    }

    const inPath = path.resolve(process.cwd(), String(opts.in));
    const s = await fs.readFile(inPath, "utf-8");
    const parsed: unknown = JSON.parse(s);

    // Only the container shape is checked here; the individual items are
    // passed through to compareItems unvalidated.
    let rawItems: RawProductItem[];
    if (Array.isArray(parsed)) {
      rawItems = parsed as RawProductItem[];
    } else if (typeof parsed === "object" && parsed !== null) {
      const candidate = (parsed as { rawItems?: unknown }).rawItems;
      if (candidate == null) {
        // An object without rawItems is treated as an empty input.
        rawItems = [];
      } else if (Array.isArray(candidate)) {
        rawItems = candidate as RawProductItem[];
      } else {
        throw new Error(`输入文件格式无效(rawItems 不是数组): ${inPath}`);
      }
    } else {
      throw new Error(`输入文件格式无效(需要数组或包含 rawItems 的对象): ${inPath}`);
    }

    const { items } = compareItems(rawItems, { dedupe: dedupe as DedupeMode });
    const outFile = String(opts.out);
    await writeJson(outFile, { items });

    if (exportFormat === "csv") {
      // Swap a trailing ".json" for ".csv"; otherwise append ".csv" so the
      // CSV path can never equal the JSON output path.
      const jsonExt = /\.json$/i;
      const csvPath = jsonExt.test(outFile) ? outFile.replace(jsonExt, ".csv") : `${outFile}.csv`;
      await exportCsv(items, csvPath);
      process.stdout.write(`${opts.out}\n${csvPath}\n`);
      return;
    }
    process.stdout.write(`${opts.out}\n`);
  });
|
|
// `serve` subcommand: starts the server on the requested port.
program
  .command("serve")
  .option("--port <n>", "端口", "5175")
  .action(async (opts) => {
    const port = Number(opts.port);
    // Reject NaN, fractional and out-of-range values up front instead of
    // forwarding them to startServer.
    if (!Number.isInteger(port) || port < 0 || port > 65535) {
      throw new Error(`无效端口: ${String(opts.port)}`);
    }
    await startServer({ port });
  });
|
|
// Entry point: parse argv and run the matching subcommand. Any error thrown
// while parsing or inside an action is printed to stderr as a single message
// line and the process exits with status 1.
void (async () => {
  try {
    await program.parseAsync(process.argv);
  } catch (err: unknown) {
    const text = err instanceof Error ? err.message : String(err);
    process.stderr.write(`${text}\n`);
    process.exit(1);
  }
})();
|
|