import { Response } from "express"; import { CrawlErrorsResponse, CrawlStatusParams, RequestWithAuth, } from "./types"; import { getCrawl, getCrawlJobs, } from "../../lib/crawl-redis"; import { getScrapeQueue, redisConnection } from "../../services/queue-service"; import { configDotenv } from "dotenv"; import { Job } from "bullmq"; configDotenv(); export async function getJob(id: string) { const job = await getScrapeQueue().getJob(id); if (!job) return job; return job; } export async function getJobs(ids: string[]) { const jobs: (Job & { id: string })[] = ( await Promise.all(ids.map((x) => getScrapeQueue().getJob(x))) ).filter((x) => x) as (Job & { id: string })[]; return jobs; } export async function crawlErrorsController( req: RequestWithAuth, res: Response, ) { const sc = await getCrawl(req.params.jobId); if (!sc) { return res.status(404).json({ success: false, error: "Job not found" }); } if (sc.team_id !== req.auth.team_id) { return res.status(403).json({ success: false, error: "Forbidden" }); } let jobStatuses = await Promise.all( (await getCrawlJobs(req.params.jobId)).map( async (x) => [x, await getScrapeQueue().getJobState(x)] as const, ), ); const failedJobIDs: string[] = []; for (const [id, status] of jobStatuses) { if (status === "failed") { failedJobIDs.push(id); } } res.status(200).json({ errors: (await getJobs(failedJobIDs)).map((x) => ({ id: x.id, timestamp: x.finishedOn !== undefined ? new Date(x.finishedOn).toISOString() : undefined, url: x.data.url, error: x.failedReason, })), robotsBlocked: await redisConnection.smembers( "crawl:" + req.params.jobId + ":robots_blocked", ), }); }