File size: 1,770 Bytes
0e759d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import IORedis from "ioredis";
import { ScrapeOptions } from "../controllers/v1/types";
import { InternalOptions } from "../scraper/scrapeURL";
import { logger as _logger } from "./logger";
const logger = _logger.child({ module: "cache" });

export const cacheRedis = process.env.CACHE_REDIS_URL
  ? new IORedis(process.env.CACHE_REDIS_URL, {
      maxRetriesPerRequest: null,
    })
  : null;

export function cacheKey(
  url: string,
  scrapeOptions: ScrapeOptions,
  internalOptions: InternalOptions,
): string | null {
  if (!cacheRedis) return null;

  // these options disqualify a cache
  if (
    internalOptions.v0CrawlOnlyUrls ||
    internalOptions.forceEngine?.includes("cache") ||
    scrapeOptions.fastMode ||
    internalOptions.atsv ||
    (scrapeOptions.actions && scrapeOptions.actions.length > 0)
  ) {
    return null;
  }

  return "cache:" + url + ":waitFor:" + scrapeOptions.waitFor;
}

export type CacheEntry = {
  url: string;
  html: string;
  statusCode: number;
  error?: string;
};

export async function saveEntryToCache(key: string, entry: CacheEntry) {
  if (!cacheRedis) return;

  if (!entry.html || entry.html.length < 100) {
    logger.warn("Skipping cache save for short HTML", {
      key,
      htmlLength: entry.html?.length,
    });
    return;
  }

  try {
    await cacheRedis.set(key, JSON.stringify(entry), "EX", 14400); // 4 hours in seconds
  } catch (error) {
    logger.warn("Failed to save to cache", { key, error });
  }
}

export async function getEntryFromCache(
  key: string,
): Promise<CacheEntry | null> {
  if (!cacheRedis) return null;

  try {
    return JSON.parse((await cacheRedis.get(key)) ?? "null");
  } catch (error) {
    logger.warn("Failed to get from cache", { key, error });
    return null;
  }
}