Spaces:
Running
Running
fix(memory): prevent server OOM from unbounded Next.js fetch cache and dataset info cache
Browse files
- Add `cache: "no-store"` to fetchParquetFile, fetchJson, and the tasks.jsonl fetch
so Next.js does not accumulate large parquet ArrayBuffers in its Data Cache
across many requests over days of continuous operation
- Cap datasetInfoCache at 200 entries with FIFO eviction to prevent unbounded growth
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
src/app/[org]/[dataset]/[episode]/fetch-data.ts
CHANGED
|
@@ -363,7 +363,7 @@ async function getEpisodeDataV2(
|
|
| 363 |
if (!task && allData.length > 0) {
|
| 364 |
try {
|
| 365 |
const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
|
| 366 |
-
const tasksResponse = await fetch(tasksUrl);
|
| 367 |
|
| 368 |
if (tasksResponse.ok) {
|
| 369 |
const tasksText = await tasksResponse.text();
|
|
|
|
| 363 |
if (!task && allData.length > 0) {
|
| 364 |
try {
|
| 365 |
const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
|
| 366 |
+
const tasksResponse = await fetch(tasksUrl, { cache: "no-store" });
|
| 367 |
|
| 368 |
if (tasksResponse.ok) {
|
| 369 |
const tasksText = await tasksResponse.text();
|
src/utils/parquetUtils.ts
CHANGED
|
@@ -25,7 +25,7 @@ export interface DatasetMetadata {
|
|
| 25 |
}
|
| 26 |
|
| 27 |
export async function fetchJson<T>(url: string): Promise<T> {
|
| 28 |
-
const res = await fetch(url);
|
| 29 |
if (!res.ok) {
|
| 30 |
throw new Error(
|
| 31 |
`Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
|
|
@@ -43,7 +43,7 @@ export function formatStringWithVars(
|
|
| 43 |
|
| 44 |
// Fetch and parse the Parquet file
|
| 45 |
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
|
| 46 |
-
const res = await fetch(url);
|
| 47 |
|
| 48 |
if (!res.ok) {
|
| 49 |
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
export async function fetchJson<T>(url: string): Promise<T> {
|
| 28 |
+
const res = await fetch(url, { cache: "no-store" });
|
| 29 |
if (!res.ok) {
|
| 30 |
throw new Error(
|
| 31 |
`Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
|
|
|
|
| 43 |
|
| 44 |
// Fetch and parse the Parquet file
|
| 45 |
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
|
| 46 |
+
const res = await fetch(url, { cache: "no-store" });
|
| 47 |
|
| 48 |
if (!res.ok) {
|
| 49 |
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
|
src/utils/versionUtils.ts
CHANGED
|
@@ -31,12 +31,13 @@ export interface DatasetInfo {
|
|
| 31 |
features: Record<string, FeatureInfo>;
|
| 32 |
}
|
| 33 |
|
| 34 |
-
// In-memory cache for dataset info (5 min TTL)
|
| 35 |
const datasetInfoCache = new Map<
|
| 36 |
string,
|
| 37 |
{ data: DatasetInfo; expiry: number }
|
| 38 |
>();
|
| 39 |
const CACHE_TTL_MS = 5 * 60 * 1000;
|
|
|
|
| 40 |
|
| 41 |
export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
| 42 |
const cached = datasetInfoCache.get(repoId);
|
|
@@ -72,6 +73,10 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
|
| 72 |
);
|
| 73 |
}
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
datasetInfoCache.set(repoId, {
|
| 76 |
data: data as DatasetInfo,
|
| 77 |
expiry: Date.now() + CACHE_TTL_MS,
|
|
|
|
| 31 |
features: Record<string, FeatureInfo>;
|
| 32 |
}
|
| 33 |
|
| 34 |
+
// In-memory cache for dataset info (5 min TTL, max 200 entries)
|
| 35 |
const datasetInfoCache = new Map<
|
| 36 |
string,
|
| 37 |
{ data: DatasetInfo; expiry: number }
|
| 38 |
>();
|
| 39 |
const CACHE_TTL_MS = 5 * 60 * 1000;
|
| 40 |
+
const CACHE_MAX_SIZE = 200;
|
| 41 |
|
| 42 |
export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
| 43 |
const cached = datasetInfoCache.get(repoId);
|
|
|
|
| 73 |
);
|
| 74 |
}
|
| 75 |
|
| 76 |
+
if (datasetInfoCache.size >= CACHE_MAX_SIZE) {
|
| 77 |
+
// Evict the oldest entry (Maps preserve insertion order)
|
| 78 |
+
datasetInfoCache.delete(datasetInfoCache.keys().next().value!);
|
| 79 |
+
}
|
| 80 |
datasetInfoCache.set(repoId, {
|
| 81 |
data: data as DatasetInfo,
|
| 82 |
expiry: Date.now() + CACHE_TTL_MS,
|