File size: 3,006 Bytes
a2c885c
da957b0
 
 
 
 
 
42df2a9
 
 
 
a2c885c
42df2a9
9cc2491
da957b0
9cc2491
da957b0
 
 
 
 
9cc2491
 
 
da957b0
9cc2491
da957b0
 
42df2a9
 
a2c885c
 
42df2a9
9cc2491
5b87eae
9cc2491
a2c885c
 
9cc2491
 
da957b0
42df2a9
9cc2491
 
a2c885c
9cc2491
 
 
 
 
a2c885c
9cc2491
 
 
a2c885c
9cc2491
 
 
a2c885c
da957b0
42df2a9
da957b0
9cc2491
42df2a9
da957b0
a2c885c
da957b0
 
42df2a9
 
da957b0
 
 
 
42df2a9
 
da957b0
 
 
 
42df2a9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import { HF_DATASET_BASE_URL, getCorpus, getDocRepoPath, getDocLocalPath } from '../../../utils/config.js';
import fs from 'fs';

/**
 * Decide whether documents should be loaded from the remote HF dataset.
 *
 * The result is truthy only when the HF_TOKEN environment variable is set to a
 * non-empty value and NODE_ENV is anything other than 'development'.
 *
 * @returns {boolean|string|undefined} `true`/`false` when a token is present;
 *   otherwise the falsy token value itself (`undefined` or `''`).
 */
const isHFSpace = () => {
    const { HF_TOKEN: token, NODE_ENV: mode } = process.env;

    // Without a token the falsy token value itself is handed back unchanged.
    if (!token) {
        return token;
    }

    return mode !== 'development';
};

/**
 * Build a JSON HTTP response with the given status code.
 *
 * @param {unknown} payload - Value serialised with JSON.stringify as the body.
 * @param {number} status - HTTP status code.
 * @returns {Response}
 */
function jsonResponse(payload, status) {
    return new Response(JSON.stringify(payload), {
        status,
        headers: { 'Content-Type': 'application/json' }
    });
}

/**
 * Strictly parse a query-string value as a non-negative integer.
 *
 * Unlike parseInt, partial matches such as "12abc" or "1.5" are rejected.
 *
 * @param {string} value - Raw query parameter value.
 * @returns {number|null} The parsed integer, or null when the value is not a
 *   plain run of decimal digits or exceeds the safe integer range.
 */
function parseNonNegativeInt(value) {
    if (!/^\d+$/.test(value)) {
        return null;
    }
    const parsed = Number(value);
    return Number.isSafeInteger(parsed) ? parsed : null;
}

/**
 * GET handler returning a single page entry of a document as JSON.
 *
 * Query parameters:
 *   - index  (required) non-negative integer identifying the document
 *   - page   (required) non-negative integer identifying the page
 *   - corpus (optional) corpus identifier, passed to getCorpus
 *
 * The document JSON is fetched from the HF dataset when isHFSpace() is truthy,
 * and read from the local path otherwise. The returned entry is the first one
 * whose `document.pages[0]` equals the requested page number.
 *
 * @param {{ url: string }} request - Incoming request; only `url` is read.
 * @returns {Promise<Response>} 200 with the page entry; 400 on missing or
 *   invalid parameters; 404 when the document or page is not found (or the
 *   upstream HF status when the remote fetch fails); 500 on any other error.
 */
export async function GET(request) {
    const { searchParams } = new URL(request.url);
    const index = searchParams.get('index');
    const page = searchParams.get('page');
    const corpusId = searchParams.get('corpus');

    if (index === null || page === null) {
        return jsonResponse({ error: "Missing index or page parameter" }, 400);
    }

    const indexNum = parseNonNegativeInt(index);
    const pageNum = parseNonNegativeInt(page);

    if (indexNum === null || pageNum === null) {
        return jsonResponse({ error: "index and page must be non-negative integers" }, 400);
    }

    try {
        // Resolved inside the try so a failing corpus lookup yields the JSON 500
        // below instead of an unhandled rejection.
        const corpus = getCorpus(corpusId);

        let pagesData;

        if (isHFSpace()) {
            const docRepoPath = getDocRepoPath(corpus, indexNum);
            const docUrl = `${HF_DATASET_BASE_URL}/raw/main/${docRepoPath}`;
            const res = await fetch(docUrl, {
                headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
            });

            if (!res.ok) {
                // The upstream status code is forwarded to the caller as-is.
                return jsonResponse(
                    { error: `doc_${indexNum} not found on HF (${corpus.id})` },
                    res.status
                );
            }
            pagesData = await res.json();
        } else {
            const filePath = getDocLocalPath(corpus, indexNum);

            let raw;
            try {
                // Async read keeps the event loop free and avoids a separate
                // exists-then-read check.
                raw = await fs.promises.readFile(filePath, 'utf-8');
            } catch (readError) {
                if (readError.code === 'ENOENT' || readError.code === 'ENOTDIR') {
                    return jsonResponse(
                        { error: `doc_${indexNum} not found locally (${corpus.id})` },
                        404
                    );
                }
                throw readError;
            }
            pagesData = JSON.parse(raw);
        }

        if (!Array.isArray(pagesData)) {
            throw new Error(`doc_${indexNum} (${corpus.id}) is not a JSON array of pages`);
        }

        const pageData = pagesData.find((entry) => entry.document?.pages?.[0] === pageNum);

        if (!pageData) {
            return jsonResponse(
                { error: `Page ${pageNum} not found in doc ${indexNum} (${corpus.id})` },
                404
            );
        }

        return jsonResponse(pageData, 200);
    } catch (error) {
        console.error(error);
        return jsonResponse({ error: "Failed to fetch document page" }, 500);
    }
}