File size: 7,850 Bytes
da957b0
 
 
a2c885c
da957b0
 
 
 
 
a2c885c
da957b0
 
 
a2c885c
 
da957b0
 
 
 
 
 
 
 
 
 
 
a2c885c
 
da957b0
 
 
 
a2c885c
da957b0
 
 
a2c885c
da957b0
a2c885c
da957b0
 
 
 
 
 
 
 
 
 
 
 
 
 
a2c885c
da957b0
c9986d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2c885c
c9986d8
a2c885c
c9986d8
da957b0
 
 
a2c885c
da957b0
 
 
a2c885c
 
da957b0
 
 
 
 
 
 
a2c885c
da957b0
 
 
 
 
 
 
 
 
 
 
 
 
 
a2c885c
da957b0
 
 
 
a2c885c
da957b0
 
 
 
a2c885c
 
da957b0
 
 
 
 
 
 
 
 
 
a2c885c
 
da957b0
 
 
 
a2c885c
da957b0
 
 
a2c885c
da957b0
a2c885c
da957b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2c885c
da957b0
 
 
a2c885c
 
da957b0
 
 
 
 
 
 
a2c885c
da957b0
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import { NextResponse } from 'next/server';
import fs from 'fs';
import { commit } from '@huggingface/hub';
import { HF_DATASET_ID, HF_DATASET_BASE_URL, getCorpus, getDocRepoPath, getDocLocalPath } from '../../../utils/config.js';

// True when running inside a Hugging Face Space deployment: an HF_TOKEN is
// configured and we are not in local development. Controls whether documents
// are read/written via the HF Hub (fetch + commit) or the local filesystem.
// Fix: wrap the token check in Boolean() so the function always returns a
// boolean — the original `a && b` leaked `undefined`/'' when HF_TOKEN was unset.
const isHFSpace = () => Boolean(process.env.HF_TOKEN) && process.env.NODE_ENV !== 'development';

/**
 * PUT /api/validate
 * Body: { corpus, document_index, page_number, dataset_index, updates }
 *
 * Applies `updates` to one dataset entry on one page of a document, then
 * persists the whole document either to the HF dataset repo (Space mode) or
 * to a local JSON file (development mode).
 *
 * Two update modes, chosen by inspecting the keys of `updates`:
 *  - validation update: any of the per-annotator validation fields present →
 *    stored as an entry in the dataset's `validations` array, keyed by
 *    annotator (an annotator's previous validation is replaced in place);
 *  - plain update: otherwise `updates` is shallow-merged into the dataset.
 *
 * Responses: 200 { success, dataset } on success; 400 on missing body fields
 * or out-of-range dataset index; 404 when the document or page is not found;
 * 500 on any unexpected error.
 */
export async function PUT(request) {
    try {
        const { corpus: corpusId, document_index, page_number, dataset_index, updates } = await request.json();
        const corpus = getCorpus(corpusId);

        // `== null` (not falsy) so that legitimate 0 values for the indices
        // and page number pass validation.
        if (document_index == null || page_number == null || dataset_index == null || !updates) {
            return NextResponse.json(
                { error: 'Missing document_index, page_number, dataset_index, or updates' },
                { status: 400 }
            );
        }

        let pagesData;

        // --- Load the document (array of per-page records) ---------------
        if (isHFSpace()) {
            // Space mode: fetch the raw JSON file from the HF dataset repo.
            const repoPath = getDocRepoPath(corpus, document_index);
            const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
            const res = await fetch(url, {
                headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
            });
            if (!res.ok) {
                return NextResponse.json({ error: `Document not found on HF (${corpus.id})` }, { status: 404 });
            }
            pagesData = await res.json();
        } else {
            // Dev mode: read the document from the local filesystem.
            const filePath = getDocLocalPath(corpus, document_index);
            if (!fs.existsSync(filePath)) {
                return NextResponse.json({ error: `Document not found locally (${corpus.id})` }, { status: 404 });
            }
            pagesData = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
        }

        // Each record appears to hold a single-page document; match on the
        // first entry of document.pages. (Assumes pages[0] uniquely
        // identifies the record — TODO confirm against the data format.)
        const pageIdx = pagesData.findIndex(p => p.document?.pages?.[0] === page_number);
        if (pageIdx === -1) {
            return NextResponse.json({ error: `Page ${page_number} not found` }, { status: 404 });
        }

        const datasets = pagesData[pageIdx].datasets || [];
        if (dataset_index < 0 || dataset_index >= datasets.length) {
            return NextResponse.json({ error: `Dataset index ${dataset_index} out of range` }, { status: 400 });
        }

        // Per-annotator validation: if `updates` carries any of these fields,
        // treat the request as a validation rather than a plain field merge.
        const currentEntry = pagesData[pageIdx].datasets[dataset_index];
        const annotator = updates.annotator || 'unknown';
        const validationFields = ['human_validated', 'human_verdict', 'human_notes', 'annotator', 'validated_at'];
        const isValidation = validationFields.some(f => f in updates);

        if (isValidation) {
            const validations = currentEntry.validations || [];
            const existingIdx = validations.findIndex(v => v.annotator === annotator);
            const validationEntry = {
                human_validated: updates.human_validated,
                human_verdict: updates.human_verdict,
                human_notes: updates.human_notes || null,
                annotator,
                // Server timestamp unless the client supplied one.
                validated_at: updates.validated_at || new Date().toISOString(),
            };

            // One validation per annotator: replace on re-validation,
            // append on first validation.
            if (existingIdx >= 0) {
                validations[existingIdx] = validationEntry;
            } else {
                validations.push(validationEntry);
            }

            pagesData[pageIdx].datasets[dataset_index] = { ...currentEntry, validations };
        } else {
            // Plain update: shallow-merge arbitrary fields into the dataset.
            pagesData[pageIdx].datasets[dataset_index] = { ...currentEntry, ...updates };
        }

        // --- Save back (same two modes as the load above) -----------------
        if (isHFSpace()) {
            const repoPath = getDocRepoPath(corpus, document_index);
            const content = JSON.stringify(pagesData, null, 2);
            // Commits the full document file back to the HF dataset repo.
            await commit({
                repo: { type: 'dataset', name: HF_DATASET_ID },
                credentials: { accessToken: process.env.HF_TOKEN },
                title: `Validate ${corpus.id}/doc_${document_index} page ${page_number}`,
                operations: [{
                    operation: 'addOrUpdate',
                    path: repoPath,
                    content: new Blob([content], { type: 'application/json' }),
                }],
            });
        } else {
            const filePath = getDocLocalPath(corpus, document_index);
            fs.writeFileSync(filePath, JSON.stringify(pagesData, null, 2));
        }

        return NextResponse.json({
            success: true,
            dataset: pagesData[pageIdx].datasets[dataset_index],
        });
    } catch (error) {
        // Catches body-parse failures, getCorpus errors, HF commit failures, etc.
        console.error('Validate error:', error);
        return NextResponse.json({ error: 'Failed to validate: ' + error.message }, { status: 500 });
    }
}

/**
 * DELETE /api/validate?corpus=X&doc=X&page=Y&idx=Z
 *
 * Removes the dataset entry at index `idx` from the page `page` of document
 * `doc` in corpus `corpus`, then persists the modified document (HF dataset
 * repo in Space mode, local JSON file in development). Mirrors the load/save
 * flow of PUT above.
 *
 * Responses: 200 { success } on success; 400 on missing/invalid query params
 * or out-of-range index; 404 when the document or page is not found; 500 on
 * any unexpected error.
 */
export async function DELETE(request) {
    try {
        const { searchParams } = new URL(request.url);
        const corpusId = searchParams.get('corpus');
        // searchParams.get returns null when absent; parseInt(null) is NaN,
        // so the isNaN check below covers both missing and malformed params.
        const document_index = parseInt(searchParams.get('doc'), 10);
        const page_number = parseInt(searchParams.get('page'), 10);
        const dataset_index = parseInt(searchParams.get('idx'), 10);

        const corpus = getCorpus(corpusId);

        if (isNaN(document_index) || isNaN(page_number) || isNaN(dataset_index)) {
            return NextResponse.json(
                { error: 'Missing doc, page, or idx parameter' },
                { status: 400 }
            );
        }

        let pagesData;

        // --- Load the document (same two modes as PUT) --------------------
        if (isHFSpace()) {
            const repoPath = getDocRepoPath(corpus, document_index);
            const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
            const res = await fetch(url, {
                headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
            });
            if (!res.ok) {
                return NextResponse.json({ error: `Document not found on HF (${corpus.id})` }, { status: 404 });
            }
            pagesData = await res.json();
        } else {
            const filePath = getDocLocalPath(corpus, document_index);
            if (!fs.existsSync(filePath)) {
                return NextResponse.json({ error: `Document not found locally (${corpus.id})` }, { status: 404 });
            }
            pagesData = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
        }

        // Match the page record by the first entry of document.pages
        // (assumes one page per record — TODO confirm, same as PUT).
        const pageIdx = pagesData.findIndex(p => p.document?.pages?.[0] === page_number);
        if (pageIdx === -1) {
            return NextResponse.json({ error: `Page ${page_number} not found` }, { status: 404 });
        }

        const datasets = pagesData[pageIdx].datasets || [];
        if (dataset_index < 0 || dataset_index >= datasets.length) {
            return NextResponse.json({ error: `Dataset index ${dataset_index} out of range` }, { status: 400 });
        }

        // Remove the dataset entry in place; later entries shift down, so
        // client-held indices past this one become stale after this call.
        pagesData[pageIdx].datasets.splice(dataset_index, 1);

        // --- Save back (same two modes as the load above) -----------------
        if (isHFSpace()) {
            const repoPath = getDocRepoPath(corpus, document_index);
            const content = JSON.stringify(pagesData, null, 2);
            await commit({
                repo: { type: 'dataset', name: HF_DATASET_ID },
                credentials: { accessToken: process.env.HF_TOKEN },
                title: `Delete from ${corpus.id}/doc_${document_index} page ${page_number}`,
                operations: [{
                    operation: 'addOrUpdate',
                    path: repoPath,
                    content: new Blob([content], { type: 'application/json' }),
                }],
            });
        } else {
            const filePath = getDocLocalPath(corpus, document_index);
            fs.writeFileSync(filePath, JSON.stringify(pagesData, null, 2));
        }

        return NextResponse.json({ success: true });
    } catch (error) {
        console.error('Delete error:', error);
        return NextResponse.json({ error: 'Failed to delete: ' + error.message }, { status: 500 });
    }
}