Spaces:
Sleeping
Sleeping
File size: 7,850 Bytes
da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 c9986d8 a2c885c c9986d8 a2c885c c9986d8 da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 a2c885c da957b0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | import { NextResponse } from 'next/server';
import fs from 'fs';
import { commit } from '@huggingface/hub';
import { HF_DATASET_ID, HF_DATASET_BASE_URL, getCorpus, getDocRepoPath, getDocLocalPath } from '../../../utils/config.js';
/**
 * Tells whether the handlers should read/write through the Hugging Face Hub
 * instead of the local filesystem.
 *
 * Truthy only when an HF_TOKEN is configured and NODE_ENV is anything other
 * than 'development'. When no token is set, the falsy token value itself
 * (undefined or '') is returned rather than a strict boolean.
 */
const isHFSpace = () => {
  const { HF_TOKEN: token, NODE_ENV: mode } = process.env;
  if (!token) {
    return token;
  }
  return mode !== 'development';
};
/**
 * PUT /api/validate
 * Body: { corpus, document_index, page_number, dataset_index, updates }
 *
 * Loads the page list of one document (from the HF dataset repo when
 * isHFSpace() is truthy, otherwise from the local JSON file), locates the page
 * whose `document.pages[0]` equals `page_number`, updates the dataset entry at
 * `dataset_index`, and saves the whole document back to where it was read.
 *
 * Two update modes:
 *  - If `updates` contains any of human_validated / human_verdict /
 *    human_notes / annotator / validated_at, it is stored as a per-annotator
 *    record in the entry's `validations` array (replacing that annotator's
 *    previous record, if any). Other keys in `updates` are ignored.
 *  - Otherwise `updates` is shallow-merged into the dataset entry.
 *
 * @param {Request} request - Incoming request carrying the JSON body above.
 * @returns {Promise<Response>} 200 `{ success, dataset }` on success;
 *   400 for a malformed body, missing fields, non-object `updates`, or a
 *   dataset index that is not an in-range integer; 404 when the document or
 *   page cannot be found; 500 for any other failure.
 */
export async function PUT(request) {
  try {
    // A body that is not parseable JSON is a client error, not a server fault.
    let body;
    try {
      body = await request.json();
    } catch {
      return NextResponse.json({ error: 'Request body must be valid JSON' }, { status: 400 });
    }
    if (body === null || typeof body !== 'object') {
      return NextResponse.json({ error: 'Request body must be a JSON object' }, { status: 400 });
    }

    const { corpus: corpusId, document_index, page_number, dataset_index, updates } = body;
    const corpus = getCorpus(corpusId);

    if (document_index == null || page_number == null || dataset_index == null || !updates) {
      return NextResponse.json(
        { error: 'Missing document_index, page_number, dataset_index, or updates' },
        { status: 400 }
      );
    }
    // `updates` is probed with `in` and spread below; primitives would throw
    // and arrays would be spread as junk numeric keys.
    if (typeof updates !== 'object' || Array.isArray(updates)) {
      return NextResponse.json({ error: 'updates must be an object' }, { status: 400 });
    }

    let pagesData;
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
      const res = await fetch(url, {
        headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
      });
      if (!res.ok) {
        return NextResponse.json({ error: `Document not found on HF (${corpus.id})` }, { status: 404 });
      }
      pagesData = await res.json();
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      if (!fs.existsSync(filePath)) {
        return NextResponse.json({ error: `Document not found locally (${corpus.id})` }, { status: 404 });
      }
      pagesData = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    }

    const pageIdx = pagesData.findIndex((p) => p.document?.pages?.[0] === page_number);
    if (pageIdx === -1) {
      return NextResponse.json({ error: `Page ${page_number} not found` }, { status: 404 });
    }

    const datasets = pagesData[pageIdx].datasets || [];
    // Accept a real integer or an all-digit string (kept for lenient clients).
    // Anything else (0.5, "abc", true) would pass a plain range comparison and
    // then write to a non-index property of the array, which JSON.stringify
    // drops — a silent no-op reported as success.
    const datasetIdx =
      typeof dataset_index === 'string' && /^\d+$/.test(dataset_index)
        ? Number(dataset_index)
        : dataset_index;
    if (!Number.isInteger(datasetIdx) || datasetIdx < 0 || datasetIdx >= datasets.length) {
      return NextResponse.json({ error: `Dataset index ${dataset_index} out of range` }, { status: 400 });
    }

    // Per-annotator validation
    const currentEntry = datasets[datasetIdx];
    const annotator = updates.annotator || 'unknown';
    const validationFields = ['human_validated', 'human_verdict', 'human_notes', 'annotator', 'validated_at'];
    const isValidation = validationFields.some((f) => f in updates);

    if (isValidation) {
      const validations = currentEntry.validations || [];
      const existingIdx = validations.findIndex((v) => v.annotator === annotator);
      const validationEntry = {
        human_validated: updates.human_validated,
        human_verdict: updates.human_verdict,
        human_notes: updates.human_notes || null,
        annotator,
        validated_at: updates.validated_at || new Date().toISOString(),
      };
      // One record per annotator: overwrite theirs, or append a new one.
      if (existingIdx >= 0) {
        validations[existingIdx] = validationEntry;
      } else {
        validations.push(validationEntry);
      }
      datasets[datasetIdx] = { ...currentEntry, validations };
    } else {
      datasets[datasetIdx] = { ...currentEntry, ...updates };
    }

    // Save back
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const content = JSON.stringify(pagesData, null, 2);
      await commit({
        repo: { type: 'dataset', name: HF_DATASET_ID },
        credentials: { accessToken: process.env.HF_TOKEN },
        title: `Validate ${corpus.id}/doc_${document_index} page ${page_number}`,
        operations: [{
          operation: 'addOrUpdate',
          path: repoPath,
          content: new Blob([content], { type: 'application/json' }),
        }],
      });
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      fs.writeFileSync(filePath, JSON.stringify(pagesData, null, 2));
    }

    return NextResponse.json({
      success: true,
      dataset: datasets[datasetIdx],
    });
  } catch (error) {
    console.error('Validate error:', error);
    return NextResponse.json({ error: `Failed to validate: ${error.message}` }, { status: 500 });
  }
}
/**
 * DELETE /api/validate?corpus=X&doc=X&page=Y&idx=Z
 *
 * Loads the page list of document `doc` (from the HF dataset repo when
 * isHFSpace() is truthy, otherwise from the local JSON file), locates the page
 * whose `document.pages[0]` equals `page`, removes the dataset entry at
 * position `idx`, and saves the whole document back to where it was read.
 *
 * @param {Request} request - Incoming request; parameters are read from the
 *   query string of `request.url`.
 * @returns {Promise<Response>} 200 `{ success: true }` on success; 400 when
 *   doc/page/idx is missing or not a whole number, or idx is out of range;
 *   404 when the document or page cannot be found; 500 for any other failure.
 */
export async function DELETE(request) {
  try {
    const { searchParams } = new URL(request.url);
    const corpusId = searchParams.get('corpus');
    const document_index = parseIndexParam(searchParams.get('doc'));
    const page_number = parseIndexParam(searchParams.get('page'));
    const dataset_index = parseIndexParam(searchParams.get('idx'));
    const corpus = getCorpus(corpusId);

    if (Number.isNaN(document_index) || Number.isNaN(page_number) || Number.isNaN(dataset_index)) {
      return NextResponse.json(
        { error: 'Missing or invalid doc, page, or idx parameter' },
        { status: 400 }
      );
    }

    let pagesData;
    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const url = `${HF_DATASET_BASE_URL}/raw/main/${repoPath}`;
      const res = await fetch(url, {
        headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
      });
      if (!res.ok) {
        return NextResponse.json({ error: `Document not found on HF (${corpus.id})` }, { status: 404 });
      }
      pagesData = await res.json();
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      if (!fs.existsSync(filePath)) {
        return NextResponse.json({ error: `Document not found locally (${corpus.id})` }, { status: 404 });
      }
      pagesData = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    }

    const pageIdx = pagesData.findIndex((p) => p.document?.pages?.[0] === page_number);
    if (pageIdx === -1) {
      return NextResponse.json({ error: `Page ${page_number} not found` }, { status: 404 });
    }

    const datasets = pagesData[pageIdx].datasets || [];
    if (dataset_index < 0 || dataset_index >= datasets.length) {
      return NextResponse.json({ error: `Dataset index ${dataset_index} out of range` }, { status: 400 });
    }

    // `datasets` is the page's own array here (an empty fallback never passes
    // the range check), so this removes the entry from pagesData in place.
    datasets.splice(dataset_index, 1);

    if (isHFSpace()) {
      const repoPath = getDocRepoPath(corpus, document_index);
      const content = JSON.stringify(pagesData, null, 2);
      await commit({
        repo: { type: 'dataset', name: HF_DATASET_ID },
        credentials: { accessToken: process.env.HF_TOKEN },
        title: `Delete from ${corpus.id}/doc_${document_index} page ${page_number}`,
        operations: [{
          operation: 'addOrUpdate',
          path: repoPath,
          content: new Blob([content], { type: 'application/json' }),
        }],
      });
    } else {
      const filePath = getDocLocalPath(corpus, document_index);
      fs.writeFileSync(filePath, JSON.stringify(pagesData, null, 2));
    }

    return NextResponse.json({ success: true });
  } catch (error) {
    console.error('Delete error:', error);
    return NextResponse.json({ error: `Failed to delete: ${error.message}` }, { status: 500 });
  }
}

/**
 * Strictly parses a query-string value as a base-10 whole number.
 *
 * Unlike a bare parseInt, trailing garbage ("2abc") and decimals ("1.9") are
 * rejected instead of being truncated, so a malformed parameter can never
 * select — and delete — a different entry than the caller intended.
 *
 * @param {string|null} raw - Value from URLSearchParams.get (null if absent).
 * @returns {number} The parsed integer, or NaN when absent or malformed.
 */
function parseIndexParam(raw) {
  if (raw === null || !/^[+-]?\d+$/.test(raw.trim())) {
    return Number.NaN;
  }
  return Number.parseInt(raw, 10);
}
|