Spaces:
Sleeping
Sleeping
Commit ·
fabd779
1
Parent(s): 290ad35
feat: per-annotator document assignment
Browse files
- Add annotator_assignments.json config (docs list or docs_range per user)
- Documents API filters by ?user= query param
- Client passes username from OAuth cookie to documents fetch
- rafmacalaba gets docs 1-10, rafamacalaba gets docs 10-100
- Falls back to MAX_DOCS_TO_SCAN if no assignment
- annotator_assignments.json +22 -0
- app/api/documents/route.js +77 -5
- app/page.js +24 -19
annotator_assignments.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rafmacalaba": {
|
| 3 |
+
"docs": [
|
| 4 |
+
1,
|
| 5 |
+
2,
|
| 6 |
+
3,
|
| 7 |
+
4,
|
| 8 |
+
5,
|
| 9 |
+
6,
|
| 10 |
+
7,
|
| 11 |
+
8,
|
| 12 |
+
9,
|
| 13 |
+
10
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
"rafamacalaba": {
|
| 17 |
+
"docs_range": [
|
| 18 |
+
10,
|
| 19 |
+
100
|
| 20 |
+
]
|
| 21 |
+
}
|
| 22 |
+
}
|
app/api/documents/route.js
CHANGED
|
@@ -1,7 +1,71 @@
|
|
| 1 |
import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN } from '../../../utils/config.js';
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
// Fetch the index file from HF Datasets
|
| 6 |
const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_data/wbg_pdf_links.json`;
|
| 7 |
const linksRes = await fetch(linksUrl, {
|
|
@@ -21,10 +85,18 @@ export async function GET() {
|
|
| 21 |
|
| 22 |
const links = await linksRes.json();
|
| 23 |
|
| 24 |
-
// Filter to successful links
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
// Parallel fetch
|
| 28 |
const results = await Promise.allSettled(
|
| 29 |
successLinks.map(async (link) => {
|
| 30 |
const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${link.index}/raw/doc_${link.index}_direct_judged.jsonl`;
|
|
@@ -58,7 +130,7 @@ export async function GET() {
|
|
| 58 |
status: 200,
|
| 59 |
headers: {
|
| 60 |
'Content-Type': 'application/json',
|
| 61 |
-
'Cache-Control': '
|
| 62 |
}
|
| 63 |
});
|
| 64 |
} catch (error) {
|
|
|
|
| 1 |
import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN } from '../../../utils/config.js';
|
| 2 |
|
| 3 |
+
/**
 * Tell whether the app should use the remote (HF) code path.
 *
 * Mirrors `HF_TOKEN && NODE_ENV !== 'development'`: when `HF_TOKEN` is
 * missing or empty that falsy value itself is returned, otherwise the result
 * is a boolean that is false only in development mode.
 */
const isHFSpace = () => {
    const token = process.env.HF_TOKEN;
    if (!token) return token;
    return process.env.NODE_ENV !== 'development';
};
|
| 4 |
+
|
| 5 |
+
/**
 * Load the annotator assignments config.
 *
 * When `isHFSpace()` is truthy the config is fetched from the dataset repo
 * (authenticated with `HF_TOKEN`); otherwise it is read from
 * `annotator_assignments.json` in the current working directory.
 *
 * Best-effort: this function never throws. A missing config (HTTP 404 or a
 * non-existent local file) quietly yields `null`; any other failure is
 * logged with `console.warn` and also yields `null`.
 *
 * @returns {Promise<object|null>} Parsed assignments, or `null` if unavailable.
 */
async function loadAssignments() {
    try {
        if (isHFSpace()) {
            const url = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/annotator_assignments.json`;
            const res = await fetch(url, {
                headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` },
            });
            if (res.ok) return await res.json();
            // 404 just means "no assignments configured"; anything else
            // (401, 403, 5xx, ...) is a real problem worth surfacing.
            if (res.status !== 404) {
                console.warn(`Could not load annotator assignments: HTTP ${res.status}`);
            }
            return null;
        }

        const fs = await import('fs');
        const path = await import('path');
        const filePath = path.default.join(process.cwd(), 'annotator_assignments.json');
        try {
            // Single async read instead of existsSync + readFileSync: avoids
            // blocking the event loop and the check-then-read race.
            const raw = await fs.default.promises.readFile(filePath, 'utf-8');
            return JSON.parse(raw);
        } catch (e) {
            // An absent file is the expected "no assignments" case.
            if (e && e.code === 'ENOENT') return null;
            throw e;
        }
    } catch (e) {
        console.warn('Could not load annotator assignments:', e.message);
    }
    return null;
}
|
| 30 |
+
|
| 31 |
+
/**
 * Resolve the set of document indices a user is allowed to see.
 *
 * The user's entry is looked up among the config's own keys: first by exact
 * name, then case-insensitively. An entry may contain:
 *   - "docs": an explicit array of indices, e.g. [1, 2, 3]
 *   - "docs_range": an inclusive [start, end] pair, e.g. [10, 100]
 * Both may be present; the result is their union. Malformed fields (a
 * non-array `docs`, a `docs_range` that is not a pair of integers) are
 * ignored rather than throwing.
 *
 * @param {object|null} assignments - Parsed assignments config, keyed by username.
 * @param {string|null} username - Name of the requesting user.
 * @returns {Set|null} Allowed indices, or `null` when there is no config, no
 *   username, no entry for the user, or the entry yields no indices
 *   (callers treat `null` as "no per-user filter").
 */
function getAllowedDocs(assignments, username) {
    if (!assignments || !username) return null;

    // Only consider the config's own keys so names such as "constructor" or
    // "__proto__" can never resolve to inherited Object.prototype members.
    let userConfig;
    if (Object.prototype.hasOwnProperty.call(assignments, username)) {
        userConfig = assignments[username];
    }
    if (!userConfig) {
        const wanted = username.toLowerCase();
        const matchedKey = Object.keys(assignments)
            .find((key) => key.toLowerCase() === wanted);
        if (matchedKey !== undefined) userConfig = assignments[matchedKey];
    }
    if (!userConfig) return null;

    const allowed = new Set();

    // Explicit list: "docs": [1, 2, 3]
    if (Array.isArray(userConfig.docs)) {
        for (const doc of userConfig.docs) allowed.add(doc);
    }

    // Range: "docs_range": [10, 100] (inclusive)
    if (Array.isArray(userConfig.docs_range)) {
        // Accept numeric strings so ["10", "100"] yields numbers, not a
        // stray "10" string followed by numeric values.
        const toNumber = (v) =>
            (typeof v === 'string' && v.trim() !== '' ? Number(v) : v);
        const [start, end] = userConfig.docs_range.map(toNumber);
        if (Number.isInteger(start) && Number.isInteger(end)) {
            for (let i = start; i <= end; i++) {
                allowed.add(i);
            }
        }
    }

    return allowed.size > 0 ? allowed : null;
}
|
| 58 |
+
|
| 59 |
+
export async function GET(request) {
|
| 60 |
+
try {
|
| 61 |
+
// Get username from query param
|
| 62 |
+
const { searchParams } = new URL(request.url);
|
| 63 |
+
const username = searchParams.get('user');
|
| 64 |
+
|
| 65 |
+
// Load assignments
|
| 66 |
+
const assignments = await loadAssignments();
|
| 67 |
+
const allowedDocs = getAllowedDocs(assignments, username);
|
| 68 |
+
|
| 69 |
// Fetch the index file from HF Datasets
|
| 70 |
const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_data/wbg_pdf_links.json`;
|
| 71 |
const linksRes = await fetch(linksUrl, {
|
|
|
|
| 85 |
|
| 86 |
const links = await linksRes.json();
|
| 87 |
|
| 88 |
+
// Filter to successful links
|
| 89 |
+
let successLinks = links.filter(l => l.status === 'success');
|
| 90 |
+
|
| 91 |
+
// If user has assignments, filter to allowed docs only
|
| 92 |
+
if (allowedDocs) {
|
| 93 |
+
successLinks = successLinks.filter(l => allowedDocs.has(l.index));
|
| 94 |
+
} else {
|
| 95 |
+
// No assignments — take first N
|
| 96 |
+
successLinks = successLinks.slice(0, MAX_DOCS_TO_SCAN);
|
| 97 |
+
}
|
| 98 |
|
| 99 |
+
// Parallel fetch
|
| 100 |
const results = await Promise.allSettled(
|
| 101 |
successLinks.map(async (link) => {
|
| 102 |
const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${link.index}/raw/doc_${link.index}_direct_judged.jsonl`;
|
|
|
|
| 130 |
status: 200,
|
| 131 |
headers: {
|
| 132 |
'Content-Type': 'application/json',
|
| 133 |
+
'Cache-Control': 'no-store'
|
| 134 |
}
|
| 135 |
});
|
| 136 |
} catch (error) {
|
app/page.js
CHANGED
|
@@ -43,26 +43,10 @@ export default function Home() {
|
|
| 43 |
const annotatablePages = currentDoc?.annotatable_pages ?? [];
|
| 44 |
const currentPageNumber = annotatablePages[pageIdx] ?? null;
|
| 45 |
|
| 46 |
-
//
|
| 47 |
-
useEffect(() => {
|
| 48 |
-
fetch('/api/documents')
|
| 49 |
-
.then(res => res.json())
|
| 50 |
-
.then(data => {
|
| 51 |
-
setDocuments(data);
|
| 52 |
-
if (data.length > 0) {
|
| 53 |
-
setSelectedDocIndex(data[0].index);
|
| 54 |
-
setPageIdx(0);
|
| 55 |
-
}
|
| 56 |
-
setLoading(false);
|
| 57 |
-
})
|
| 58 |
-
.catch(err => {
|
| 59 |
-
console.error("Failed to load documents", err);
|
| 60 |
-
setLoading(false);
|
| 61 |
-
});
|
| 62 |
-
}, []);
|
| 63 |
-
|
| 64 |
-
// Read HF OAuth cookie for annotator identity
|
| 65 |
useEffect(() => {
|
|
|
|
|
|
|
| 66 |
try {
|
| 67 |
const cookie = document.cookie
|
| 68 |
.split('; ')
|
|
@@ -70,12 +54,33 @@ export default function Home() {
|
|
| 70 |
if (cookie) {
|
| 71 |
const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('=')));
|
| 72 |
if (user.username) {
|
|
|
|
| 73 |
setAnnotatorName(user.username);
|
| 74 |
}
|
| 75 |
}
|
| 76 |
} catch (e) {
|
| 77 |
console.warn('Could not read hf_user cookie', e);
|
| 78 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
}, []);
|
| 80 |
|
| 81 |
// Update currentDoc when selection changes
|
|
|
|
| 43 |
const annotatablePages = currentDoc?.annotatable_pages ?? [];
|
| 44 |
const currentPageNumber = annotatablePages[pageIdx] ?? null;
|
| 45 |
|
| 46 |
+
// Read HF OAuth cookie and load assigned documents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
useEffect(() => {
|
| 48 |
+
// 1. Read username from cookie
|
| 49 |
+
let username = '';
|
| 50 |
try {
|
| 51 |
const cookie = document.cookie
|
| 52 |
.split('; ')
|
|
|
|
| 54 |
if (cookie) {
|
| 55 |
const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('=')));
|
| 56 |
if (user.username) {
|
| 57 |
+
username = user.username;
|
| 58 |
setAnnotatorName(user.username);
|
| 59 |
}
|
| 60 |
}
|
| 61 |
} catch (e) {
|
| 62 |
console.warn('Could not read hf_user cookie', e);
|
| 63 |
}
|
| 64 |
+
|
| 65 |
+
// 2. Fetch documents (filtered by user if logged in)
|
| 66 |
+
const url = username
|
| 67 |
+
? `/api/documents?user=${encodeURIComponent(username)}`
|
| 68 |
+
: '/api/documents';
|
| 69 |
+
|
| 70 |
+
fetch(url)
|
| 71 |
+
.then(res => res.json())
|
| 72 |
+
.then(data => {
|
| 73 |
+
setDocuments(data);
|
| 74 |
+
if (data.length > 0) {
|
| 75 |
+
setSelectedDocIndex(data[0].index);
|
| 76 |
+
setPageIdx(0);
|
| 77 |
+
}
|
| 78 |
+
setLoading(false);
|
| 79 |
+
})
|
| 80 |
+
.catch(err => {
|
| 81 |
+
console.error("Failed to load documents", err);
|
| 82 |
+
setLoading(false);
|
| 83 |
+
});
|
| 84 |
}, []);
|
| 85 |
|
| 86 |
// Update currentDoc when selection changes
|