rafmacalaba committed on
Commit
fabd779
·
1 Parent(s): 290ad35

feat: per-annotator document assignment

Browse files

- Add annotator_assignments.json config (docs list or docs_range per user)
- Documents API filters by ?user= query param
- Client passes username from OAuth cookie to documents fetch
- rafmacalaba gets docs 1-10, rafamacalaba gets docs 10-100
- Falls back to MAX_DOCS_TO_SCAN if no assignment

annotator_assignments.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "rafmacalaba": {
3
+ "docs": [
4
+ 1,
5
+ 2,
6
+ 3,
7
+ 4,
8
+ 5,
9
+ 6,
10
+ 7,
11
+ 8,
12
+ 9,
13
+ 10
14
+ ]
15
+ },
16
+ "rafamacalaba": {
17
+ "docs_range": [
18
+ 10,
19
+ 100
20
+ ]
21
+ }
22
+ }
app/api/documents/route.js CHANGED
@@ -1,7 +1,71 @@
1
  import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN } from '../../../utils/config.js';
2
 
3
- export async function GET() {
 
 
 
 
 
 
4
  try {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  // Fetch the index file from HF Datasets
6
  const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_data/wbg_pdf_links.json`;
7
  const linksRes = await fetch(linksUrl, {
@@ -21,10 +85,18 @@ export async function GET() {
21
 
22
  const links = await linksRes.json();
23
 
24
- // Filter to successful links and take the first N
25
- const successLinks = links.filter(l => l.status === 'success').slice(0, MAX_DOCS_TO_SCAN);
 
 
 
 
 
 
 
 
26
 
27
- // Parallel fetch — much faster than sequential scanning
28
  const results = await Promise.allSettled(
29
  successLinks.map(async (link) => {
30
  const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${link.index}/raw/doc_${link.index}_direct_judged.jsonl`;
@@ -58,7 +130,7 @@ export async function GET() {
58
  status: 200,
59
  headers: {
60
  'Content-Type': 'application/json',
61
- 'Cache-Control': 'public, s-maxage=3600, stale-while-revalidate=59'
62
  }
63
  });
64
  } catch (error) {
 
1
  import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN } from '../../../utils/config.js';
2
 
3
/**
 * Report whether the server should read its config from the HF dataset.
 *
 * True only when an HF_TOKEN is set and NODE_ENV is not 'development'.
 * Always returns a strict boolean (never the token value or undefined).
 *
 * @returns {boolean}
 */
const isHFSpace = () =>
  Boolean(process.env.HF_TOKEN) && process.env.NODE_ENV !== 'development';
4
+
5
/**
 * Load the annotator assignments config.
 *
 * When isHFSpace() is truthy the config is fetched from the HF dataset
 * (annotation_data/annotator_assignments.json) using HF_TOKEN as a bearer
 * token; otherwise it is read from annotator_assignments.json in the current
 * working directory.
 *
 * This is best-effort: any failure is logged and reported as null so the
 * caller can fall back to its default document selection.
 *
 * @returns {Promise<object|null>} Parsed assignments, or null when the config
 *   is missing or could not be loaded.
 */
async function loadAssignments() {
  try {
    if (isHFSpace()) {
      const url = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/annotator_assignments.json`;
      const res = await fetch(url, {
        headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` },
      });
      if (res.ok) return await res.json();
      // A 404 just means no config has been published; anything else (auth,
      // rate limit, outage) silently disables filtering, so make it visible.
      if (res.status !== 404) {
        console.warn(`Could not load annotator assignments: HTTP ${res.status}`);
      }
      return null;
    }

    const fs = await import('fs');
    const path = await import('path');
    const filePath = path.default.join(process.cwd(), 'annotator_assignments.json');
    try {
      // Async read avoids blocking the event loop and the exists-then-read race.
      const raw = await fs.default.promises.readFile(filePath, 'utf-8');
      return JSON.parse(raw);
    } catch (e) {
      // A missing file is the normal "no assignments configured" case.
      if (e.code === 'ENOENT') return null;
      throw e;
    }
  } catch (e) {
    console.warn('Could not load annotator assignments:', e.message);
  }
  return null;
}
30
+
31
/**
 * Get the set of allowed doc indices for a user.
 *
 * The user's entry is looked up under the exact username first, then under
 * its lower-cased form. An entry may provide an explicit list
 * (`"docs": [1, 2, 3]`) and/or an inclusive range (`"docs_range": [10, 100]`);
 * both are merged into one set.
 *
 * @param {object|null} assignments - Parsed assignments config keyed by username.
 * @param {string|null} username - Username to resolve (may be absent).
 * @returns {Set<number>|null} Allowed indices, or null when there is no usable
 *   assignment for the user (callers treat null as "no per-user filter").
 */
function getAllowedDocs(assignments, username) {
  if (!assignments || typeof username !== 'string' || username === '') return null;

  // Own-property lookup only: the username is caller-supplied, so names such
  // as "constructor" or "__proto__" must not resolve via the prototype chain.
  const lookup = (key) =>
    (Object.prototype.hasOwnProperty.call(assignments, key) ? assignments[key] : undefined);

  const userConfig = lookup(username) || lookup(username.toLowerCase());
  if (!userConfig || typeof userConfig !== 'object') return null;

  const allowed = new Set();

  // Explicit list: "docs": [1, 2, 3]
  if (Array.isArray(userConfig.docs)) {
    for (const doc of userConfig.docs) {
      allowed.add(doc);
    }
  }

  // Range: "docs_range": [10, 100] (inclusive)
  if (Array.isArray(userConfig.docs_range)) {
    const [start, end] = userConfig.docs_range;
    // Safe integers only: string or huge bounds would otherwise produce
    // mixed-type entries or a loop that never terminates.
    if (Number.isSafeInteger(start) && Number.isSafeInteger(end)) {
      for (let i = start; i <= end; i++) {
        allowed.add(i);
      }
    } else {
      console.warn('Ignoring malformed docs_range for user:', username);
    }
  }

  return allowed.size > 0 ? allowed : null;
}
58
+
59
+ export async function GET(request) {
60
+ try {
61
+ // Get username from query param
62
+ const { searchParams } = new URL(request.url);
63
+ const username = searchParams.get('user');
64
+
65
+ // Load assignments
66
+ const assignments = await loadAssignments();
67
+ const allowedDocs = getAllowedDocs(assignments, username);
68
+
69
  // Fetch the index file from HF Datasets
70
  const linksUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_data/wbg_pdf_links.json`;
71
  const linksRes = await fetch(linksUrl, {
 
85
 
86
  const links = await linksRes.json();
87
 
88
+ // Filter to successful links
89
+ let successLinks = links.filter(l => l.status === 'success');
90
+
91
+ // If user has assignments, filter to allowed docs only
92
+ if (allowedDocs) {
93
+ successLinks = successLinks.filter(l => allowedDocs.has(l.index));
94
+ } else {
95
+ // No assignments — take first N
96
+ successLinks = successLinks.slice(0, MAX_DOCS_TO_SCAN);
97
+ }
98
 
99
+ // Parallel fetch
100
  const results = await Promise.allSettled(
101
  successLinks.map(async (link) => {
102
  const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${link.index}/raw/doc_${link.index}_direct_judged.jsonl`;
 
130
  status: 200,
131
  headers: {
132
  'Content-Type': 'application/json',
133
+ 'Cache-Control': 'no-store'
134
  }
135
  });
136
  } catch (error) {
app/page.js CHANGED
@@ -43,26 +43,10 @@ export default function Home() {
43
  const annotatablePages = currentDoc?.annotatable_pages ?? [];
44
  const currentPageNumber = annotatablePages[pageIdx] ?? null;
45
 
46
- // Load documents on mount
47
- useEffect(() => {
48
- fetch('/api/documents')
49
- .then(res => res.json())
50
- .then(data => {
51
- setDocuments(data);
52
- if (data.length > 0) {
53
- setSelectedDocIndex(data[0].index);
54
- setPageIdx(0);
55
- }
56
- setLoading(false);
57
- })
58
- .catch(err => {
59
- console.error("Failed to load documents", err);
60
- setLoading(false);
61
- });
62
- }, []);
63
-
64
- // Read HF OAuth cookie for annotator identity
65
  useEffect(() => {
 
 
66
  try {
67
  const cookie = document.cookie
68
  .split('; ')
@@ -70,12 +54,33 @@ export default function Home() {
70
  if (cookie) {
71
  const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('=')));
72
  if (user.username) {
 
73
  setAnnotatorName(user.username);
74
  }
75
  }
76
  } catch (e) {
77
  console.warn('Could not read hf_user cookie', e);
78
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }, []);
80
 
81
  // Update currentDoc when selection changes
 
43
  const annotatablePages = currentDoc?.annotatable_pages ?? [];
44
  const currentPageNumber = annotatablePages[pageIdx] ?? null;
45
 
46
+ // Read HF OAuth cookie and load assigned documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  useEffect(() => {
48
+ // 1. Read username from cookie
49
+ let username = '';
50
  try {
51
  const cookie = document.cookie
52
  .split('; ')
 
54
  if (cookie) {
55
  const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('=')));
56
  if (user.username) {
57
+ username = user.username;
58
  setAnnotatorName(user.username);
59
  }
60
  }
61
  } catch (e) {
62
  console.warn('Could not read hf_user cookie', e);
63
  }
64
+
65
+ // 2. Fetch documents (filtered by user if logged in)
66
+ const url = username
67
+ ? `/api/documents?user=${encodeURIComponent(username)}`
68
+ : '/api/documents';
69
+
70
+ fetch(url)
71
+ .then(res => res.json())
72
+ .then(data => {
73
+ setDocuments(data);
74
+ if (data.length > 0) {
75
+ setSelectedDocIndex(data[0].index);
76
+ setPageIdx(0);
77
+ }
78
+ setLoading(false);
79
+ })
80
+ .catch(err => {
81
+ console.error("Failed to load documents", err);
82
+ setLoading(false);
83
+ });
84
  }, []);
85
 
86
  // Update currentDoc when selection changes