"use client"; import { useState, useEffect, useCallback } from 'react'; import DocumentSelector from './components/DocumentSelector'; import PdfViewer from './components/PdfViewer'; import MarkdownAnnotator from './components/MarkdownAnnotator'; import AnnotationPanel from './components/AnnotationPanel'; import AnnotationModal from './components/AnnotationModal'; import PageNavigator from './components/PageNavigator'; import ProgressBar from './components/ProgressBar'; import Leaderboard from './components/Leaderboard'; import HelpModal from './components/HelpModal'; export default function Home() { const [documents, setDocuments] = useState([]); const [selectedDocIndex, setSelectedDocIndex] = useState(null); const [selectedCorpus, setSelectedCorpus] = useState(null); const [currentDoc, setCurrentDoc] = useState(null); // Page-by-page navigation: track the index into annotatable_pages array const [pageIdx, setPageIdx] = useState(0); const [currentPageData, setCurrentPageData] = useState(null); const [loading, setLoading] = useState(true); const [loadingPage, setLoadingPage] = useState(false); // Annotations const [annotations, setAnnotations] = useState([]); const [annotatorName, setAnnotatorName] = useState(''); // Modal state const [modalOpen, setModalOpen] = useState(false); const [selectedText, setSelectedText] = useState(''); const [selectionOffset, setSelectionOffset] = useState(0); // Side panel state const [panelOpen, setPanelOpen] = useState(false); // Toast state const [toast, setToast] = useState(null); // Leaderboard state const [leaderboardOpen, setLeaderboardOpen] = useState(false); const [helpOpen, setHelpOpen] = useState(false); const showToast = useCallback((message, type = 'success') => { setToast({ message, type }); setTimeout(() => setToast(null), 3000); }, []); // Derived: current page number from the annotatable_pages array const annotatablePages = currentDoc?.annotatable_pages ?? []; const pagesWithMentions = new Set(currentDoc?.pages_with_mentions ?? []); const currentPageNumber = annotatablePages[pageIdx] ?? null; // Load documents (re-fetches when annotatorName changes to get user-specific assignment) useEffect(() => { // Don't fetch until we know who the user is — avoids showing all docs briefly if (!annotatorName) return; setLoading(true); const url = `/api/documents?user=${encodeURIComponent(annotatorName)}`; fetch(url) .then(res => res.json()) .then(data => { setDocuments(data); if (data.length > 0) { const savedDoc = sessionStorage.getItem('selectedDocIndex'); const savedCorpus = sessionStorage.getItem('selectedCorpus'); const savedPage = sessionStorage.getItem('pageIdx'); const docIdx = savedDoc ? parseInt(savedDoc, 10) : null; const restoredDoc = docIdx !== null && data.find(d => d.index === docIdx && (!savedCorpus || d.corpus === savedCorpus)); if (restoredDoc) { setSelectedDocIndex(docIdx); setSelectedCorpus(restoredDoc.corpus); setPageIdx(savedPage ? parseInt(savedPage, 10) : 0); } else { setSelectedDocIndex(data[0].index); setSelectedCorpus(data[0].corpus); setPageIdx(0); } } setLoading(false); }) .catch(err => { console.error("Failed to load documents", err); setLoading(false); }); }, [annotatorName]); // Read HF OAuth cookie for annotator identity useEffect(() => { try { const cookie = document.cookie .split('; ') .find(c => c.startsWith('hf_user=')); if (cookie) { const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('='))); if (user.username) { setAnnotatorName(user.username); } } } catch (e) { console.warn('Could not read hf_user cookie', e); } }, []); // Update currentDoc when selection changes + persist to sessionStorage useEffect(() => { if (selectedDocIndex !== null && selectedCorpus !== null) { const doc = documents.find(d => d.index === selectedDocIndex && d.corpus === selectedCorpus); setCurrentDoc(doc); sessionStorage.setItem('selectedDocIndex', selectedDocIndex); sessionStorage.setItem('selectedCorpus', selectedCorpus); // Clamp pageIdx to valid range for this document if (doc) { const maxPage = (doc.annotatable_pages?.length ?? 1) - 1; setPageIdx(prev => Math.min(prev, Math.max(0, maxPage))); } } }, [selectedDocIndex, selectedCorpus, documents]); // Persist pageIdx to sessionStorage useEffect(() => { sessionStorage.setItem('pageIdx', pageIdx); }, [pageIdx]); // Fetch page data (reusable — called on page change and after saving) const refreshPageData = useCallback(() => { if (selectedDocIndex !== null && currentPageNumber !== null) { setLoadingPage(true); fetch(`/api/document?index=${selectedDocIndex}&page=${currentPageNumber}&corpus=${selectedCorpus || ''}`) .then(res => res.json()) .then(data => { setCurrentPageData(data); setLoadingPage(false); }) .catch(err => { console.error("Failed to load page data", err); setLoadingPage(false); }); } }, [selectedDocIndex, currentPageNumber]); // Load page data when page changes useEffect(() => { refreshPageData(); }, [refreshPageData]); // Load persisted annotations on mount useEffect(() => { fetch('/api/annotations') .then(res => res.json()) .then(data => { if (Array.isArray(data)) setAnnotations(data); }) .catch(err => console.error("Failed to load annotations", err)); }, []) // Auto-fetch HF username in production useEffect(() => { fetch('/api/whoami') .then(res => res.ok ? res.json() : null) .then(data => { if (data?.username) { setAnnotatorName(data.username); localStorage.setItem('annotator_name', data.username); } }) .catch(() => { }); // Silently ignore — falls back to localStorage name }, []); // Load annotator name from localStorage useEffect(() => { const saved = localStorage.getItem('annotator_name'); if (saved) setAnnotatorName(saved); }, []); const handleAnnotatorChange = (name) => { setAnnotatorName(name); localStorage.setItem('annotator_name', name); }; const handleDocChange = (corpus, docIdx) => { setSelectedCorpus(corpus); setSelectedDocIndex(docIdx); setPageIdx(0); }; const handlePrevPage = () => { setPageIdx(prev => Math.max(0, prev - 1)); }; const handleNextPage = () => { const unverified = currentPageDatasets.filter(ds => { const myValidation = (ds.validations || []).find(v => v.annotator === annotatorName); return myValidation?.human_validated !== true; }).length; if (unverified > 0) { const proceed = confirm( `⚠️ You have ${unverified} unverified data mention${unverified > 1 ? 's' : ''} on this page.\n\nDo you want to proceed to the next page?` ); if (!proceed) return; } setPageIdx(prev => Math.min(annotatablePages.length - 1, prev + 1)); }; const handleJumpToMention = (direction = 1) => { const currentPage = annotatablePages[pageIdx]; if (direction === 1) { // Find next page index (after current) that has mentions for (let i = pageIdx + 1; i < annotatablePages.length; i++) { if (pagesWithMentions.has(annotatablePages[i])) { setPageIdx(i); return; } } } else { // Find previous page index that has mentions for (let i = pageIdx - 1; i >= 0; i--) { if (pagesWithMentions.has(annotatablePages[i])) { setPageIdx(i); return; } } } }; const handleAnnotate = (text, domOffset) => { setSelectedText(text); setSelectionOffset(domOffset || 0); setModalOpen(true); }; const handleAnnotationSubmit = async ({ dataset_tag }) => { const inputText = currentPageData?.input_text || ""; // Strip markdown formatting so browser-selected plain text can be found const stripMd = (s) => s .replace(/\*\*\*/g, '') // bold-italic .replace(/\*\*/g, '') // bold .replace(/\*/g, '') // italic .replace(/__/g, '') .replace(/_/g, ' ') .replace(/^#{1,6}\s+/gm, '') // headings .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // links const plainText = stripMd(inputText); // Normalize whitespace in selected text (browser may add extra spaces/newlines) const normalizedSelection = selectedText.replace(/\s+/g, ' ').trim(); // Find occurrences in the stripped text const occurrences = []; let searchFrom = 0; while (searchFrom < plainText.length) { const idx = plainText.indexOf(normalizedSelection, searchFrom); if (idx === -1) break; occurrences.push(idx); searchFrom = idx + 1; } // If not found with exact match, try case-insensitive if (occurrences.length === 0) { const lowerPlain = plainText.toLowerCase(); const lowerSel = normalizedSelection.toLowerCase(); let sf = 0; while (sf < lowerPlain.length) { const idx = lowerPlain.indexOf(lowerSel, sf); if (idx === -1) break; occurrences.push(idx); sf = idx + 1; } } let startIdx = null; if (occurrences.length === 1) { startIdx = occurrences[0]; } else if (occurrences.length > 1) { const container = document.querySelector('.markdown-preview'); const visibleLen = container?.textContent?.length || plainText.length; const ratio = plainText.length / visibleLen; const estimatedSourcePos = selectionOffset * ratio; startIdx = occurrences.reduce((best, idx) => Math.abs(idx - estimatedSourcePos) < Math.abs(best - estimatedSourcePos) ? idx : best ); } const endIdx = startIdx !== null ? startIdx + normalizedSelection.length : null; const payload = { dataset_name: { text: selectedText, confidence: 1.0, start: startIdx >= 0 ? startIdx : null, end: endIdx, }, dataset_tag: dataset_tag, source: 'human', annotator: annotatorName || "user", corpus: selectedCorpus, document_index: selectedDocIndex, page_number: currentPageNumber, timestamp: new Date().toISOString(), description: null, data_type: null, acronym: null, author: null, producer: null, geography: null, publication_year: null, reference_year: null, reference_population: null, is_used: null, usage_context: null, }; try { const res = await fetch('/api/annotate', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) }); if (res.ok) { setAnnotations(prev => [...prev, payload]); setModalOpen(false); showToast("Annotation saved!"); refreshPageData(); // Refresh so new annotation appears highlighted } else { showToast("Failed to save annotation.", "error"); } } catch (err) { console.error(err); showToast("Network error saving annotation.", "error"); } }; // Delete dataset entry by index const handleDeleteAnnotation = async (ds, idx) => { const rawIdx = ds._rawIndex ?? idx; try { const res = await fetch( `/api/validate?doc=${selectedDocIndex}&page=${currentPageNumber}&idx=${rawIdx}&corpus=${selectedCorpus || ''}`, { method: 'DELETE' } ); if (res.ok) { refreshPageData(); showToast("Data mention deleted."); } else { showToast("Failed to delete.", "error"); } } catch (err) { console.error(err); showToast("Network error deleting.", "error"); } }; // Update annotation (e.g. change dataset_tag) const handleUpdateAnnotation = async (annotation, idx, updates) => { try { const res = await fetch('/api/annotate', { method: 'PUT', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ timestamp: annotation.timestamp, document_index: annotation.document_index ?? selectedDocIndex, page_number: annotation.page_number ?? currentPageNumber, updates, }) }); if (res.ok) { setAnnotations(prev => prev.map(a => a.timestamp === annotation.timestamp ? { ...a, ...updates } : a )); showToast("Annotation updated."); } else { showToast("Failed to update annotation.", "error"); } } catch (err) { console.error(err); showToast("Network error updating annotation.", "error"); } }; // All datasets on the current page, excluding consensus non-datasets. // Preserve _rawIndex so delete/validate use the correct position in the file. const currentPageDatasets = (currentPageData?.datasets || []) .map((ds, i) => ({ ...ds, _rawIndex: i })) .filter(ds => { if (ds.dataset_tag === 'non-dataset' && ds.dataset_name?.judge_agrees === true) { return false; } return true; }); // Validate a dataset entry (approve/reject with notes) const handleValidateDataset = async (datasetIdx, updates) => { try { const res = await fetch('/api/validate', { method: 'PUT', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ corpus: selectedCorpus, document_index: selectedDocIndex, page_number: currentPageNumber, dataset_index: datasetIdx, updates, }) }); if (res.ok) { refreshPageData(); showToast("Validation saved!"); } else { showToast("Failed to save validation.", "error"); } } catch (err) { console.error(err); showToast("Network error saving validation.", "error"); } }; // Gate: require HF OAuth login if (!annotatorName) { const handleLogin = () => { // Open login in a new tab to bypass iframe restrictions window.open('/api/auth/login', '_blank'); // Poll for the cookie to appear (set by callback) const poll = setInterval(() => { try { const cookie = document.cookie .split('; ') .find(c => c.startsWith('hf_user=')); if (cookie) { clearInterval(poll); const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('='))); if (user.username) { setAnnotatorName(user.username); } } } catch (e) { /* ignore */ } }, 2000); }; return (
Sign in with your HuggingFace account to access the tool.
Only authorized annotators can access this tool.
Loading documents...