Spaces:
Sleeping
Sleeping
| "use client"; | |
import { useState, useEffect, useCallback, useRef } from 'react';
import DocumentSelector from './components/DocumentSelector';
import PdfViewer from './components/PdfViewer';
import MarkdownAnnotator from './components/MarkdownAnnotator';
import AnnotationPanel from './components/AnnotationPanel';
import AnnotationModal from './components/AnnotationModal';
import PageNavigator from './components/PageNavigator';
import ProgressBar from './components/ProgressBar';
import Leaderboard from './components/Leaderboard';
import HelpModal from './components/HelpModal';
| export default function Home() { | |
| const [documents, setDocuments] = useState([]); | |
| const [selectedDocIndex, setSelectedDocIndex] = useState(null); | |
| const [selectedCorpus, setSelectedCorpus] = useState(null); | |
| const [currentDoc, setCurrentDoc] = useState(null); | |
| // Page-by-page navigation: track the index into annotatable_pages array | |
| const [pageIdx, setPageIdx] = useState(0); | |
| const [currentPageData, setCurrentPageData] = useState(null); | |
| const [loading, setLoading] = useState(true); | |
| const [loadingPage, setLoadingPage] = useState(false); | |
| // Annotations | |
| const [annotations, setAnnotations] = useState([]); | |
| const [annotatorName, setAnnotatorName] = useState(''); | |
| // Modal state | |
| const [modalOpen, setModalOpen] = useState(false); | |
| const [selectedText, setSelectedText] = useState(''); | |
| const [selectionOffset, setSelectionOffset] = useState(0); | |
| // Side panel state | |
| const [panelOpen, setPanelOpen] = useState(false); | |
| // Toast state | |
| const [toast, setToast] = useState(null); | |
| // Leaderboard state | |
| const [leaderboardOpen, setLeaderboardOpen] = useState(false); | |
| const [helpOpen, setHelpOpen] = useState(false); | |
| const showToast = useCallback((message, type = 'success') => { | |
| setToast({ message, type }); | |
| setTimeout(() => setToast(null), 3000); | |
| }, []); | |
| // Derived: current page number from the annotatable_pages array | |
| const annotatablePages = currentDoc?.annotatable_pages ?? []; | |
| const pagesWithMentions = new Set(currentDoc?.pages_with_mentions ?? []); | |
| const currentPageNumber = annotatablePages[pageIdx] ?? null; | |
| // Load documents (re-fetches when annotatorName changes to get user-specific assignment) | |
| useEffect(() => { | |
| // Don't fetch until we know who the user is β avoids showing all docs briefly | |
| if (!annotatorName) return; | |
| setLoading(true); | |
| const url = `/api/documents?user=${encodeURIComponent(annotatorName)}`; | |
| fetch(url) | |
| .then(res => res.json()) | |
| .then(data => { | |
| setDocuments(data); | |
| if (data.length > 0) { | |
| const savedDoc = sessionStorage.getItem('selectedDocIndex'); | |
| const savedCorpus = sessionStorage.getItem('selectedCorpus'); | |
| const savedPage = sessionStorage.getItem('pageIdx'); | |
| const docIdx = savedDoc ? parseInt(savedDoc, 10) : null; | |
| const restoredDoc = docIdx !== null && data.find(d => d.index === docIdx && (!savedCorpus || d.corpus === savedCorpus)); | |
| if (restoredDoc) { | |
| setSelectedDocIndex(docIdx); | |
| setSelectedCorpus(restoredDoc.corpus); | |
| setPageIdx(savedPage ? parseInt(savedPage, 10) : 0); | |
| } else { | |
| setSelectedDocIndex(data[0].index); | |
| setSelectedCorpus(data[0].corpus); | |
| setPageIdx(0); | |
| } | |
| } | |
| setLoading(false); | |
| }) | |
| .catch(err => { | |
| console.error("Failed to load documents", err); | |
| setLoading(false); | |
| }); | |
| }, [annotatorName]); | |
| // Read HF OAuth cookie for annotator identity | |
| useEffect(() => { | |
| try { | |
| const cookie = document.cookie | |
| .split('; ') | |
| .find(c => c.startsWith('hf_user=')); | |
| if (cookie) { | |
| const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('='))); | |
| if (user.username) { | |
| setAnnotatorName(user.username); | |
| } | |
| } | |
| } catch (e) { | |
| console.warn('Could not read hf_user cookie', e); | |
| } | |
| }, []); | |
| // Update currentDoc when selection changes + persist to sessionStorage | |
| useEffect(() => { | |
| if (selectedDocIndex !== null && selectedCorpus !== null) { | |
| const doc = documents.find(d => d.index === selectedDocIndex && d.corpus === selectedCorpus); | |
| setCurrentDoc(doc); | |
| sessionStorage.setItem('selectedDocIndex', selectedDocIndex); | |
| sessionStorage.setItem('selectedCorpus', selectedCorpus); | |
| // Clamp pageIdx to valid range for this document | |
| if (doc) { | |
| const maxPage = (doc.annotatable_pages?.length ?? 1) - 1; | |
| setPageIdx(prev => Math.min(prev, Math.max(0, maxPage))); | |
| } | |
| } | |
| }, [selectedDocIndex, selectedCorpus, documents]); | |
| // Persist pageIdx to sessionStorage | |
| useEffect(() => { | |
| sessionStorage.setItem('pageIdx', pageIdx); | |
| }, [pageIdx]); | |
| // Fetch page data (reusable β called on page change and after saving) | |
| const refreshPageData = useCallback(() => { | |
| if (selectedDocIndex !== null && currentPageNumber !== null) { | |
| setLoadingPage(true); | |
| fetch(`/api/document?index=${selectedDocIndex}&page=${currentPageNumber}&corpus=${selectedCorpus || ''}`) | |
| .then(res => res.json()) | |
| .then(data => { | |
| setCurrentPageData(data); | |
| setLoadingPage(false); | |
| }) | |
| .catch(err => { | |
| console.error("Failed to load page data", err); | |
| setLoadingPage(false); | |
| }); | |
| } | |
| }, [selectedDocIndex, currentPageNumber]); | |
| // Load page data when page changes | |
| useEffect(() => { | |
| refreshPageData(); | |
| }, [refreshPageData]); | |
| // Load persisted annotations on mount | |
| useEffect(() => { | |
| fetch('/api/annotations') | |
| .then(res => res.json()) | |
| .then(data => { | |
| if (Array.isArray(data)) setAnnotations(data); | |
| }) | |
| .catch(err => console.error("Failed to load annotations", err)); | |
| }, []) | |
| // Auto-fetch HF username in production | |
| useEffect(() => { | |
| fetch('/api/whoami') | |
| .then(res => res.ok ? res.json() : null) | |
| .then(data => { | |
| if (data?.username) { | |
| setAnnotatorName(data.username); | |
| localStorage.setItem('annotator_name', data.username); | |
| } | |
| }) | |
| .catch(() => { }); // Silently ignore β falls back to localStorage name | |
| }, []); | |
| // Load annotator name from localStorage | |
| useEffect(() => { | |
| const saved = localStorage.getItem('annotator_name'); | |
| if (saved) setAnnotatorName(saved); | |
| }, []); | |
| const handleAnnotatorChange = (name) => { | |
| setAnnotatorName(name); | |
| localStorage.setItem('annotator_name', name); | |
| }; | |
| const handleDocChange = (corpus, docIdx) => { | |
| setSelectedCorpus(corpus); | |
| setSelectedDocIndex(docIdx); | |
| setPageIdx(0); | |
| }; | |
| const handlePrevPage = () => { | |
| setPageIdx(prev => Math.max(0, prev - 1)); | |
| }; | |
| const handleNextPage = () => { | |
| const unverified = currentPageDatasets.filter(ds => { | |
| const myValidation = (ds.validations || []).find(v => v.annotator === annotatorName); | |
| return myValidation?.human_validated !== true; | |
| }).length; | |
| if (unverified > 0) { | |
| const proceed = confirm( | |
| `β οΈ You have ${unverified} unverified data mention${unverified > 1 ? 's' : ''} on this page.\n\nDo you want to proceed to the next page?` | |
| ); | |
| if (!proceed) return; | |
| } | |
| setPageIdx(prev => Math.min(annotatablePages.length - 1, prev + 1)); | |
| }; | |
| const handleJumpToMention = (direction = 1) => { | |
| const currentPage = annotatablePages[pageIdx]; | |
| if (direction === 1) { | |
| // Find next page index (after current) that has mentions | |
| for (let i = pageIdx + 1; i < annotatablePages.length; i++) { | |
| if (pagesWithMentions.has(annotatablePages[i])) { | |
| setPageIdx(i); | |
| return; | |
| } | |
| } | |
| } else { | |
| // Find previous page index that has mentions | |
| for (let i = pageIdx - 1; i >= 0; i--) { | |
| if (pagesWithMentions.has(annotatablePages[i])) { | |
| setPageIdx(i); | |
| return; | |
| } | |
| } | |
| } | |
| }; | |
| const handleAnnotate = (text, domOffset) => { | |
| setSelectedText(text); | |
| setSelectionOffset(domOffset || 0); | |
| setModalOpen(true); | |
| }; | |
| const handleAnnotationSubmit = async ({ dataset_tag }) => { | |
| const inputText = currentPageData?.input_text || ""; | |
| // Strip markdown formatting so browser-selected plain text can be found | |
| const stripMd = (s) => s | |
| .replace(/\*\*\*/g, '') // bold-italic | |
| .replace(/\*\*/g, '') // bold | |
| .replace(/\*/g, '') // italic | |
| .replace(/__/g, '') | |
| .replace(/_/g, ' ') | |
| .replace(/^#{1,6}\s+/gm, '') // headings | |
| .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // links | |
| const plainText = stripMd(inputText); | |
| // Normalize whitespace in selected text (browser may add extra spaces/newlines) | |
| const normalizedSelection = selectedText.replace(/\s+/g, ' ').trim(); | |
| // Find occurrences in the stripped text | |
| const occurrences = []; | |
| let searchFrom = 0; | |
| while (searchFrom < plainText.length) { | |
| const idx = plainText.indexOf(normalizedSelection, searchFrom); | |
| if (idx === -1) break; | |
| occurrences.push(idx); | |
| searchFrom = idx + 1; | |
| } | |
| // If not found with exact match, try case-insensitive | |
| if (occurrences.length === 0) { | |
| const lowerPlain = plainText.toLowerCase(); | |
| const lowerSel = normalizedSelection.toLowerCase(); | |
| let sf = 0; | |
| while (sf < lowerPlain.length) { | |
| const idx = lowerPlain.indexOf(lowerSel, sf); | |
| if (idx === -1) break; | |
| occurrences.push(idx); | |
| sf = idx + 1; | |
| } | |
| } | |
| let startIdx = null; | |
| if (occurrences.length === 1) { | |
| startIdx = occurrences[0]; | |
| } else if (occurrences.length > 1) { | |
| const container = document.querySelector('.markdown-preview'); | |
| const visibleLen = container?.textContent?.length || plainText.length; | |
| const ratio = plainText.length / visibleLen; | |
| const estimatedSourcePos = selectionOffset * ratio; | |
| startIdx = occurrences.reduce((best, idx) => | |
| Math.abs(idx - estimatedSourcePos) < Math.abs(best - estimatedSourcePos) ? idx : best | |
| ); | |
| } | |
| const endIdx = startIdx !== null ? startIdx + normalizedSelection.length : null; | |
| const payload = { | |
| dataset_name: { | |
| text: selectedText, | |
| confidence: 1.0, | |
| start: startIdx >= 0 ? startIdx : null, | |
| end: endIdx, | |
| }, | |
| dataset_tag: dataset_tag, | |
| source: 'human', | |
| annotator: annotatorName || "user", | |
| corpus: selectedCorpus, | |
| document_index: selectedDocIndex, | |
| page_number: currentPageNumber, | |
| timestamp: new Date().toISOString(), | |
| description: null, | |
| data_type: null, | |
| acronym: null, | |
| author: null, | |
| producer: null, | |
| geography: null, | |
| publication_year: null, | |
| reference_year: null, | |
| reference_population: null, | |
| is_used: null, | |
| usage_context: null, | |
| }; | |
| try { | |
| const res = await fetch('/api/annotate', { | |
| method: 'POST', | |
| headers: { 'Content-Type': 'application/json' }, | |
| body: JSON.stringify(payload) | |
| }); | |
| if (res.ok) { | |
| setAnnotations(prev => [...prev, payload]); | |
| setModalOpen(false); | |
| showToast("Annotation saved!"); | |
| refreshPageData(); // Refresh so new annotation appears highlighted | |
| } else { | |
| showToast("Failed to save annotation.", "error"); | |
| } | |
| } catch (err) { | |
| console.error(err); | |
| showToast("Network error saving annotation.", "error"); | |
| } | |
| }; | |
| // Delete dataset entry by index | |
| const handleDeleteAnnotation = async (ds, idx) => { | |
| const rawIdx = ds._rawIndex ?? idx; | |
| try { | |
| const res = await fetch( | |
| `/api/validate?doc=${selectedDocIndex}&page=${currentPageNumber}&idx=${rawIdx}&corpus=${selectedCorpus || ''}`, | |
| { method: 'DELETE' } | |
| ); | |
| if (res.ok) { | |
| refreshPageData(); | |
| showToast("Data mention deleted."); | |
| } else { | |
| showToast("Failed to delete.", "error"); | |
| } | |
| } catch (err) { | |
| console.error(err); | |
| showToast("Network error deleting.", "error"); | |
| } | |
| }; | |
| // Update annotation (e.g. change dataset_tag) | |
| const handleUpdateAnnotation = async (annotation, idx, updates) => { | |
| try { | |
| const res = await fetch('/api/annotate', { | |
| method: 'PUT', | |
| headers: { 'Content-Type': 'application/json' }, | |
| body: JSON.stringify({ | |
| timestamp: annotation.timestamp, | |
| document_index: annotation.document_index ?? selectedDocIndex, | |
| page_number: annotation.page_number ?? currentPageNumber, | |
| updates, | |
| }) | |
| }); | |
| if (res.ok) { | |
| setAnnotations(prev => prev.map(a => | |
| a.timestamp === annotation.timestamp ? { ...a, ...updates } : a | |
| )); | |
| showToast("Annotation updated."); | |
| } else { | |
| showToast("Failed to update annotation.", "error"); | |
| } | |
| } catch (err) { | |
| console.error(err); | |
| showToast("Network error updating annotation.", "error"); | |
| } | |
| }; | |
| // All datasets on the current page, excluding consensus non-datasets. | |
| // Preserve _rawIndex so delete/validate use the correct position in the file. | |
| const currentPageDatasets = (currentPageData?.datasets || []) | |
| .map((ds, i) => ({ ...ds, _rawIndex: i })) | |
| .filter(ds => { | |
| if (ds.dataset_tag === 'non-dataset' && ds.dataset_name?.judge_agrees === true) { | |
| return false; | |
| } | |
| return true; | |
| }); | |
| // Validate a dataset entry (approve/reject with notes) | |
| const handleValidateDataset = async (datasetIdx, updates) => { | |
| try { | |
| const res = await fetch('/api/validate', { | |
| method: 'PUT', | |
| headers: { 'Content-Type': 'application/json' }, | |
| body: JSON.stringify({ | |
| corpus: selectedCorpus, | |
| document_index: selectedDocIndex, | |
| page_number: currentPageNumber, | |
| dataset_index: datasetIdx, | |
| updates, | |
| }) | |
| }); | |
| if (res.ok) { | |
| refreshPageData(); | |
| showToast("Validation saved!"); | |
| } else { | |
| showToast("Failed to save validation.", "error"); | |
| } | |
| } catch (err) { | |
| console.error(err); | |
| showToast("Network error saving validation.", "error"); | |
| } | |
| }; | |
| // Gate: require HF OAuth login | |
| if (!annotatorName) { | |
| const handleLogin = () => { | |
| // Open login in a new tab to bypass iframe restrictions | |
| window.open('/api/auth/login', '_blank'); | |
| // Poll for the cookie to appear (set by callback) | |
| const poll = setInterval(() => { | |
| try { | |
| const cookie = document.cookie | |
| .split('; ') | |
| .find(c => c.startsWith('hf_user=')); | |
| if (cookie) { | |
| clearInterval(poll); | |
| const user = JSON.parse(decodeURIComponent(cookie.split('=').slice(1).join('='))); | |
| if (user.username) { | |
| setAnnotatorName(user.username); | |
| } | |
| } | |
| } catch (e) { /* ignore */ } | |
| }, 2000); | |
| }; | |
| return ( | |
| <div className="login-gate"> | |
| <div className="login-card"> | |
| <h1>π Data Use Annotation Tool</h1> | |
| <p>Sign in with your HuggingFace account to access the tool.</p> | |
| <button onClick={handleLogin} className="btn btn-login-large"> | |
| π€ Sign in with HuggingFace | |
| </button> | |
| <p className="login-note">Only authorized annotators can access this tool.</p> | |
| </div> | |
| </div> | |
| ); | |
| } | |
| if (loading) { | |
| return ( | |
| <div className="loading-container"> | |
| <div className="loading-spinner" /> | |
| <p>Loading documents...</p> | |
| </div> | |
| ); | |
| } | |
| return ( | |
| <div className="app-wrapper"> | |
| {/* Top bar with user identity */} | |
| <div className="top-bar"> | |
| <span className="top-bar-title">Data Use Annotation Tool</span> | |
| <div className="top-bar-right"> | |
| <ProgressBar | |
| documents={documents} | |
| selectedDocIndex={selectedDocIndex} | |
| currentDoc={currentDoc} | |
| pageIdx={pageIdx} | |
| currentPageDatasets={currentPageDatasets} | |
| annotatorName={annotatorName} | |
| /> | |
| <div className="top-bar-user"> | |
| <button | |
| className="btn-leaderboard" | |
| onClick={() => setLeaderboardOpen(true)} | |
| > | |
| π Leaderboard | |
| </button> | |
| <button | |
| className="btn-leaderboard" | |
| onClick={() => setHelpOpen(true)} | |
| > | |
| β Help | |
| </button> | |
| {annotatorName ? ( | |
| <span className="user-badge">π€ {annotatorName}</span> | |
| ) : ( | |
| <a href="/api/auth/login" className="btn btn-login" target="_blank" rel="noopener"> | |
| π Sign in with HF | |
| </a> | |
| )} | |
| </div> | |
| </div> | |
| </div> | |
| <div className="container"> | |
| <div className="pane left-pane"> | |
| <div className="pane-header"> | |
| <DocumentSelector | |
| documents={documents} | |
| selectedDocIndex={selectedDocIndex} | |
| selectedCorpus={selectedCorpus} | |
| onDocChange={handleDocChange} | |
| /> | |
| </div> | |
| <PdfViewer | |
| pdfUrl={currentDoc?.pdf_url} | |
| pageNumber={currentPageNumber} | |
| /> | |
| </div> | |
| <div className="pane right-pane"> | |
| <MarkdownAnnotator | |
| selectedDocIndex={selectedDocIndex} | |
| selectedPage={currentPageNumber} | |
| currentPageData={currentPageData} | |
| loadingPage={loadingPage} | |
| onAnnotate={handleAnnotate} | |
| /> | |
| </div> | |
| {/* Floating chevron to open annotations panel */} | |
| <button | |
| className="panel-chevron" | |
| onClick={() => setPanelOpen(prev => !prev)} | |
| title="Toggle annotations" | |
| > | |
| {panelOpen ? 'βΊ' : 'βΉ'} | |
| {!panelOpen && currentPageDatasets.length > 0 && ( | |
| <span className="chevron-badge">{currentPageDatasets.length}</span> | |
| )} | |
| </button> | |
| <AnnotationPanel | |
| isOpen={panelOpen} | |
| onClose={() => setPanelOpen(false)} | |
| datasets={currentPageDatasets} | |
| annotatorName={annotatorName} | |
| onValidate={handleValidateDataset} | |
| onDelete={handleDeleteAnnotation} | |
| /> | |
| {/* Shared page navigator at the bottom */} | |
| <div className="bottom-nav"> | |
| <PageNavigator | |
| currentIndex={pageIdx} | |
| totalPages={annotatablePages.length} | |
| currentPageNumber={currentPageNumber} | |
| hasMentions={pagesWithMentions.has(currentPageNumber)} | |
| onPrevious={handlePrevPage} | |
| onNext={handleNextPage} | |
| onJumpToMention={handleJumpToMention} | |
| hasNextMention={annotatablePages.slice(pageIdx + 1).some(p => pagesWithMentions.has(p))} | |
| hasPrevMention={annotatablePages.slice(0, pageIdx).some(p => pagesWithMentions.has(p))} | |
| /> | |
| </div> | |
| <AnnotationModal | |
| isOpen={modalOpen} | |
| selectedText={selectedText} | |
| annotatorName={annotatorName} | |
| onAnnotatorChange={handleAnnotatorChange} | |
| onSubmit={handleAnnotationSubmit} | |
| onClose={() => setModalOpen(false)} | |
| /> | |
| {toast && ( | |
| <div className={`toast toast-${toast.type}`}> | |
| {toast.message} | |
| </div> | |
| )} | |
| </div> | |
| <Leaderboard isOpen={leaderboardOpen} onClose={() => setLeaderboardOpen(false)} /> | |
| <HelpModal isOpen={helpOpen} onClose={() => setHelpOpen(false)} /> | |
| </div> | |
| ); | |
| } | |