Spaces:
Running
Running
Commit ·
9cc2491
1
Parent(s): b584890
feat: add loading spinner and error fallback to PDF viewer
Browse files
Shows 'Loading PDF...' with spinner while iframe loads.
Shows error state with 'Open PDF directly' link if load fails.
Resets loading state on document/page change.
- app/api/document/route.js +37 -50
- app/api/documents/route.js +33 -12
- app/components/PdfViewer.js +25 -5
- app/globals.css +23 -17
- app/page.js +2 -27
app/api/document/route.js
CHANGED
|
@@ -6,80 +6,67 @@ const isHFSpace = () => {
|
|
| 6 |
return process.env.HF_TOKEN && process.env.NODE_ENV !== 'development';
|
| 7 |
};
|
| 8 |
|
| 9 |
-
/**
|
| 10 |
-
* Loads ALL pages data for a given document index.
|
| 11 |
-
*/
|
| 12 |
-
async function loadPagesData(indexNum) {
|
| 13 |
-
if (isHFSpace()) {
|
| 14 |
-
const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${indexNum}/raw/doc_${indexNum}_direct_judged.jsonl`;
|
| 15 |
-
const res = await fetch(docUrl, {
|
| 16 |
-
headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
|
| 17 |
-
});
|
| 18 |
-
if (!res.ok) return null;
|
| 19 |
-
return await res.json();
|
| 20 |
-
} else {
|
| 21 |
-
const filePath = path.join(
|
| 22 |
-
process.cwd(),
|
| 23 |
-
'annotation_data', 'wbg_extractions',
|
| 24 |
-
`doc_${indexNum}`, 'raw', `doc_${indexNum}_direct_judged.jsonl`
|
| 25 |
-
);
|
| 26 |
-
if (!fs.existsSync(filePath)) return null;
|
| 27 |
-
return JSON.parse(fs.readFileSync(filePath, 'utf-8'));
|
| 28 |
-
}
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
export async function GET(request) {
|
| 32 |
const { searchParams } = new URL(request.url);
|
| 33 |
const index = searchParams.get('index');
|
| 34 |
const page = searchParams.get('page');
|
| 35 |
|
| 36 |
-
|
|
|
|
| 37 |
return new Response(
|
| 38 |
-
JSON.stringify({ error: "Missing index parameter" }),
|
| 39 |
{ status: 400, headers: { 'Content-Type': 'application/json' } }
|
| 40 |
);
|
| 41 |
}
|
| 42 |
|
|
|
|
| 43 |
const indexNum = parseInt(index, 10);
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
return new Response(
|
| 46 |
-
JSON.stringify({ error: "index must be
|
| 47 |
{ status: 400, headers: { 'Content-Type': 'application/json' } }
|
| 48 |
);
|
| 49 |
}
|
| 50 |
|
| 51 |
try {
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
if (!pagesData) {
|
| 55 |
-
return new Response(
|
| 56 |
-
JSON.stringify({ error: `doc_${indexNum}_direct_judged.jsonl not found` }),
|
| 57 |
-
{ status: 404, headers: { 'Content-Type': 'application/json' } }
|
| 58 |
-
);
|
| 59 |
-
}
|
| 60 |
-
|
| 61 |
-
// If no page specified, return just the annotatable pages list
|
| 62 |
-
if (page === null || page === undefined) {
|
| 63 |
-
const annotatablePages = pagesData
|
| 64 |
-
.filter(p => p.datasets && p.datasets.length > 0)
|
| 65 |
-
.map(p => p.document?.pages?.[0]);
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
| 70 |
});
|
| 71 |
-
}
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
|
| 82 |
const pageData = pagesData.find(p => p.document?.pages?.[0] === pageNum);
|
|
|
|
| 83 |
if (!pageData) {
|
| 84 |
return new Response(
|
| 85 |
JSON.stringify({ error: `Page ${pageNum} not found in doc ${indexNum}` }),
|
|
|
|
| 6 |
return process.env.HF_TOKEN && process.env.NODE_ENV !== 'development';
|
| 7 |
};
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
export async function GET(request) {
|
| 10 |
const { searchParams } = new URL(request.url);
|
| 11 |
const index = searchParams.get('index');
|
| 12 |
const page = searchParams.get('page');
|
| 13 |
|
| 14 |
+
// Validate required params
|
| 15 |
+
if (index === null || page === null) {
|
| 16 |
return new Response(
|
| 17 |
+
JSON.stringify({ error: "Missing index or page parameter" }),
|
| 18 |
{ status: 400, headers: { 'Content-Type': 'application/json' } }
|
| 19 |
);
|
| 20 |
}
|
| 21 |
|
| 22 |
+
// Validate numeric values
|
| 23 |
const indexNum = parseInt(index, 10);
|
| 24 |
+
const pageNum = parseInt(page, 10);
|
| 25 |
+
|
| 26 |
+
if (isNaN(indexNum) || isNaN(pageNum) || indexNum < 0 || pageNum < 0) {
|
| 27 |
return new Response(
|
| 28 |
+
JSON.stringify({ error: "index and page must be non-negative integers" }),
|
| 29 |
{ status: 400, headers: { 'Content-Type': 'application/json' } }
|
| 30 |
);
|
| 31 |
}
|
| 32 |
|
| 33 |
try {
|
| 34 |
+
let pagesData;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
if (isHFSpace()) {
|
| 37 |
+
// Production: fetch from HuggingFace
|
| 38 |
+
const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${indexNum}/raw/doc_${indexNum}_direct_judged.jsonl`;
|
| 39 |
+
const res = await fetch(docUrl, {
|
| 40 |
+
headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
|
| 41 |
});
|
|
|
|
| 42 |
|
| 43 |
+
if (!res.ok) {
|
| 44 |
+
return new Response(
|
| 45 |
+
JSON.stringify({ error: `doc_${indexNum}_direct_judged.jsonl not found on HF Datasets` }),
|
| 46 |
+
{ status: res.status, headers: { 'Content-Type': 'application/json' } }
|
| 47 |
+
);
|
| 48 |
+
}
|
| 49 |
+
pagesData = await res.json();
|
| 50 |
+
} else {
|
| 51 |
+
// Local dev: read from local file (reflects saved annotations immediately)
|
| 52 |
+
const filePath = path.join(
|
| 53 |
+
process.cwd(),
|
| 54 |
+
'annotation_data', 'wbg_extractions',
|
| 55 |
+
`doc_${indexNum}`, 'raw', `doc_${indexNum}_direct_judged.jsonl`
|
| 56 |
);
|
| 57 |
+
|
| 58 |
+
if (!fs.existsSync(filePath)) {
|
| 59 |
+
return new Response(
|
| 60 |
+
JSON.stringify({ error: `doc_${indexNum}_direct_judged.jsonl not found locally` }),
|
| 61 |
+
{ status: 404, headers: { 'Content-Type': 'application/json' } }
|
| 62 |
+
);
|
| 63 |
+
}
|
| 64 |
+
const raw = fs.readFileSync(filePath, 'utf-8');
|
| 65 |
+
pagesData = JSON.parse(raw);
|
| 66 |
}
|
| 67 |
|
| 68 |
const pageData = pagesData.find(p => p.document?.pages?.[0] === pageNum);
|
| 69 |
+
|
| 70 |
if (!pageData) {
|
| 71 |
return new Response(
|
| 72 |
JSON.stringify({ error: `Page ${pageNum} not found in doc ${indexNum}` }),
|
app/api/documents/route.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import { HF_DATASET_BASE_URL } from '../../../utils/config.js';
|
| 2 |
|
| 3 |
export async function GET() {
|
| 4 |
try {
|
|
@@ -21,17 +21,38 @@ export async function GET() {
|
|
| 21 |
|
| 22 |
const links = await linksRes.json();
|
| 23 |
|
| 24 |
-
//
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
return new Response(JSON.stringify(documents), {
|
| 37 |
status: 200,
|
|
|
|
| 1 |
+
import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN } from '../../../utils/config.js';
|
| 2 |
|
| 3 |
export async function GET() {
|
| 4 |
try {
|
|
|
|
| 21 |
|
| 22 |
const links = await linksRes.json();
|
| 23 |
|
| 24 |
+
// Filter to successful links and take the first N
|
| 25 |
+
const successLinks = links.filter(l => l.status === 'success').slice(0, MAX_DOCS_TO_SCAN);
|
| 26 |
+
|
| 27 |
+
// Parallel fetch — much faster than sequential scanning
|
| 28 |
+
const results = await Promise.allSettled(
|
| 29 |
+
successLinks.map(async (link) => {
|
| 30 |
+
const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${link.index}/raw/doc_${link.index}_direct_judged.jsonl`;
|
| 31 |
+
const docRes = await fetch(docUrl, {
|
| 32 |
+
headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
|
| 33 |
+
});
|
| 34 |
+
|
| 35 |
+
if (!docRes.ok) return null;
|
| 36 |
+
|
| 37 |
+
const pagesData = await docRes.json();
|
| 38 |
+
const annotatablePages = pagesData
|
| 39 |
+
.filter(page => page.datasets && page.datasets.length > 0)
|
| 40 |
+
.map(page => page.document.pages[0]);
|
| 41 |
+
|
| 42 |
+
if (annotatablePages.length === 0) return null;
|
| 43 |
+
|
| 44 |
+
return {
|
| 45 |
+
index: link.index,
|
| 46 |
+
pdf_url: link.direct_pdf_url,
|
| 47 |
+
landing_page: link.landing_page_url,
|
| 48 |
+
annotatable_pages: annotatablePages
|
| 49 |
+
};
|
| 50 |
+
})
|
| 51 |
+
);
|
| 52 |
+
|
| 53 |
+
const documents = results
|
| 54 |
+
.filter(r => r.status === 'fulfilled' && r.value !== null)
|
| 55 |
+
.map(r => r.value);
|
| 56 |
|
| 57 |
return new Response(JSON.stringify(documents), {
|
| 58 |
status: 200,
|
app/components/PdfViewer.js
CHANGED
|
@@ -1,9 +1,16 @@
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
-
import { useState } from 'react';
|
| 4 |
|
| 5 |
export default function PdfViewer({ pdfUrl, pageNumber }) {
|
| 6 |
-
const [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
if (!pdfUrl) {
|
| 9 |
return (
|
|
@@ -17,14 +24,25 @@ export default function PdfViewer({ pdfUrl, pageNumber }) {
|
|
| 17 |
const viewerPage = (pageNumber ?? 0) + 1;
|
| 18 |
|
| 19 |
// Use Mozilla's hosted PDF.js viewer — supports #page=N for direct page navigation.
|
|
|
|
| 20 |
const pdfJsViewerUrl = `https://mozilla.github.io/pdf.js/web/viewer.html?file=${encodeURIComponent(pdfUrl)}#page=${viewerPage}`;
|
| 21 |
|
| 22 |
return (
|
| 23 |
<div className="pdf-container">
|
| 24 |
-
{
|
| 25 |
-
<div className="pdf-loading
|
| 26 |
<div className="loading-spinner" />
|
| 27 |
<p>Loading PDF...</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
</div>
|
| 29 |
)}
|
| 30 |
<iframe
|
|
@@ -33,7 +51,9 @@ export default function PdfViewer({ pdfUrl, pageNumber }) {
|
|
| 33 |
className="pdf-frame"
|
| 34 |
title={`PDF Page ${viewerPage}`}
|
| 35 |
allow="fullscreen"
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
/>
|
| 38 |
</div>
|
| 39 |
);
|
|
|
|
| 1 |
"use client";
|
| 2 |
|
| 3 |
+
import { useState, useEffect } from 'react';
|
| 4 |
|
| 5 |
export default function PdfViewer({ pdfUrl, pageNumber }) {
|
| 6 |
+
const [loading, setLoading] = useState(true);
|
| 7 |
+
const [error, setError] = useState(false);
|
| 8 |
+
|
| 9 |
+
// Reset loading state when URL or page changes
|
| 10 |
+
useEffect(() => {
|
| 11 |
+
setLoading(true);
|
| 12 |
+
setError(false);
|
| 13 |
+
}, [pdfUrl, pageNumber]);
|
| 14 |
|
| 15 |
if (!pdfUrl) {
|
| 16 |
return (
|
|
|
|
| 24 |
const viewerPage = (pageNumber ?? 0) + 1;
|
| 25 |
|
| 26 |
// Use Mozilla's hosted PDF.js viewer — supports #page=N for direct page navigation.
|
| 27 |
+
// This avoids X-Frame-Options restrictions from the source server.
|
| 28 |
const pdfJsViewerUrl = `https://mozilla.github.io/pdf.js/web/viewer.html?file=${encodeURIComponent(pdfUrl)}#page=${viewerPage}`;
|
| 29 |
|
| 30 |
return (
|
| 31 |
<div className="pdf-container">
|
| 32 |
+
{loading && !error && (
|
| 33 |
+
<div className="pdf-loading">
|
| 34 |
<div className="loading-spinner" />
|
| 35 |
<p>Loading PDF...</p>
|
| 36 |
+
<p className="pdf-loading-hint">This may take a moment for large documents.</p>
|
| 37 |
+
</div>
|
| 38 |
+
)}
|
| 39 |
+
{error && (
|
| 40 |
+
<div className="pdf-error">
|
| 41 |
+
<p>⚠️ Failed to load PDF</p>
|
| 42 |
+
<p className="pdf-error-hint">The document may be unavailable or too large.</p>
|
| 43 |
+
<a href={pdfUrl} target="_blank" rel="noopener noreferrer" className="btn btn-secondary">
|
| 44 |
+
Open PDF directly ↗
|
| 45 |
+
</a>
|
| 46 |
</div>
|
| 47 |
)}
|
| 48 |
<iframe
|
|
|
|
| 51 |
className="pdf-frame"
|
| 52 |
title={`PDF Page ${viewerPage}`}
|
| 53 |
allow="fullscreen"
|
| 54 |
+
style={{ display: loading && !error ? 'none' : 'block' }}
|
| 55 |
+
onLoad={() => setLoading(false)}
|
| 56 |
+
onError={() => { setLoading(false); setError(true); }}
|
| 57 |
/>
|
| 58 |
</div>
|
| 59 |
);
|
app/globals.css
CHANGED
|
@@ -236,32 +236,38 @@ h4 {
|
|
| 236 |
min-height: 0;
|
| 237 |
}
|
| 238 |
|
| 239 |
-
.pdf-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
display: flex;
|
| 244 |
flex-direction: column;
|
| 245 |
align-items: center;
|
| 246 |
justify-content: center;
|
| 247 |
-
|
| 248 |
-
background: var(--pane-bg);
|
| 249 |
border-radius: 12px;
|
|
|
|
|
|
|
| 250 |
}
|
| 251 |
|
| 252 |
-
.pdf-loading
|
| 253 |
-
|
| 254 |
-
|
| 255 |
}
|
| 256 |
|
| 257 |
-
.pdf-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
border: none;
|
| 262 |
-
background: white;
|
| 263 |
-
border-radius: 12px;
|
| 264 |
-
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
|
| 265 |
}
|
| 266 |
|
| 267 |
.pdf-placeholder {
|
|
|
|
| 236 |
min-height: 0;
|
| 237 |
}
|
| 238 |
|
| 239 |
+
.pdf-frame {
|
| 240 |
+
flex: 1;
|
| 241 |
+
width: 100%;
|
| 242 |
+
min-height: 0;
|
| 243 |
+
border: none;
|
| 244 |
+
background: white;
|
| 245 |
+
border-radius: 12px;
|
| 246 |
+
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
.pdf-loading,
|
| 250 |
+
.pdf-error {
|
| 251 |
+
flex: 1;
|
| 252 |
display: flex;
|
| 253 |
flex-direction: column;
|
| 254 |
align-items: center;
|
| 255 |
justify-content: center;
|
| 256 |
+
background-color: var(--surface);
|
|
|
|
| 257 |
border-radius: 12px;
|
| 258 |
+
color: #94a3b8;
|
| 259 |
+
gap: 8px;
|
| 260 |
}
|
| 261 |
|
| 262 |
+
.pdf-loading p,
|
| 263 |
+
.pdf-error p {
|
| 264 |
+
margin: 0;
|
| 265 |
}
|
| 266 |
|
| 267 |
+
.pdf-loading-hint,
|
| 268 |
+
.pdf-error-hint {
|
| 269 |
+
font-size: 0.75rem;
|
| 270 |
+
color: #64748b;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
}
|
| 272 |
|
| 273 |
.pdf-placeholder {
|
app/page.js
CHANGED
|
@@ -78,36 +78,11 @@ export default function Home() {
|
|
| 78 |
}
|
| 79 |
}, []);
|
| 80 |
|
| 81 |
-
//
|
| 82 |
useEffect(() => {
|
| 83 |
if (selectedDocIndex !== null) {
|
| 84 |
const doc = documents.find(d => d.index === selectedDocIndex);
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
// If annotatable_pages already loaded (cached), use it
|
| 88 |
-
if (doc.annotatable_pages) {
|
| 89 |
-
setCurrentDoc(doc);
|
| 90 |
-
return;
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
// Fetch annotatable pages on-demand
|
| 94 |
-
setLoadingPage(true);
|
| 95 |
-
fetch(`/api/document?index=${selectedDocIndex}`)
|
| 96 |
-
.then(res => res.json())
|
| 97 |
-
.then(data => {
|
| 98 |
-
const updatedDoc = { ...doc, annotatable_pages: data.annotatable_pages || [] };
|
| 99 |
-
setCurrentDoc(updatedDoc);
|
| 100 |
-
// Cache in the documents array
|
| 101 |
-
setDocuments(prev => prev.map(d =>
|
| 102 |
-
d.index === selectedDocIndex ? updatedDoc : d
|
| 103 |
-
));
|
| 104 |
-
setLoadingPage(false);
|
| 105 |
-
})
|
| 106 |
-
.catch(err => {
|
| 107 |
-
console.error("Failed to load document pages", err);
|
| 108 |
-
setCurrentDoc({ ...doc, annotatable_pages: [] });
|
| 109 |
-
setLoadingPage(false);
|
| 110 |
-
});
|
| 111 |
}
|
| 112 |
}, [selectedDocIndex, documents]);
|
| 113 |
|
|
|
|
| 78 |
}
|
| 79 |
}, []);
|
| 80 |
|
| 81 |
+
// Update currentDoc when selection changes
|
| 82 |
useEffect(() => {
|
| 83 |
if (selectedDocIndex !== null) {
|
| 84 |
const doc = documents.find(d => d.index === selectedDocIndex);
|
| 85 |
+
setCurrentDoc(doc);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
}
|
| 87 |
}, [selectedDocIndex, documents]);
|
| 88 |
|