rafmacalaba committed on
Commit
9cc2491
·
1 Parent(s): b584890

feat: add loading spinner and error fallback to PDF viewer

Browse files

Shows 'Loading PDF...' with spinner while iframe loads.
Shows error state with 'Open PDF directly' link if load fails.
Resets loading state on document/page change.

app/api/document/route.js CHANGED
@@ -6,80 +6,67 @@ const isHFSpace = () => {
6
  return process.env.HF_TOKEN && process.env.NODE_ENV !== 'development';
7
  };
8
 
9
- /**
10
- * Loads ALL pages data for a given document index.
11
- */
12
- async function loadPagesData(indexNum) {
13
- if (isHFSpace()) {
14
- const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${indexNum}/raw/doc_${indexNum}_direct_judged.jsonl`;
15
- const res = await fetch(docUrl, {
16
- headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
17
- });
18
- if (!res.ok) return null;
19
- return await res.json();
20
- } else {
21
- const filePath = path.join(
22
- process.cwd(),
23
- 'annotation_data', 'wbg_extractions',
24
- `doc_${indexNum}`, 'raw', `doc_${indexNum}_direct_judged.jsonl`
25
- );
26
- if (!fs.existsSync(filePath)) return null;
27
- return JSON.parse(fs.readFileSync(filePath, 'utf-8'));
28
- }
29
- }
30
-
31
  export async function GET(request) {
32
  const { searchParams } = new URL(request.url);
33
  const index = searchParams.get('index');
34
  const page = searchParams.get('page');
35
 
36
- if (index === null) {
 
37
  return new Response(
38
- JSON.stringify({ error: "Missing index parameter" }),
39
  { status: 400, headers: { 'Content-Type': 'application/json' } }
40
  );
41
  }
42
 
 
43
  const indexNum = parseInt(index, 10);
44
- if (isNaN(indexNum) || indexNum < 0) {
 
 
45
  return new Response(
46
- JSON.stringify({ error: "index must be a non-negative integer" }),
47
  { status: 400, headers: { 'Content-Type': 'application/json' } }
48
  );
49
  }
50
 
51
  try {
52
- const pagesData = await loadPagesData(indexNum);
53
-
54
- if (!pagesData) {
55
- return new Response(
56
- JSON.stringify({ error: `doc_${indexNum}_direct_judged.jsonl not found` }),
57
- { status: 404, headers: { 'Content-Type': 'application/json' } }
58
- );
59
- }
60
-
61
- // If no page specified, return just the annotatable pages list
62
- if (page === null || page === undefined) {
63
- const annotatablePages = pagesData
64
- .filter(p => p.datasets && p.datasets.length > 0)
65
- .map(p => p.document?.pages?.[0]);
66
 
67
- return new Response(JSON.stringify({ annotatable_pages: annotatablePages }), {
68
- status: 200,
69
- headers: { 'Content-Type': 'application/json' }
 
 
70
  });
71
- }
72
 
73
- // Specific page requested
74
- const pageNum = parseInt(page, 10);
75
- if (isNaN(pageNum) || pageNum < 0) {
76
- return new Response(
77
- JSON.stringify({ error: "page must be a non-negative integer" }),
78
- { status: 400, headers: { 'Content-Type': 'application/json' } }
 
 
 
 
 
 
 
79
  );
 
 
 
 
 
 
 
 
 
80
  }
81
 
82
  const pageData = pagesData.find(p => p.document?.pages?.[0] === pageNum);
 
83
  if (!pageData) {
84
  return new Response(
85
  JSON.stringify({ error: `Page ${pageNum} not found in doc ${indexNum}` }),
 
6
  return process.env.HF_TOKEN && process.env.NODE_ENV !== 'development';
7
  };
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  export async function GET(request) {
10
  const { searchParams } = new URL(request.url);
11
  const index = searchParams.get('index');
12
  const page = searchParams.get('page');
13
 
14
+ // Validate required params
15
+ if (index === null || page === null) {
16
  return new Response(
17
+ JSON.stringify({ error: "Missing index or page parameter" }),
18
  { status: 400, headers: { 'Content-Type': 'application/json' } }
19
  );
20
  }
21
 
22
+ // Validate numeric values
23
  const indexNum = parseInt(index, 10);
24
+ const pageNum = parseInt(page, 10);
25
+
26
+ if (isNaN(indexNum) || isNaN(pageNum) || indexNum < 0 || pageNum < 0) {
27
  return new Response(
28
+ JSON.stringify({ error: "index and page must be non-negative integers" }),
29
  { status: 400, headers: { 'Content-Type': 'application/json' } }
30
  );
31
  }
32
 
33
  try {
34
+ let pagesData;
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ if (isHFSpace()) {
37
+ // Production: fetch from HuggingFace
38
+ const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${indexNum}/raw/doc_${indexNum}_direct_judged.jsonl`;
39
+ const res = await fetch(docUrl, {
40
+ headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
41
  });
 
42
 
43
+ if (!res.ok) {
44
+ return new Response(
45
+ JSON.stringify({ error: `doc_${indexNum}_direct_judged.jsonl not found on HF Datasets` }),
46
+ { status: res.status, headers: { 'Content-Type': 'application/json' } }
47
+ );
48
+ }
49
+ pagesData = await res.json();
50
+ } else {
51
+ // Local dev: read from local file (reflects saved annotations immediately)
52
+ const filePath = path.join(
53
+ process.cwd(),
54
+ 'annotation_data', 'wbg_extractions',
55
+ `doc_${indexNum}`, 'raw', `doc_${indexNum}_direct_judged.jsonl`
56
  );
57
+
58
+ if (!fs.existsSync(filePath)) {
59
+ return new Response(
60
+ JSON.stringify({ error: `doc_${indexNum}_direct_judged.jsonl not found locally` }),
61
+ { status: 404, headers: { 'Content-Type': 'application/json' } }
62
+ );
63
+ }
64
+ const raw = fs.readFileSync(filePath, 'utf-8');
65
+ pagesData = JSON.parse(raw);
66
  }
67
 
68
  const pageData = pagesData.find(p => p.document?.pages?.[0] === pageNum);
69
+
70
  if (!pageData) {
71
  return new Response(
72
  JSON.stringify({ error: `Page ${pageNum} not found in doc ${indexNum}` }),
app/api/documents/route.js CHANGED
@@ -1,4 +1,4 @@
1
- import { HF_DATASET_BASE_URL } from '../../../utils/config.js';
2
 
3
  export async function GET() {
4
  try {
@@ -21,17 +21,38 @@ export async function GET() {
21
 
22
  const links = await linksRes.json();
23
 
24
- // Return ALL successful links — no limit, no data pre-fetching.
25
- // Page data is loaded on-demand when the user selects a document.
26
- const documents = links
27
- .filter(l => l.status === 'success')
28
- .map(link => ({
29
- index: link.index,
30
- pdf_url: link.direct_pdf_url,
31
- landing_page: link.landing_page_url,
32
- // annotatable_pages will be fetched on-demand via /api/document
33
- annotatable_pages: null,
34
- }));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  return new Response(JSON.stringify(documents), {
37
  status: 200,
 
1
+ import { HF_DATASET_BASE_URL, MAX_DOCS_TO_SCAN } from '../../../utils/config.js';
2
 
3
  export async function GET() {
4
  try {
 
21
 
22
  const links = await linksRes.json();
23
 
24
+ // Filter to successful links and take the first N
25
+ const successLinks = links.filter(l => l.status === 'success').slice(0, MAX_DOCS_TO_SCAN);
26
+
27
+ // Parallel fetch — much faster than sequential scanning
28
+ const results = await Promise.allSettled(
29
+ successLinks.map(async (link) => {
30
+ const docUrl = `${HF_DATASET_BASE_URL}/raw/main/annotation_data/wbg_extractions/doc_${link.index}/raw/doc_${link.index}_direct_judged.jsonl`;
31
+ const docRes = await fetch(docUrl, {
32
+ headers: { 'Authorization': `Bearer ${process.env.HF_TOKEN}` }
33
+ });
34
+
35
+ if (!docRes.ok) return null;
36
+
37
+ const pagesData = await docRes.json();
38
+ const annotatablePages = pagesData
39
+ .filter(page => page.datasets && page.datasets.length > 0)
40
+ .map(page => page.document.pages[0]);
41
+
42
+ if (annotatablePages.length === 0) return null;
43
+
44
+ return {
45
+ index: link.index,
46
+ pdf_url: link.direct_pdf_url,
47
+ landing_page: link.landing_page_url,
48
+ annotatable_pages: annotatablePages
49
+ };
50
+ })
51
+ );
52
+
53
+ const documents = results
54
+ .filter(r => r.status === 'fulfilled' && r.value !== null)
55
+ .map(r => r.value);
56
 
57
  return new Response(JSON.stringify(documents), {
58
  status: 200,
app/components/PdfViewer.js CHANGED
@@ -1,9 +1,16 @@
1
  "use client";
2
 
3
- import { useState } from 'react';
4
 
5
  export default function PdfViewer({ pdfUrl, pageNumber }) {
6
- const [pdfLoading, setPdfLoading] = useState(true);
 
 
 
 
 
 
 
7
 
8
  if (!pdfUrl) {
9
  return (
@@ -17,14 +24,25 @@ export default function PdfViewer({ pdfUrl, pageNumber }) {
17
  const viewerPage = (pageNumber ?? 0) + 1;
18
 
19
  // Use Mozilla's hosted PDF.js viewer — supports #page=N for direct page navigation.
 
20
  const pdfJsViewerUrl = `https://mozilla.github.io/pdf.js/web/viewer.html?file=${encodeURIComponent(pdfUrl)}#page=${viewerPage}`;
21
 
22
  return (
23
  <div className="pdf-container">
24
- {pdfLoading && (
25
- <div className="pdf-loading-overlay">
26
  <div className="loading-spinner" />
27
  <p>Loading PDF...</p>
 
 
 
 
 
 
 
 
 
 
28
  </div>
29
  )}
30
  <iframe
@@ -33,7 +51,9 @@ export default function PdfViewer({ pdfUrl, pageNumber }) {
33
  className="pdf-frame"
34
  title={`PDF Page ${viewerPage}`}
35
  allow="fullscreen"
36
- onLoad={() => setPdfLoading(false)}
 
 
37
  />
38
  </div>
39
  );
 
1
  "use client";
2
 
3
+ import { useState, useEffect } from 'react';
4
 
5
  export default function PdfViewer({ pdfUrl, pageNumber }) {
6
+ const [loading, setLoading] = useState(true);
7
+ const [error, setError] = useState(false);
8
+
9
+ // Reset loading state when URL or page changes
10
+ useEffect(() => {
11
+ setLoading(true);
12
+ setError(false);
13
+ }, [pdfUrl, pageNumber]);
14
 
15
  if (!pdfUrl) {
16
  return (
 
24
  const viewerPage = (pageNumber ?? 0) + 1;
25
 
26
  // Use Mozilla's hosted PDF.js viewer — supports #page=N for direct page navigation.
27
+ // This avoids X-Frame-Options restrictions from the source server.
28
  const pdfJsViewerUrl = `https://mozilla.github.io/pdf.js/web/viewer.html?file=${encodeURIComponent(pdfUrl)}#page=${viewerPage}`;
29
 
30
  return (
31
  <div className="pdf-container">
32
+ {loading && !error && (
33
+ <div className="pdf-loading">
34
  <div className="loading-spinner" />
35
  <p>Loading PDF...</p>
36
+ <p className="pdf-loading-hint">This may take a moment for large documents.</p>
37
+ </div>
38
+ )}
39
+ {error && (
40
+ <div className="pdf-error">
41
+ <p>⚠️ Failed to load PDF</p>
42
+ <p className="pdf-error-hint">The document may be unavailable or too large.</p>
43
+ <a href={pdfUrl} target="_blank" rel="noopener noreferrer" className="btn btn-secondary">
44
+ Open PDF directly ↗
45
+ </a>
46
  </div>
47
  )}
48
  <iframe
 
51
  className="pdf-frame"
52
  title={`PDF Page ${viewerPage}`}
53
  allow="fullscreen"
54
+ style={{ display: loading && !error ? 'none' : 'block' }}
55
+ onLoad={() => setLoading(false)}
56
+ onError={() => { setLoading(false); setError(true); }}
57
  />
58
  </div>
59
  );
app/globals.css CHANGED
@@ -236,32 +236,38 @@ h4 {
236
  min-height: 0;
237
  }
238
 
239
- .pdf-loading-overlay {
240
- position: absolute;
241
- inset: 0;
242
- z-index: 5;
 
 
 
 
 
 
 
 
 
243
  display: flex;
244
  flex-direction: column;
245
  align-items: center;
246
  justify-content: center;
247
- gap: 12px;
248
- background: var(--pane-bg);
249
  border-radius: 12px;
 
 
250
  }
251
 
252
- .pdf-loading-overlay p {
253
- font-size: 0.85rem;
254
- color: #94a3b8;
255
  }
256
 
257
- .pdf-frame {
258
- flex: 1;
259
- width: 100%;
260
- min-height: 0;
261
- border: none;
262
- background: white;
263
- border-radius: 12px;
264
- box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
265
  }
266
 
267
  .pdf-placeholder {
 
236
  min-height: 0;
237
  }
238
 
239
+ .pdf-frame {
240
+ flex: 1;
241
+ width: 100%;
242
+ min-height: 0;
243
+ border: none;
244
+ background: white;
245
+ border-radius: 12px;
246
+ box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
247
+ }
248
+
249
+ .pdf-loading,
250
+ .pdf-error {
251
+ flex: 1;
252
  display: flex;
253
  flex-direction: column;
254
  align-items: center;
255
  justify-content: center;
256
+ background-color: var(--surface);
 
257
  border-radius: 12px;
258
+ color: #94a3b8;
259
+ gap: 8px;
260
  }
261
 
262
+ .pdf-loading p,
263
+ .pdf-error p {
264
+ margin: 0;
265
  }
266
 
267
+ .pdf-loading-hint,
268
+ .pdf-error-hint {
269
+ font-size: 0.75rem;
270
+ color: #64748b;
 
 
 
 
271
  }
272
 
273
  .pdf-placeholder {
app/page.js CHANGED
@@ -78,36 +78,11 @@ export default function Home() {
78
  }
79
  }, []);
80
 
81
- // Fetch annotatable pages on-demand when document selection changes
82
  useEffect(() => {
83
  if (selectedDocIndex !== null) {
84
  const doc = documents.find(d => d.index === selectedDocIndex);
85
- if (!doc) return;
86
-
87
- // If annotatable_pages already loaded (cached), use it
88
- if (doc.annotatable_pages) {
89
- setCurrentDoc(doc);
90
- return;
91
- }
92
-
93
- // Fetch annotatable pages on-demand
94
- setLoadingPage(true);
95
- fetch(`/api/document?index=${selectedDocIndex}`)
96
- .then(res => res.json())
97
- .then(data => {
98
- const updatedDoc = { ...doc, annotatable_pages: data.annotatable_pages || [] };
99
- setCurrentDoc(updatedDoc);
100
- // Cache in the documents array
101
- setDocuments(prev => prev.map(d =>
102
- d.index === selectedDocIndex ? updatedDoc : d
103
- ));
104
- setLoadingPage(false);
105
- })
106
- .catch(err => {
107
- console.error("Failed to load document pages", err);
108
- setCurrentDoc({ ...doc, annotatable_pages: [] });
109
- setLoadingPage(false);
110
- });
111
  }
112
  }, [selectedDocIndex, documents]);
113
 
 
78
  }
79
  }, []);
80
 
81
+ // Update currentDoc when selection changes
82
  useEffect(() => {
83
  if (selectedDocIndex !== null) {
84
  const doc = documents.find(d => d.index === selectedDocIndex);
85
+ setCurrentDoc(doc);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  }
87
  }, [selectedDocIndex, documents]);
88