Wenyu Zhang committed on
Commit
c45861a
·
1 Parent(s): 68e42cd

add application file

Browse files
Files changed (2) hide show
  1. app.py +10 -37
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
 
3
  import fitz
4
  import os
@@ -1010,15 +1011,7 @@ def find_reference_pages(pdf_path):
1010
  doc.close()
1011
  return ref_pages, start_page, end_page, ref_text
1012
 
1013
- def encode_pdf_to_base64(path):
1014
- """Encode PDF file to base64 string for embedding."""
1015
- try:
1016
- with open(path, "rb") as f:
1017
- encoded_string = base64.b64encode(f.read()).decode('utf-8')
1018
- return encoded_string
1019
- except Exception as e:
1020
- print(f"Error encoding PDF: {e}")
1021
- return None
1022
 
1023
  def process_pdf_initial(pdf_file, state_pdf_path, state_ref_pages, state_citations, state_removed_citations, state_appendix_header, state_ref_text):
1024
  """Initial PDF processing - find references and show PDF immediately."""
@@ -1068,18 +1061,10 @@ def process_pdf_initial(pdf_file, state_pdf_path, state_ref_pages, state_citatio
1068
 
1069
  status += "\n⏳ Starting automatic extraction... Please wait."
1070
 
1071
- basename = os.path.basename(new_pdf_path)
1072
-
1073
- # Generate Base64 for the full PDF view
1074
- # This bypasses browser security restrictions on local files
1075
- b64_pdf = encode_pdf_to_base64(new_pdf_path)
1076
- if b64_pdf:
1077
- iframe_html = f'<iframe src="data:application/pdf;base64,{b64_pdf}" width="100%" height="700px" style="border: none;"></iframe>'
1078
- else:
1079
- iframe_html = '<div style="padding:20px">Error loading PDF viewer.</div>'
1080
 
 
1081
  return (new_pdf_path, status,
1082
- gr.update(value=iframe_html, visible=True),
1083
  gr.update(visible=True, value="Show Full PDF"),
1084
  gr.update(visible=False), # Citations display
1085
  gr.update(interactive=False, visible=False), # Verify Button
@@ -1941,13 +1926,7 @@ def update_view(view_mode, state_pdf_path, state_ref_pages, state_citations, sta
1941
 
1942
  # Check cache first
1943
  if state_ref_pdf_path and os.path.exists(state_ref_pdf_path):
1944
- # Return iframe HTML with Base64
1945
- b64_pdf = encode_pdf_to_base64(state_ref_pdf_path)
1946
- if b64_pdf:
1947
- iframe_html = f'<iframe src="data:application/pdf;base64,{b64_pdf}" width="100%" height="700px" style="border: none;"></iframe>'
1948
- else:
1949
- iframe_html = "Error loading PDF."
1950
- upd_ref_pdf = gr.update(value=iframe_html)
1951
  else:
1952
  # Generate the Subset PDF if needed.
1953
  if state_ref_pages and state_pdf_path:
@@ -1962,13 +1941,7 @@ def update_view(view_mode, state_pdf_path, state_ref_pages, state_citations, sta
1962
  doc.close()
1963
 
1964
  state_ref_pdf_path = output_path
1965
- # Return iframe HTML with Base64
1966
- b64_pdf = encode_pdf_to_base64(output_path)
1967
- if b64_pdf:
1968
- iframe_html = f'<iframe src="data:application/pdf;base64,{b64_pdf}" width="100%" height="700px" style="border: none;"></iframe>'
1969
- else:
1970
- iframe_html = "Error loading PDF."
1971
- upd_ref_pdf = gr.update(value=iframe_html)
1972
 
1973
  yield (vis_full, vis_ref, vis_cit, vis_ver, upd_ref_pdf, upd_cit_disp, upd_ver_disp, upd_load, state_ref_pdf_path)
1974
 
@@ -2207,13 +2180,13 @@ with gr.Blocks(title="CiteAudit", css="""
2207
 
2208
  # 1. Full PDF View
2209
  with gr.Group(visible=True) as view_full_pdf:
2210
- # Use standard HTML iframe for better browser compatibility (specifically Safari)
2211
- pdf_viewer_full = gr.HTML(label="Full PDF", elem_id="pdf-viewer-full")
2212
 
2213
  # 2. Reference Pages View
2214
  with gr.Group(visible=False) as view_ref_pages:
2215
- # Use standard HTML iframe
2216
- pdf_viewer_ref = gr.HTML(label="Reference Pages", elem_id="pdf-viewer-ref")
2217
 
2218
  # 3. Citations View
2219
  with gr.Group(visible=False, elem_id="view-citations") as view_citations:
 
1
  import gradio as gr
2
+ from gradio_pdf import PDF
3
 
4
  import fitz
5
  import os
 
1011
  doc.close()
1012
  return ref_pages, start_page, end_page, ref_text
1013
 
1014
+
 
 
 
 
 
 
 
 
1015
 
1016
  def process_pdf_initial(pdf_file, state_pdf_path, state_ref_pages, state_citations, state_removed_citations, state_appendix_header, state_ref_text):
1017
  """Initial PDF processing - find references and show PDF immediately."""
 
1061
 
1062
  status += "\n⏳ Starting automatic extraction... Please wait."
1063
 
 
 
 
 
 
 
 
 
 
1064
 
1065
+ # Return directly to the PDF component
1066
  return (new_pdf_path, status,
1067
+ gr.update(value=new_pdf_path, visible=True),
1068
  gr.update(visible=True, value="Show Full PDF"),
1069
  gr.update(visible=False), # Citations display
1070
  gr.update(interactive=False, visible=False), # Verify Button
 
1926
 
1927
  # Check cache first
1928
  if state_ref_pdf_path and os.path.exists(state_ref_pdf_path):
1929
+ upd_ref_pdf = gr.update(value=state_ref_pdf_path)
 
 
 
 
 
 
1930
  else:
1931
  # Generate the Subset PDF if needed.
1932
  if state_ref_pages and state_pdf_path:
 
1941
  doc.close()
1942
 
1943
  state_ref_pdf_path = output_path
1944
+ upd_ref_pdf = gr.update(value=output_path)
 
 
 
 
 
 
1945
 
1946
  yield (vis_full, vis_ref, vis_cit, vis_ver, upd_ref_pdf, upd_cit_disp, upd_ver_disp, upd_load, state_ref_pdf_path)
1947
 
 
2180
 
2181
  # 1. Full PDF View
2182
  with gr.Group(visible=True) as view_full_pdf:
2183
+ # Use standard PDF component
2184
+ pdf_viewer_full = PDF(label="Full PDF", height=700, elem_id="pdf-viewer-full", interactive=False)
2185
 
2186
  # 2. Reference Pages View
2187
  with gr.Group(visible=False) as view_ref_pages:
2188
+ # Use standard PDF component
2189
+ pdf_viewer_ref = PDF(label="Reference Pages", height=700, elem_id="pdf-viewer-ref", interactive=False)
2190
 
2191
  # 3. Citations View
2192
  with gr.Group(visible=False, elem_id="view-citations") as view_citations:
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  PyMuPDF
2
  gradio==4.31.0
3
 
 
4
  pydantic==2.10.6
5
  grobid-client-python
6
  huggingface_hub<1.0.0
 
1
  PyMuPDF
2
  gradio==4.31.0
3
 
4
+ gradio_pdf
5
  pydantic==2.10.6
6
  grobid-client-python
7
  huggingface_hub<1.0.0