Wenyu Zhang committed on
Commit ·
c45861a
1
Parent(s): 68e42cd
add application file
Browse files
- app.py +10 -37
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
|
| 3 |
import fitz
|
| 4 |
import os
|
|
@@ -1010,15 +1011,7 @@ def find_reference_pages(pdf_path):
|
|
| 1010 |
doc.close()
|
| 1011 |
return ref_pages, start_page, end_page, ref_text
|
| 1012 |
|
| 1013 |
-
def encode_pdf_to_base64(path):
|
| 1014 |
-
"""Encode PDF file to base64 string for embedding."""
|
| 1015 |
-
try:
|
| 1016 |
-
with open(path, "rb") as f:
|
| 1017 |
-
encoded_string = base64.b64encode(f.read()).decode('utf-8')
|
| 1018 |
-
return encoded_string
|
| 1019 |
-
except Exception as e:
|
| 1020 |
-
print(f"Error encoding PDF: {e}")
|
| 1021 |
-
return None
|
| 1022 |
|
| 1023 |
def process_pdf_initial(pdf_file, state_pdf_path, state_ref_pages, state_citations, state_removed_citations, state_appendix_header, state_ref_text):
|
| 1024 |
"""Initial PDF processing - find references and show PDF immediately."""
|
|
@@ -1068,18 +1061,10 @@ def process_pdf_initial(pdf_file, state_pdf_path, state_ref_pages, state_citatio
|
|
| 1068 |
|
| 1069 |
status += "\n⏳ Starting automatic extraction... Please wait."
|
| 1070 |
|
| 1071 |
-
basename = os.path.basename(new_pdf_path)
|
| 1072 |
-
|
| 1073 |
-
# Generate Base64 for the full PDF view
|
| 1074 |
-
# This bypasses browser security restrictions on local files
|
| 1075 |
-
b64_pdf = encode_pdf_to_base64(new_pdf_path)
|
| 1076 |
-
if b64_pdf:
|
| 1077 |
-
iframe_html = f'<iframe src="data:application/pdf;base64,{b64_pdf}" width="100%" height="700px" style="border: none;"></iframe>'
|
| 1078 |
-
else:
|
| 1079 |
-
iframe_html = '<div style="padding:20px">Error loading PDF viewer.</div>'
|
| 1080 |
|
|
|
|
| 1081 |
return (new_pdf_path, status,
|
| 1082 |
-
gr.update(value=
|
| 1083 |
gr.update(visible=True, value="Show Full PDF"),
|
| 1084 |
gr.update(visible=False), # Citations display
|
| 1085 |
gr.update(interactive=False, visible=False), # Verify Button
|
|
@@ -1941,13 +1926,7 @@ def update_view(view_mode, state_pdf_path, state_ref_pages, state_citations, sta
|
|
| 1941 |
|
| 1942 |
# Check cache first
|
| 1943 |
if state_ref_pdf_path and os.path.exists(state_ref_pdf_path):
|
| 1944 |
-
|
| 1945 |
-
b64_pdf = encode_pdf_to_base64(state_ref_pdf_path)
|
| 1946 |
-
if b64_pdf:
|
| 1947 |
-
iframe_html = f'<iframe src="data:application/pdf;base64,{b64_pdf}" width="100%" height="700px" style="border: none;"></iframe>'
|
| 1948 |
-
else:
|
| 1949 |
-
iframe_html = "Error loading PDF."
|
| 1950 |
-
upd_ref_pdf = gr.update(value=iframe_html)
|
| 1951 |
else:
|
| 1952 |
# Generate the Subset PDF if needed.
|
| 1953 |
if state_ref_pages and state_pdf_path:
|
|
@@ -1962,13 +1941,7 @@ def update_view(view_mode, state_pdf_path, state_ref_pages, state_citations, sta
|
|
| 1962 |
doc.close()
|
| 1963 |
|
| 1964 |
state_ref_pdf_path = output_path
|
| 1965 |
-
|
| 1966 |
-
b64_pdf = encode_pdf_to_base64(output_path)
|
| 1967 |
-
if b64_pdf:
|
| 1968 |
-
iframe_html = f'<iframe src="data:application/pdf;base64,{b64_pdf}" width="100%" height="700px" style="border: none;"></iframe>'
|
| 1969 |
-
else:
|
| 1970 |
-
iframe_html = "Error loading PDF."
|
| 1971 |
-
upd_ref_pdf = gr.update(value=iframe_html)
|
| 1972 |
|
| 1973 |
yield (vis_full, vis_ref, vis_cit, vis_ver, upd_ref_pdf, upd_cit_disp, upd_ver_disp, upd_load, state_ref_pdf_path)
|
| 1974 |
|
|
@@ -2207,13 +2180,13 @@ with gr.Blocks(title="CiteAudit", css="""
|
|
| 2207 |
|
| 2208 |
# 1. Full PDF View
|
| 2209 |
with gr.Group(visible=True) as view_full_pdf:
|
| 2210 |
-
# Use standard
|
| 2211 |
-
pdf_viewer_full =
|
| 2212 |
|
| 2213 |
# 2. Reference Pages View
|
| 2214 |
with gr.Group(visible=False) as view_ref_pages:
|
| 2215 |
-
# Use standard
|
| 2216 |
-
pdf_viewer_ref =
|
| 2217 |
|
| 2218 |
# 3. Citations View
|
| 2219 |
with gr.Group(visible=False, elem_id="view-citations") as view_citations:
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from gradio_pdf import PDF
|
| 3 |
|
| 4 |
import fitz
|
| 5 |
import os
|
|
|
|
| 1011 |
doc.close()
|
| 1012 |
return ref_pages, start_page, end_page, ref_text
|
| 1013 |
|
| 1014 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1015 |
|
| 1016 |
def process_pdf_initial(pdf_file, state_pdf_path, state_ref_pages, state_citations, state_removed_citations, state_appendix_header, state_ref_text):
|
| 1017 |
"""Initial PDF processing - find references and show PDF immediately."""
|
|
|
|
| 1061 |
|
| 1062 |
status += "\n⏳ Starting automatic extraction... Please wait."
|
| 1063 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1064 |
|
| 1065 |
+
# Return directly to the PDF component
|
| 1066 |
return (new_pdf_path, status,
|
| 1067 |
+
gr.update(value=new_pdf_path, visible=True),
|
| 1068 |
gr.update(visible=True, value="Show Full PDF"),
|
| 1069 |
gr.update(visible=False), # Citations display
|
| 1070 |
gr.update(interactive=False, visible=False), # Verify Button
|
|
|
|
| 1926 |
|
| 1927 |
# Check cache first
|
| 1928 |
if state_ref_pdf_path and os.path.exists(state_ref_pdf_path):
|
| 1929 |
+
upd_ref_pdf = gr.update(value=state_ref_pdf_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1930 |
else:
|
| 1931 |
# Generate the Subset PDF if needed.
|
| 1932 |
if state_ref_pages and state_pdf_path:
|
|
|
|
| 1941 |
doc.close()
|
| 1942 |
|
| 1943 |
state_ref_pdf_path = output_path
|
| 1944 |
+
upd_ref_pdf = gr.update(value=output_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1945 |
|
| 1946 |
yield (vis_full, vis_ref, vis_cit, vis_ver, upd_ref_pdf, upd_cit_disp, upd_ver_disp, upd_load, state_ref_pdf_path)
|
| 1947 |
|
|
|
|
| 2180 |
|
| 2181 |
# 1. Full PDF View
|
| 2182 |
with gr.Group(visible=True) as view_full_pdf:
|
| 2183 |
+
# Use standard PDF component
|
| 2184 |
+
pdf_viewer_full = PDF(label="Full PDF", height=700, elem_id="pdf-viewer-full", interactive=False)
|
| 2185 |
|
| 2186 |
# 2. Reference Pages View
|
| 2187 |
with gr.Group(visible=False) as view_ref_pages:
|
| 2188 |
+
# Use standard PDF component
|
| 2189 |
+
pdf_viewer_ref = PDF(label="Reference Pages", height=700, elem_id="pdf-viewer-ref", interactive=False)
|
| 2190 |
|
| 2191 |
# 3. Citations View
|
| 2192 |
with gr.Group(visible=False, elem_id="view-citations") as view_citations:
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
PyMuPDF
|
| 2 |
gradio==4.31.0
|
| 3 |
|
|
|
|
| 4 |
pydantic==2.10.6
|
| 5 |
grobid-client-python
|
| 6 |
huggingface_hub<1.0.0
|
|
|
|
| 1 |
PyMuPDF
|
| 2 |
gradio==4.31.0
|
| 3 |
|
| 4 |
+
gradio_pdf
|
| 5 |
pydantic==2.10.6
|
| 6 |
grobid-client-python
|
| 7 |
huggingface_hub<1.0.0
|