|
|
""" |
|
|
Streamlit App: PDF & JSON Response Viewer |
|
|
View PDFs from EDTReports with their corresponding JSON responses |
|
|
""" |
|
|
|
|
|
import streamlit as st |
|
|
import json |
|
|
from pathlib import Path |
|
|
import base64 |
|
|
import fitz |
|
|
from PIL import Image |
|
|
import io |
|
|
|
|
|
|
|
|
# Directory (relative to the working directory) that is scanned for "*.pdf" files.
PDF_DIR = "EDTReports"
# Directory in which the JSON response belonging to each PDF is looked up.
JSON_DIR = "response"
|
|
|
|
|
|
|
|
# Page-level Streamlit settings, applied once at import time.
# NOTE(review): the page_icon value looks like the remnant of a mis-decoded
# emoji - confirm the intended glyph against the original file.
_PAGE_SETTINGS = {
    "page_title": "PDF & JSON Viewer",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
|
|
|
|
|
|
|
|
# Stylesheet for the CSS classes used by the HTML snippets this app emits
# (main-header, pdf-container, json-container, section-header and the
# info/success/warning boxes).
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .pdf-container {
        border: 2px solid #e0e0e0;
        border-radius: 10px;
        padding: 10px;
        background-color: #f9f9f9;
    }
    .json-container {
        background-color: #f5f5f5;
        border-radius: 10px;
        padding: 20px;
        max-height: 800px;
        overflow-y: auto;
    }
    .section-header {
        background-color: #1f77b4;
        color: white;
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
        font-weight: bold;
    }
    .info-box {
        background-color: #e3f2fd;
        padding: 15px;
        border-radius: 8px;
        border-left: 4px solid #1f77b4;
        margin: 10px 0;
    }
    .success-box {
        background-color: #e8f5e9;
        padding: 10px;
        border-radius: 5px;
        border-left: 4px solid #4caf50;
    }
    .warning-box {
        background-color: #fff3e0;
        padding: 10px;
        border-radius: 5px;
        border-left: 4px solid #ff9800;
    }
</style>
"""

# Inject the stylesheet into the page as raw HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
def get_pdf_files():
    """Return the PDF files located directly inside ``PDF_DIR``.

    Returns:
        A sorted list of ``Path`` objects for every regular file matching
        ``*.pdf`` in ``PDF_DIR``; an empty list when the directory does not
        exist.
    """
    pdf_dir = Path(PDF_DIR)
    if not pdf_dir.is_dir():
        return []
    # glob() also matches directories that happen to be named "*.pdf"; those
    # cannot be rendered or offered for download, so keep regular files only.
    return sorted(path for path in pdf_dir.glob("*.pdf") if path.is_file())
|
|
|
|
|
|
|
|
def get_json_for_pdf(pdf_filename):
    """Load the JSON response that belongs to a PDF file.

    The JSON file is looked up in ``JSON_DIR`` under the PDF's file name with
    its trailing ``.pdf`` extension replaced by ``.json``.

    Args:
        pdf_filename: File name of the PDF, e.g. ``"report.pdf"``.

    Returns:
        The parsed JSON content, ``None`` when no JSON file exists for the
        PDF, or ``{"error": "<message>"}`` when the file exists but cannot be
        read or parsed.
    """
    # Swap only the trailing extension, case-insensitively. str.replace()
    # would also rewrite a ".pdf" occurring earlier in the name and would
    # leave an upper-case ".PDF" extension untouched.
    if pdf_filename.lower().endswith(".pdf"):
        json_filename = pdf_filename[: -len(".pdf")] + ".json"
    else:
        json_filename = pdf_filename
    json_path = Path(JSON_DIR) / json_filename

    # Open directly instead of checking exists() first: the file could
    # disappear between the check and the open.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {"error": f"Failed to load JSON: {e}"}
|
|
|
|
|
|
|
|
def display_pdf(pdf_path, zoom_level=2.0):
    """Render one page of a PDF as an image inside the Streamlit app.

    The document is opened with PyMuPDF; when it has more than one page a
    slider selects the page to show. If anything goes wrong while opening or
    rendering, an error is shown and the raw file is offered for download.

    Args:
        pdf_path: Path of the PDF file (``str`` or ``Path``).
        zoom_level: Scale factor applied to both axes when rasterising the
            page.
    """
    try:
        pdf_document = fitz.open(pdf_path)
        try:
            total_pages = len(pdf_document)

            # NOTE(review): the "π" in this label looks like the remnant of a
            # mis-decoded emoji - confirm the intended glyph.
            if total_pages > 1:
                page_num = st.slider("π Select Page", 1, total_pages, 1) - 1
            else:
                page_num = 0

            page = pdf_document[page_num]
            mat = fitz.Matrix(zoom_level, zoom_level)
            pix = page.get_pixmap(matrix=mat)

            # Assumes the pixmap holds 3-channel RGB samples without alpha
            # (presumably the get_pixmap default) - TODO confirm.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Release the document even when rendering fails or Streamlit
            # interrupts the script while the slider is being handled.
            pdf_document.close()

        # NOTE(review): use_column_width is reportedly deprecated in recent
        # Streamlit releases - check against the installed version.
        st.image(img, use_column_width=True)

        if total_pages > 1:
            st.caption(f"Page {page_num + 1} of {total_pages}")

    except Exception as e:
        # Deliberately broad: this is the UI boundary and any rendering
        # failure should degrade to the download fallback, not crash the app.
        st.error(f"Error displaying PDF: {str(e)}")

        st.warning("PDF could not be displayed. Click below to download:")
        source = Path(pdf_path)
        try:
            pdf_bytes = source.read_bytes()
        except OSError as read_error:
            # The file itself is unreadable (missing, a directory, no
            # permission), so there is nothing to offer for download.
            st.error(f"Could not read PDF for download: {read_error}")
            return
        st.download_button(
            label="📥 Download PDF",
            data=pdf_bytes,
            file_name=source.name,
            mime="application/pdf",
        )
|
|
|
|
|
|
|
|
def display_json_section(title, data, level=0):
    """Recursively render JSON-like data as Streamlit markdown.

    Args:
        title: Heading shown above ``data``. At ``level`` 0 it is rendered
            inside a ``section-header`` div, at deeper levels as bold text.
            It is interpolated into HTML unescaped, so it must be trusted
            text. When empty, no heading is rendered; an untitled dict is
            preceded by a horizontal rule instead.
        data: A dict, list or scalar value.
        level: Nesting depth; controls the indentation of the rendered lines.
    """
    # Markdown strips up to three leading spaces and turns four or more into
    # a code block, so indent with non-breaking-space entities instead.
    indent = "&nbsp;&nbsp;" * level

    # Render the heading for every kind of data, so that a section whose
    # content is a list or a plain string still gets its title.
    if title:
        if level == 0:
            st.markdown(f'<div class="section-header">{title}</div>', unsafe_allow_html=True)
        else:
            st.markdown(f"**{title}**")
    elif isinstance(data, dict):
        # Untitled dicts are list items: separate them with an explicit rule
        # rather than an empty heading.
        st.markdown("---")

    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, dict):
                display_json_section(key, value, level + 1)
            elif isinstance(value, list):
                st.markdown(f"{indent}**{key}:**")
                # The list branch below renders the items one level deeper.
                display_json_section("", value, level + 1)
            else:
                st.markdown(f"{indent}**{key}:** {value}")
    elif isinstance(data, list):
        for item in data:
            if isinstance(item, dict):
                display_json_section("", item, level)
            else:
                st.markdown(f"{indent}• {item}")
    else:
        st.markdown(f"{indent}{data}")
|
|
|
|
|
|
|
|
# Session-state key of the PDF selectbox; the navigation buttons write to it.
_SELECTED_PDF_KEY = "selected_pdf"

# (section number, heading) pairs rendered from the "data" object of a response.
_SECTION_TITLES = (
    (1, "Subject Identification"),
    (2, "Findings & Interpretation"),
    (3, "Brief Medical Summary"),
    (4, "Life Insurance Underwriting View"),
)


def _step_selection(files, offset):
    """Button callback: move the selected PDF by ``offset`` within ``files``."""
    current = st.session_state.get(_SELECTED_PDF_KEY)
    if current not in files:
        return
    target = files.index(current) + offset
    if 0 <= target < len(files):
        st.session_state[_SELECTED_PDF_KEY] = files[target]


def _render_sidebar(pdf_files):
    """Draw the sidebar and return the selected PDF, or None if none match."""
    # NOTE(review): every "π" in the labels of this file looks like the
    # remnant of a mis-decoded emoji - confirm the intended glyphs.
    with st.sidebar:
        st.markdown("### π File Selection")

        search_term = st.text_input("π Search files:", "")
        if search_term:
            needle = search_term.lower()
            filtered_files = [f for f in pdf_files if needle in f.name.lower()]
        else:
            filtered_files = pdf_files

        st.markdown(f"**Found: {len(filtered_files)} files**")

        if not filtered_files:
            st.warning("No files match your search")
            return None

        # Drop a selection that the current filter no longer contains before
        # the widget is created, so the stored value is always a valid option.
        if st.session_state.get(_SELECTED_PDF_KEY) not in filtered_files:
            st.session_state[_SELECTED_PDF_KEY] = filtered_files[0]

        selected_file = st.selectbox(
            "Select a PDF file:",
            filtered_files,
            format_func=lambda x: x.name,
            key=_SELECTED_PDF_KEY,
        )

        st.markdown("---")

        st.markdown("### π Statistics")
        json_dir = Path(JSON_DIR)
        total_jsons = sum(1 for _ in json_dir.glob("*.json")) if json_dir.exists() else 0
        st.metric("Total PDFs", len(pdf_files))
        st.metric("Total JSONs", total_jsons)

        st.markdown("---")
        st.markdown("### π Quick Navigation")

        current_idx = filtered_files.index(selected_file)
        col1, col2 = st.columns(2)

        # The buttons change the selection through callbacks that update the
        # selectbox's session-state entry. Assigning to a local variable and
        # rerunning would be lost, because the rerun re-reads the widget.
        with col1:
            st.button(
                "⬅️ Previous",
                on_click=_step_selection,
                args=(filtered_files, -1),
                disabled=current_idx == 0,
            )
        with col2:
            st.button(
                "Next ➡️",
                on_click=_step_selection,
                args=(filtered_files, 1),
                disabled=current_idx >= len(filtered_files) - 1,
            )

    return selected_file


def _render_json_panel(selected_file):
    """Show the JSON response that belongs to ``selected_file``."""
    st.markdown("### π JSON Response")

    json_data = get_json_for_pdf(selected_file.name)

    # Only None means "no file": an empty object or list is a valid response
    # and must not be reported as missing.
    if json_data is None:
        st.markdown(
            '<div class="warning-box">⚠️ No JSON response found for this PDF</div>',
            unsafe_allow_html=True,
        )
        st.info(f"Expected file: `{JSON_DIR}/{selected_file.name.replace('.pdf', '.json')}`")
        return

    # The top-level JSON value is not guaranteed to be an object.
    is_object = isinstance(json_data, dict)
    if is_object and "error" in json_data:
        st.error(json_data["error"])
        return

    with st.container():
        st.markdown('<div class="json-container">', unsafe_allow_html=True)

        if is_object:
            if json_data.get("status") == "success":
                st.markdown(
                    '<div class="success-box">✅ Status: Success</div>',
                    unsafe_allow_html=True,
                )

            data = json_data.get("data")
            if isinstance(data, dict):
                # Each section may be stored as "SECTION n" or "SECTION_n".
                for number, heading in _SECTION_TITLES:
                    spaced = f"SECTION {number}"
                    underscored = f"SECTION_{number}"
                    if spaced in data or underscored in data:
                        section = data.get(spaced) or data.get(underscored)
                        display_json_section(f"SECTION {number}: {heading}", section)

        with st.expander("π View Raw JSON"):
            st.json(json_data)

        st.markdown('</div>', unsafe_allow_html=True)


def main():
    """Build the page: sidebar file picker, PDF preview and JSON response."""
    st.markdown(
        '<div class="main-header">π PDF & JSON Response Viewer</div>',
        unsafe_allow_html=True,
    )

    pdf_files = get_pdf_files()
    if not pdf_files:
        st.error(f"No PDF files found in '{PDF_DIR}' directory")
        return

    selected_file = _render_sidebar(pdf_files)
    if selected_file is None:
        return

    # NOTE(review): each st.markdown call is presumably rendered as its own
    # element, so these opening/closing div pairs may not actually wrap the
    # widgets emitted between them - confirm in the browser.
    st.markdown('<div class="info-box">', unsafe_allow_html=True)
    col1, col2 = st.columns([3, 1])
    with col1:
        st.markdown(f"**Selected File:** `{selected_file.name}`")
    with col2:
        file_size = selected_file.stat().st_size / 1024
        st.markdown(f"**Size:** {file_size:.1f} KB")
    st.markdown('</div>', unsafe_allow_html=True)

    col_pdf, col_json = st.columns([1, 1])

    with col_pdf:
        st.markdown("### π PDF Document")

        zoom_level = st.select_slider(
            "π Zoom Level",
            options=[1.0, 1.5, 2.0, 2.5, 3.0],
            value=2.0,
            format_func=lambda x: f"{int(x*100)}%",
        )

        with st.container():
            st.markdown('<div class="pdf-container">', unsafe_allow_html=True)
            display_pdf(selected_file, zoom_level)
            st.markdown('</div>', unsafe_allow_html=True)

    with col_json:
        _render_json_panel(selected_file)
|
|
|
|
|
|
|
|
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|
|
|
|