# biscuit / app.py
# MFF212's picture
# Update app.py
# dc52e17 verified
"""
Streamlit App: PDF & JSON Response Viewer
View PDFs from EDTReports with their corresponding JSON responses
"""
import streamlit as st
import json
from pathlib import Path
import base64
import fitz # PyMuPDF
from PIL import Image
import io
# Configuration: relative directory names that are scanned for the input PDFs
# (PDF_DIR) and for the JSON responses matched to them by file name (JSON_DIR).
PDF_DIR = "EDTReports"
JSON_DIR = "response"
# Page config: browser-tab title and icon, wide layout, sidebar expanded on load.
# NOTE(review): the page_icon literal looks like a mis-decoded UTF-8 emoji;
# confirm against the encoding of the original file before changing it.
st.set_page_config(
page_title="PDF & JSON Viewer",
page_icon="πŸ“„",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS: defines the classes used by the raw-HTML snippets emitted further
# down in this file (main-header, pdf-container, json-container, section-header,
# info-box, success-box, warning-box). It is injected as raw HTML, hence
# unsafe_allow_html=True.
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.pdf-container {
border: 2px solid #e0e0e0;
border-radius: 10px;
padding: 10px;
background-color: #f9f9f9;
}
.json-container {
background-color: #f5f5f5;
border-radius: 10px;
padding: 20px;
max-height: 800px;
overflow-y: auto;
}
.section-header {
background-color: #1f77b4;
color: white;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
font-weight: bold;
}
.info-box {
background-color: #e3f2fd;
padding: 15px;
border-radius: 8px;
border-left: 4px solid #1f77b4;
margin: 10px 0;
}
.success-box {
background-color: #e8f5e9;
padding: 10px;
border-radius: 5px;
border-left: 4px solid #4caf50;
}
.warning-box {
background-color: #fff3e0;
padding: 10px;
border-radius: 5px;
border-left: 4px solid #ff9800;
}
</style>
""", unsafe_allow_html=True)
def get_pdf_files():
    """Return the ``*.pdf`` paths found directly inside ``PDF_DIR``, sorted.

    Returns:
        A sorted list of ``Path`` objects, or an empty list when the
        directory does not exist.
    """
    root = Path(PDF_DIR)
    # sorted() already materialises the glob iterator into a list.
    return sorted(root.glob("*.pdf")) if root.exists() else []
def get_json_for_pdf(pdf_filename):
    """Load the JSON response stored in ``JSON_DIR`` for a given PDF file name.

    The JSON file name is the PDF file name with its final extension swapped
    for ``.json``. Only the last suffix is replaced, so a ``.pdf`` appearing
    earlier in the name is left alone and an upper-case ``.PDF`` extension is
    handled as well.

    Args:
        pdf_filename: File name of the PDF (any directory part is ignored).

    Returns:
        The parsed JSON content, ``None`` when no matching JSON file exists
        (or ``pdf_filename`` is empty), or ``{"error": "<message>"}`` when the
        file exists but cannot be read or parsed.
    """
    if not pdf_filename:
        # Path("").with_suffix() raises; there is nothing to look up anyway.
        return None

    json_path = Path(JSON_DIR) / Path(pdf_filename).with_suffix(".json").name

    # EAFP: open directly instead of exists()-then-open, which can race with
    # the file being removed in between.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {"error": f"Failed to load JSON: {str(e)}"}
def display_pdf(pdf_path, zoom_level=2.0):
    """Render one page of a PDF as an image in the current Streamlit container.

    The PDF is opened with PyMuPDF. Multi-page documents get a slider to pick
    the page; the chosen page is rasterised at ``zoom_level`` and shown with
    ``st.image``. If anything in that path fails, the error is reported and a
    download button for the raw file is offered instead.

    Args:
        pdf_path: Path of the PDF to show (``Path`` or ``str``).
        zoom_level: Scale factor applied to both axes when rasterising the
            page; larger values give a sharper, larger image.
    """
    try:
        pdf_document = fitz.open(pdf_path)
        try:
            total_pages = len(pdf_document)

            # The slider is 1-based for the user; PyMuPDF pages are 0-based.
            if total_pages > 1:
                page_num = st.slider("πŸ“– Select Page", 1, total_pages, 1) - 1
            else:
                page_num = 0

            page = pdf_document[page_num]

            # Rasterise the page at the requested zoom and hand it to PIL.
            mat = fitz.Matrix(zoom_level, zoom_level)
            pix = page.get_pixmap(matrix=mat)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # NOTE(review): newer Streamlit releases appear to deprecate
            # use_column_width in favour of use_container_width; confirm
            # against the pinned Streamlit version before switching.
            st.image(img, use_column_width=True)

            if total_pages > 1:
                st.caption(f"Page {page_num + 1} of {total_pages}")
        finally:
            # Close the document even when rendering fails part-way, so the
            # file handle is not leaked on every failed rerun.
            pdf_document.close()
    except Exception as e:
        # Deliberately broad: any rendering problem should degrade to the
        # download fallback rather than take the whole page down.
        st.error(f"Error displaying PDF: {str(e)}")
        st.warning("PDF could not be displayed. Click below to download:")

        # The file may be the very thing that is broken or missing, so the
        # fallback read must not be allowed to raise either.
        source = Path(pdf_path)
        try:
            pdf_bytes = source.read_bytes()
        except OSError as read_error:
            st.error(f"PDF could not be read for download: {read_error}")
            return

        st.download_button(
            label="πŸ“₯ Download PDF",
            data=pdf_bytes,
            file_name=source.name,
            mime="application/pdf"
        )
def display_json_section(title, data, level=0):
    """Write a JSON value to the page as nested Markdown lines.

    Args:
        title: Heading for ``data`` when it is a dict. At ``level`` 0 it is
            emitted inside a ``section-header`` div, otherwise as bold text.
            It is not used for lists or scalars.
        data: The value to render. Dicts are walked key by key, lists item
            by item, and anything else is written out as a single line.
        level: Nesting depth; each line is prefixed with one space per level.
    """
    pad = " " * level

    # Scalars: a single line, no heading.
    if not isinstance(data, (dict, list)):
        st.markdown(f"{pad}{data}")
        return

    # Bare list: dict items recurse at the same depth with an empty title,
    # everything else becomes a bullet line.
    if isinstance(data, list):
        for entry in data:
            if isinstance(entry, dict):
                display_json_section("", entry, level)
            else:
                st.markdown(f"{pad}β€’ {entry}")
        return

    # Dict: heading first (emitted even when the title is empty) ...
    if level == 0:
        st.markdown(f'<div class="section-header">{title}</div>', unsafe_allow_html=True)
    else:
        st.markdown(f"**{title}**")

    # ... then one entry per key.
    for name, child in data.items():
        if isinstance(child, dict):
            display_json_section(name, child, level + 1)
            continue

        if not isinstance(child, list):
            st.markdown(f"{pad}**{name}:** {child}")
            continue

        st.markdown(f"{pad}**{name}:**")
        for entry in child:
            if isinstance(entry, dict):
                display_json_section("", entry, level + 1)
            else:
                st.markdown(f"{pad} β€’ {entry}")
# Session-state key backing the sidebar file selector. The Previous/Next
# callbacks write to the same key, which is what actually moves the selection.
_SELECTED_PDF_KEY = "selected_pdf"

# Heading shown for each report section, paired with the JSON keys under which
# that section may be stored (listed in order of preference).
_SECTION_LAYOUT = (
    ("SECTION 1: Subject Identification", ("SECTION 1", "SECTION_1")),
    ("SECTION 2: Findings & Interpretation", ("SECTION 2", "SECTION_2")),
    ("SECTION 3: Brief Medical Summary", ("SECTION 3", "SECTION_3")),
    ("SECTION 4: Life Insurance Underwriting View", ("SECTION 4", "SECTION_4")),
)


def _step_selection(files, offset):
    """Button callback: move the selected PDF ``offset`` places within ``files``."""
    current = st.session_state.get(_SELECTED_PDF_KEY)
    if current not in files:
        return
    target = files.index(current) + offset
    # Out-of-range steps (Previous on the first file, Next on the last) are no-ops.
    if 0 <= target < len(files):
        st.session_state[_SELECTED_PDF_KEY] = files[target]


def _render_sidebar(pdf_files):
    """Draw the sidebar and return the chosen PDF, or None if the search matches nothing."""
    with st.sidebar:
        st.markdown("### πŸ“ File Selection")

        search_term = st.text_input("πŸ” Search files:", "")
        if search_term:
            needle = search_term.lower()
            filtered_files = [f for f in pdf_files if needle in f.name.lower()]
        else:
            filtered_files = pdf_files

        st.markdown(f"**Found: {len(filtered_files)} files**")

        if not filtered_files:
            st.warning("No files match your search")
            return None

        # A selection remembered from an earlier run may have been filtered
        # out by the search; drop it so the selector falls back to its first
        # option instead of holding a value that is not among its options.
        if (
            _SELECTED_PDF_KEY in st.session_state
            and st.session_state[_SELECTED_PDF_KEY] not in filtered_files
        ):
            del st.session_state[_SELECTED_PDF_KEY]

        selected_file = st.selectbox(
            "Select a PDF file:",
            filtered_files,
            format_func=lambda x: x.name,
            key=_SELECTED_PDF_KEY,
        )

        st.markdown("---")

        st.markdown("### πŸ“Š Statistics")
        json_dir = Path(JSON_DIR)
        total_jsons = len(list(json_dir.glob("*.json"))) if json_dir.exists() else 0
        st.metric("Total PDFs", len(pdf_files))
        st.metric("Total JSONs", total_jsons)

        st.markdown("---")
        st.markdown("### πŸ”„ Quick Navigation")
        col1, col2 = st.columns(2)
        # The callbacks run before the rerun triggered by the click, so the
        # selectbox picks up the new value on that rerun. Assigning to a local
        # variable and calling st.rerun() would lose the change.
        with col1:
            st.button("⬅️ Previous", on_click=_step_selection, args=(filtered_files, -1))
        with col2:
            st.button("Next ➑️", on_click=_step_selection, args=(filtered_files, 1))

    return selected_file


def _render_json_sections(json_data):
    """Render the status badge and the known report sections of a JSON object."""
    if json_data.get("status") == "success":
        st.markdown('<div class="success-box">βœ… Status: Success</div>', unsafe_allow_html=True)

    data = json_data.get("data")
    if not isinstance(data, dict):
        return

    for heading, keys in _SECTION_LAYOUT:
        candidates = [data[key] for key in keys if key in data]
        if not candidates:
            continue
        # Prefer the first non-empty spelling; fall back to whichever exists.
        section = next((value for value in candidates if value), candidates[0])
        if section is not None:
            display_json_section(heading, section)


def _render_json_panel(selected_file):
    """Render the JSON response that belongs to ``selected_file``."""
    st.markdown("### πŸ“‹ JSON Response")
    json_data = get_json_for_pdf(selected_file.name)

    # Only None means "no file"; an empty JSON object is still a response.
    if json_data is None:
        st.markdown('<div class="warning-box">⚠️ No JSON response found for this PDF</div>', unsafe_allow_html=True)
        expected_name = Path(selected_file.name).with_suffix(".json").name
        st.info(f"Expected file: `{JSON_DIR}/{expected_name}`")
        return

    # get_json_for_pdf reports load failures as {"error": "..."}.
    if isinstance(json_data, dict) and "error" in json_data:
        st.error(json_data["error"])
        return

    with st.container():
        st.markdown('<div class="json-container">', unsafe_allow_html=True)
        # Structured rendering only makes sense for JSON objects; any other
        # top-level value is still available through the raw view below.
        if isinstance(json_data, dict):
            _render_json_sections(json_data)
        with st.expander("πŸ“ View Raw JSON"):
            st.json(json_data)
        st.markdown('</div>', unsafe_allow_html=True)


def main():
    """Entry point: lay out the sidebar, the PDF preview and the JSON response.

    The sidebar offers search, file selection, counts and Previous/Next
    navigation; the main area shows the selected PDF next to the JSON response
    that shares its file name.

    NOTE(review): several emoji literals in this script look mis-decoded
    (mojibake). They are kept byte-for-byte here; confirm the intended glyphs
    against the original file. Also, the paired opening/closing ``<div>``
    snippets are emitted through separate ``st.markdown`` calls and may not
    actually wrap the widgets rendered between them; verify in the browser.
    """
    st.markdown('<div class="main-header">πŸ“„ PDF & JSON Response Viewer</div>', unsafe_allow_html=True)

    pdf_files = get_pdf_files()
    if not pdf_files:
        st.error(f"No PDF files found in '{PDF_DIR}' directory")
        return

    selected_file = _render_sidebar(pdf_files)
    if selected_file is None:
        return

    # File info banner.
    st.markdown('<div class="info-box">', unsafe_allow_html=True)
    col1, col2 = st.columns([3, 1])
    with col1:
        st.markdown(f"**Selected File:** `{selected_file.name}`")
    with col2:
        file_size = selected_file.stat().st_size / 1024
        st.markdown(f"**Size:** {file_size:.1f} KB")
    st.markdown('</div>', unsafe_allow_html=True)

    col_pdf, col_json = st.columns([1, 1])

    with col_pdf:
        st.markdown("### πŸ“„ PDF Document")
        zoom_level = st.select_slider(
            "πŸ” Zoom Level",
            options=[1.0, 1.5, 2.0, 2.5, 3.0],
            value=2.0,
            format_func=lambda x: f"{int(x*100)}%"
        )
        with st.container():
            st.markdown('<div class="pdf-container">', unsafe_allow_html=True)
            display_pdf(selected_file, zoom_level)
            st.markdown('</div>', unsafe_allow_html=True)

    with col_json:
        _render_json_panel(selected_file)


if __name__ == "__main__":
    main()