Spaces:

MFF212
/

biscuit

Runtime error

App Files Files Community

biscuit / app.py

MFF212

Update app.py

dc52e17 verified 27 days ago

raw

history blame contribute delete

11 kB

	"""
	Streamlit App: PDF & JSON Response Viewer
	View PDFs from EDTReports with their corresponding JSON responses
	"""

	import streamlit as st
	import json
	from pathlib import Path
	import base64
	import fitz # PyMuPDF
	from PIL import Image
	import io

	# Configuration
	# Directory that get_pdf_files() scans for "*.pdf" files.
	PDF_DIR = "EDTReports"
	# Directory in which get_json_for_pdf() looks for the JSON file matching a PDF.
	JSON_DIR = "response"

	# Page config: browser tab title and icon, wide layout, sidebar expanded.
	st.set_page_config(
	page_title="PDF & JSON Viewer",
	page_icon="📄",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Custom CSS
	# Injected as raw HTML. The classes below are referenced by the HTML snippets
	# that main() and display_json_section() pass to st.markdown():
	#   .main-header                    - page title banner
	#   .pdf-container/.json-container  - wrappers emitted around the two columns
	#   .section-header                 - heading of each top-level JSON section
	#   .info-box/.success-box/.warning-box - file info and status callouts
	st.markdown("""
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	color: #1f77b4;
	text-align: center;
	margin-bottom: 2rem;
	}
	.pdf-container {
	border: 2px solid #e0e0e0;
	border-radius: 10px;
	padding: 10px;
	background-color: #f9f9f9;
	}
	.json-container {
	background-color: #f5f5f5;
	border-radius: 10px;
	padding: 20px;
	max-height: 800px;
	overflow-y: auto;
	}
	.section-header {
	background-color: #1f77b4;
	color: white;
	padding: 10px;
	border-radius: 5px;
	margin: 10px 0;
	font-weight: bold;
	}
	.info-box {
	background-color: #e3f2fd;
	padding: 15px;
	border-radius: 8px;
	border-left: 4px solid #1f77b4;
	margin: 10px 0;
	}
	.success-box {
	background-color: #e8f5e9;
	padding: 10px;
	border-radius: 5px;
	border-left: 4px solid #4caf50;
	}
	.warning-box {
	background-color: #fff3e0;
	padding: 10px;
	border-radius: 5px;
	border-left: 4px solid #ff9800;
	}
	</style>
	""", unsafe_allow_html=True)


	def get_pdf_files():
	    """Return the ``*.pdf`` paths found directly inside ``PDF_DIR``, sorted.

	    Returns:
	        A sorted list of ``pathlib.Path`` objects, or an empty list when
	        the directory does not exist.
	    """
	    source_dir = Path(PDF_DIR)
	    if source_dir.exists():
	        return sorted(source_dir.glob("*.pdf"))
	    return []


	def get_json_for_pdf(pdf_filename):
	    """Load the JSON response that belongs to a PDF.

	    The JSON file is looked up in ``JSON_DIR`` under the PDF's name with
	    its final ``.pdf`` extension swapped for ``.json``.

	    Args:
	        pdf_filename: File name of the PDF, e.g. ``"report.pdf"``.

	    Returns:
	        The parsed JSON content; ``None`` when no matching JSON file
	        exists; or ``{"error": "<message>"}`` when the file exists but
	        cannot be read or parsed.
	    """
	    # Swap only the trailing extension. A plain str.replace rewrites every
	    # ".pdf" occurrence ("scan.pdf.v2.pdf" -> "scan.json.v2.json") and
	    # misses upper-case ".PDF".
	    relative = Path(pdf_filename)
	    if relative.suffix.lower() == ".pdf":
	        relative = relative.with_suffix(".json")
	    json_path = Path(JSON_DIR) / relative

	    if not json_path.exists():
	        return None

	    try:
	        with open(json_path, "r", encoding="utf-8") as f:
	            return json.load(f)
	    except Exception as e:
	        # Kept broad on purpose: any read/decode/parse failure is reported
	        # to the caller as an error payload instead of crashing the page.
	        return {"error": f"Failed to load JSON: {e}"}


	def display_pdf(pdf_path, zoom_level=2.0):
	    """Render one page of a PDF as an image, with a page selector.

	    The selected page is rasterised with PyMuPDF using a scale matrix of
	    ``zoom_level`` on both axes and shown with ``st.image``. If opening or
	    rendering fails, the error is reported and a download button for the
	    file is offered instead.

	    Args:
	        pdf_path: Path to the PDF file (``str`` or ``pathlib.Path``).
	        zoom_level: Scale factor applied on both axes when rasterising.
	    """
	    try:
	        pdf_document = fitz.open(pdf_path)
	        try:
	            total_pages = len(pdf_document)

	            # Page selector (only meaningful for multi-page documents).
	            # Keyed by file so each document keeps its own page position
	            # instead of sharing one slider state across documents.
	            if total_pages > 1:
	                page_num = st.slider(
	                    "📖 Select Page",
	                    1,
	                    total_pages,
	                    1,
	                    key=f"page_slider_{pdf_path}",
	                ) - 1
	            else:
	                page_num = 0

	            # Rasterise the page at the requested zoom.
	            page = pdf_document[page_num]
	            mat = fitz.Matrix(zoom_level, zoom_level)
	            pix = page.get_pixmap(matrix=mat)

	            # Copy the pixels into a PIL image before the document is
	            # closed so the image no longer depends on it.
	            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
	        finally:
	            # Release the document even when rendering fails part-way.
	            pdf_document.close()

	        st.image(img, use_column_width=True)

	        if total_pages > 1:
	            st.caption(f"Page {page_num + 1} of {total_pages}")

	    except Exception as e:
	        st.error(f"Error displaying PDF: {str(e)}")

	        # Fallback: provide a download button. Reading the file can fail
	        # too (that may be why rendering failed), so guard it separately
	        # rather than raising from inside the error handler.
	        st.warning("PDF could not be displayed. Click below to download:")
	        fallback_path = Path(pdf_path)
	        try:
	            pdf_bytes = fallback_path.read_bytes()
	        except OSError as read_error:
	            st.error(f"PDF could not be read for download: {read_error}")
	            return
	        st.download_button(
	            label="📥 Download PDF",
	            data=pdf_bytes,
	            file_name=fallback_path.name,
	            mime="application/pdf",
	        )


	def display_json_section(title, data, level=0):
	    """Recursively render a parsed JSON value as Streamlit markdown.

	    Args:
	        title: Heading for this value. At ``level`` 0 it is rendered as a
	            ``section-header`` banner, at deeper levels as plain markdown.
	            An empty title renders no heading at all.
	        data: Parsed JSON value: a dict, a list, or a scalar.
	        level: Nesting depth; sets the leading indent of emitted lines and
	            is increased by one for every nested dict.
	    """
	    indent = " " * level

	    # Emit the heading for every data type, not only dicts, so that a
	    # section whose value is a plain string or a list still gets its
	    # header. Skip empty titles (used for dicts inside lists) so they do
	    # not produce empty banners.
	    if title:
	        if level == 0:
	            st.markdown(f'<div class="section-header">{title}</div>', unsafe_allow_html=True)
	        else:
	            st.markdown(f"{title}")

	    if isinstance(data, dict):
	        for key, value in data.items():
	            if isinstance(value, dict):
	                display_json_section(key, value, level + 1)
	            elif isinstance(value, list):
	                st.markdown(f"{indent}{key}:")
	                for item in value:
	                    if isinstance(item, dict):
	                        display_json_section("", item, level + 1)
	                    else:
	                        st.markdown(f"{indent} • {item}")
	            else:
	                st.markdown(f"{indent}{key}: {value}")
	    elif isinstance(data, list):
	        for item in data:
	            if isinstance(item, dict):
	                display_json_section("", item, level)
	            else:
	                st.markdown(f"{indent}• {item}")
	    else:
	        st.markdown(f"{indent}{data}")


	# Session-state key shared by the file selectbox and the navigation buttons.
	_SELECTED_PDF_KEY = "selected_pdf"

	# (section number, heading) pairs rendered from the "data" object, in order.
	_SECTION_HEADINGS = (
	    ("1", "SECTION 1: Subject Identification"),
	    ("2", "SECTION 2: Findings & Interpretation"),
	    ("3", "SECTION 3: Brief Medical Summary"),
	    ("4", "SECTION 4: Life Insurance Underwriting View"),
	)


	def _select_pdf(target):
	    """Button callback: make *target* the current value of the file selectbox."""
	    st.session_state[_SELECTED_PDF_KEY] = target


	def _render_sidebar(pdf_files):
	    """Draw the sidebar and return the chosen PDF, or None if the search matches nothing."""
	    with st.sidebar:
	        st.markdown("### 📁 File Selection")

	        # Search box
	        search_term = st.text_input("🔍 Search files:", "")

	        # Filter files (case-insensitive substring match on the file name)
	        if search_term:
	            needle = search_term.lower()
	            filtered_files = [f for f in pdf_files if needle in f.name.lower()]
	        else:
	            filtered_files = pdf_files

	        st.markdown(f"Found: {len(filtered_files)} files")

	        if not filtered_files:
	            st.warning("No files match your search")
	            return None

	        # A remembered selection that the current filter no longer contains
	        # would be an invalid selectbox value; fall back to the first match.
	        if st.session_state.get(_SELECTED_PDF_KEY) not in filtered_files:
	            st.session_state[_SELECTED_PDF_KEY] = filtered_files[0]

	        # File selector. The key lets the navigation buttons below change
	        # the selection through session state.
	        selected_file = st.selectbox(
	            "Select a PDF file:",
	            filtered_files,
	            format_func=lambda x: x.name,
	            key=_SELECTED_PDF_KEY,
	        )

	        st.markdown("---")

	        # Statistics
	        st.markdown("### 📊 Statistics")
	        json_dir = Path(JSON_DIR)
	        total_jsons = sum(1 for _ in json_dir.glob("*.json")) if json_dir.exists() else 0

	        st.metric("Total PDFs", len(pdf_files))
	        st.metric("Total JSONs", total_jsons)

	        # Navigation buttons
	        st.markdown("---")
	        st.markdown("### 🔄 Quick Navigation")

	        current_idx = filtered_files.index(selected_file)
	        last_idx = len(filtered_files) - 1
	        col1, col2 = st.columns(2)

	        # The previous code assigned the neighbour to a local variable and
	        # then called st.rerun(), which discarded it, so the buttons never
	        # moved the selection. An on_click callback updates the selectbox's
	        # session-state entry before the script runs again.
	        with col1:
	            st.button(
	                "⬅️ Previous",
	                on_click=_select_pdf,
	                args=(filtered_files[max(current_idx - 1, 0)],),
	                disabled=current_idx == 0,
	            )

	        with col2:
	            st.button(
	                "Next ➡️",
	                on_click=_select_pdf,
	                args=(filtered_files[min(current_idx + 1, last_idx)],),
	                disabled=current_idx == last_idx,
	            )

	    return selected_file


	def _render_json_panel(selected_file):
	    """Show the JSON response belonging to *selected_file*, or explain why there is none."""
	    st.markdown("### 📋 JSON Response")

	    json_data = get_json_for_pdf(selected_file.name)

	    # Compare against None explicitly: an empty JSON object is still a
	    # response that was found, not a missing file.
	    if json_data is None:
	        st.markdown('<div class="warning-box">⚠️ No JSON response found for this PDF</div>', unsafe_allow_html=True)
	        expected_name = Path(selected_file.name).with_suffix(".json").name
	        st.info(f"Expected file: `{JSON_DIR}/{expected_name}`")
	        return

	    # Valid JSON that is not an object has no status/data keys to inspect;
	    # show it as-is instead of failing on dict-only operations.
	    if not isinstance(json_data, dict):
	        st.warning("JSON response is not an object; showing its raw content")
	        st.write(json_data)
	        return

	    if "error" in json_data:
	        st.error(json_data["error"])
	        return

	    with st.container():
	        st.markdown('<div class="json-container">', unsafe_allow_html=True)

	        # Status
	        if json_data.get("status") == "success":
	            st.markdown('<div class="success-box">✅ Status: Success</div>', unsafe_allow_html=True)

	        # Display sections. Each one may be stored under "SECTION n" or
	        # "SECTION_n"; the spaced key wins when it holds a truthy value.
	        data = json_data.get("data")
	        if isinstance(data, dict):
	            for number, heading in _SECTION_HEADINGS:
	                spaced_key = f"SECTION {number}"
	                underscored_key = f"SECTION_{number}"
	                if spaced_key in data or underscored_key in data:
	                    section = data.get(spaced_key) or data.get(underscored_key)
	                    display_json_section(heading, section)

	        # Raw JSON (collapsible)
	        with st.expander("📝 View Raw JSON"):
	            st.json(json_data)

	        st.markdown('</div>', unsafe_allow_html=True)


	def main():
	    """Run the viewer: sidebar file picker, then the PDF and its JSON response side by side."""
	    # Header
	    st.markdown('<div class="main-header">📄 PDF & JSON Response Viewer</div>', unsafe_allow_html=True)

	    # Get all PDF files
	    pdf_files = get_pdf_files()

	    if not pdf_files:
	        st.error(f"No PDF files found in '{PDF_DIR}' directory")
	        return

	    selected_file = _render_sidebar(pdf_files)
	    if selected_file is None:
	        return

	    # File info
	    # NOTE(review): the opening and closing <div> are emitted by separate
	    # st.markdown calls; it looks like they may not wrap the columns in
	    # between - confirm against the rendered page.
	    st.markdown('<div class="info-box">', unsafe_allow_html=True)
	    col1, col2 = st.columns([3, 1])
	    with col1:
	        st.markdown(f"Selected File: `{selected_file.name}`")
	    with col2:
	        file_size = selected_file.stat().st_size / 1024
	        st.markdown(f"Size: {file_size:.1f} KB")
	    st.markdown('</div>', unsafe_allow_html=True)

	    # Create two columns
	    col_pdf, col_json = st.columns([1, 1])

	    # PDF Column
	    with col_pdf:
	        st.markdown("### 📄 PDF Document")

	        # Zoom control
	        zoom_level = st.select_slider(
	            "🔍 Zoom Level",
	            options=[1.0, 1.5, 2.0, 2.5, 3.0],
	            value=2.0,
	            format_func=lambda x: f"{int(x*100)}%"
	        )

	        with st.container():
	            st.markdown('<div class="pdf-container">', unsafe_allow_html=True)
	            display_pdf(selected_file, zoom_level)
	            st.markdown('</div>', unsafe_allow_html=True)

	    # JSON Column
	    with col_json:
	        _render_json_panel(selected_file)


	# Entry point: build the page only when this file is run as the main script.
	if __name__ == "__main__":
	    main()