Spaces:

newoceangroup
/

pdf_layers_extractor

Sleeping

App Files Files Community

pdf_layers_extractor / layer_info.py

namtr92

Upload 4 files

25fbdee verified about 1 year ago

raw

history blame contribute delete

5.49 kB

	"""
	Helper script to display detailed information about PDF layers.
	Usage: python layer_info.py <path_to_pdf>
	"""

	import sys
	import fitz # PyMuPDF
	from typing import List, Dict, Any
	import argparse
	import json

	def get_ocg_xrefs(doc) -> List[int]:
	    """Return the xrefs of all optional content groups (OCGs) in *doc*.

	    The document is scanned object by object and every object whose
	    ``/Type`` entry is the name ``/OCG`` is reported.

	    Args:
	        doc: An open PyMuPDF document.

	    Returns:
	        The matching xref numbers in ascending order. The list is empty
	        when the document has no catalog, the catalog has no
	        ``/OCProperties`` entry, or an unexpected error occurs (the error
	        is printed, not raised).
	    """
	    try:
	        # pdf_catalog() yields the catalog's xref number (0 for non-PDF
	        # documents), so the catalog has to be queried by key rather than
	        # treated as a mapping.
	        catalog_xref = doc.pdf_catalog()
	        if not catalog_xref:
	            return []
	        if doc.xref_get_key(catalog_xref, "OCProperties")[0] == "null":
	            return []

	        ocg_xrefs = []
	        for xref in range(1, doc.xref_length()):
	            try:
	                key_type, key_value = doc.xref_get_key(xref, "Type")
	            except Exception:
	                # Best effort: objects that cannot be queried for a key
	                # cannot be OCGs, so skip them and keep scanning.
	                continue
	            if key_type == "name" and key_value == "/OCG":
	                ocg_xrefs.append(xref)
	        return ocg_xrefs
	    except Exception as e:
	        print(f"Error getting OCG xrefs: {e}")
	        return []

	def _layers_from_ocgs(doc) -> List[Dict[str, Any]]:
	    """Build layer entries from the dict returned by ``doc.get_ocgs()``."""
	    if not hasattr(doc, "get_ocgs"):
	        return []
	    layers = []
	    # get_ocgs() maps each OCG xref to a dict of its properties.
	    for xref, info in (doc.get_ocgs() or {}).items():
	        intent = info.get("intent") or []
	        if isinstance(intent, str):
	            intent = [intent]
	        usage = info.get("usage")
	        layers.append({
	            "id": xref,
	            "name": info.get("name") or f"Layer_{xref}",
	            "on": bool(info.get("on", True)),
	            "intent": list(intent),
	            # print_layer_info() expects a mapping of flag -> bool here,
	            # so the single usage string is wrapped accordingly.
	            "usage": {usage: True} if usage else {},
	        })
	    return layers


	def _layers_from_xrefs(doc) -> List[Dict[str, Any]]:
	    """Build layer entries by scanning the xref table for OCG objects."""
	    layers = []
	    for xref in get_ocg_xrefs(doc):
	        name = f"Layer_{xref}"
	        try:
	            key_type, key_value = doc.xref_get_key(xref, "Name")
	            if key_type == "string" and key_value:
	                name = key_value
	        except Exception as e:
	            # Keep the placeholder name; one unreadable OCG should not
	            # hide the others.
	            print(f"Error getting OCG info for xref {xref}: {e}")
	        layers.append({"id": xref, "name": name})
	    return layers


	def _layers_from_ui_configs(doc) -> List[Dict[str, Any]]:
	    """Build layer entries from ``doc.layer_ui_configs()``."""
	    if not hasattr(doc, "layer_ui_configs"):
	        return []
	    return [
	        {
	            "id": cfg.get("number", 0),
	            "name": cfg.get("text", "Unnamed"),
	            "on": cfg.get("on", False),
	            "locked": cfg.get("locked", False),
	            "usage": {"view": True},
	        }
	        for cfg in doc.layer_ui_configs()
	    ]


	def get_layer_info(pdf_path: str) -> Dict[str, Any]:
	    """Collect layer (optional content group) information from a PDF.

	    Three sources are tried in order and the first one that yields any
	    entries wins: ``doc.get_ocgs()``, a scan of the xref table, and
	    ``doc.layer_ui_configs()``.

	    Args:
	        pdf_path: Path of the file to open with PyMuPDF.

	    Returns:
	        ``{"ocgs": [...]}`` where each entry is a dict with at least
	        ``"id"`` and ``"name"``; ``"on"``, ``"locked"``, ``"intent"`` and
	        ``"usage"`` are present when the source provides them. The list
	        is empty when no layers are found or when reading them fails
	        (the error is printed, not raised).

	    Raises:
	        Whatever ``fitz.open`` raises when the file cannot be opened.
	    """
	    # The context manager closes the document on every exit path.
	    with fitz.open(pdf_path) as doc:
	        try:
	            collectors = (
	                _layers_from_ocgs,
	                _layers_from_xrefs,
	                _layers_from_ui_configs,
	            )
	            for collect in collectors:
	                layers = collect(doc)
	                if layers:
	                    return {"ocgs": layers}
	            return {"ocgs": []}
	        except Exception as e:
	            print(f"Error getting layer info: {e}")
	            return {"ocgs": []}

	def print_layer_info(pdf_path: str, show_details: bool = False) -> None:
	    """Write a report of the layers found in *pdf_path* to stdout.

	    Args:
	        pdf_path: Path of the PDF to inspect; passed to get_layer_info().
	        show_details: When true, the full layer data is also dumped as
	            indented JSON ahead of the summary table.
	    """
	    separator = "-" * 70
	    oc_info = get_layer_info(pdf_path)

	    layers = oc_info["ocgs"] if oc_info and "ocgs" in oc_info else None
	    if not layers:
	        print(f"No layer information found in {pdf_path}")
	        return

	    print(f"Found {len(layers)} layers in {pdf_path}:")
	    print(separator)

	    if show_details:
	        # Raw dump of everything get_layer_info() returned
	        print(json.dumps(oc_info, indent=2))
	        print(separator)

	    # Table header
	    print(f"{'ID':<10} {'Name':<30} {'State':<10} {'Locked':<10} {'Intent'}")
	    print(separator)

	    # One row per layer
	    for layer in layers:
	        layer_id = layer.get("id", "N/A")
	        layer_name = layer.get("name", "Unnamed")
	        state = "On" if layer.get("on", True) else "Off"
	        locked = "Yes" if layer.get("locked", False) else "No"

	        # The Intent column lists every usage key whose value is truthy
	        active = [key for key, flag in layer.get("usage", {}).items() if flag]
	        intent_str = ", ".join(active) if active else "None"

	        print(f"{layer_id:<10} {layer_name:<30} {state:<10} {locked:<10} {intent_str}")

	    print(separator)

	def main():
	    """Parse the command line and print the layer report for the given PDF."""
	    cli = argparse.ArgumentParser(
	        description='Display layer information from a PDF file',
	    )
	    cli.add_argument('pdf_path', help='Path to the PDF file')
	    cli.add_argument(
	        '--details',
	        '-d',
	        action='store_true',
	        help='Show full layer configuration details',
	    )

	    options = cli.parse_args()
	    print_layer_info(options.pdf_path, options.details)


	# Run the CLI only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()