# pdf_layers_extractor / layer_info.py
# namtr92's picture
# Upload 4 files
# 25fbdee verified
"""
Helper script to display detailed information about PDF layers.
Usage: python layer_info.py [--details] <path_to_pdf>
"""
import argparse
import json
import re
import sys
from typing import Any, Dict, List

import fitz  # PyMuPDF
# Matches a "/Type /OCG" dictionary entry in PDF object source. The lookahead
# requires the name to end right after "OCG" (whitespace, a PDF delimiter or
# end of text), so a longer name that merely starts with "OCG" is not counted.
_OCG_TYPE_RE = re.compile(r"/Type\s*/OCG(?=[\s/<>\[\](){}%]|$)")


def _object_source(doc, xref: int) -> str:
    """Return the source text of object *xref*, always as ``str``."""
    source = doc.xref_object(xref, compressed=True)
    if isinstance(source, bytes):
        # Tolerate a bytes result; latin-1 decodes every byte value.
        source = source.decode("latin-1")
    return source or ""


def get_ocg_xrefs(doc) -> List[int]:
    """Get xrefs for all OCGs (optional content groups) in the document.

    The catalog object is inspected first: when it has no ``/OCProperties``
    entry the document declares no optional content and the scan is skipped.
    Otherwise every object is examined and those whose source contains a
    ``/Type /OCG`` entry are collected.

    Args:
        doc: An open document exposing ``pdf_catalog()``, ``xref_length()``
            and ``xref_object()``.

    Returns:
        The matching xref numbers in ascending order. An empty list is
        returned when there is no catalog, no ``/OCProperties`` entry, or
        when an unexpected error occurs (the error is printed).
    """
    try:
        # pdf_catalog() yields the catalog's xref number, not a mapping, so
        # the catalog has to be read through xref_object() like any object.
        catalog_xref = doc.pdf_catalog()
        if not catalog_xref:
            return []
        if "/OCProperties" not in _object_source(doc, catalog_xref):
            return []

        ocg_xrefs = []
        for xref in range(1, doc.xref_length()):
            try:
                source = _object_source(doc, xref)
            except Exception:
                # Best effort: an unreadable or free object must not abort
                # the scan of the remaining objects.
                continue
            if _OCG_TYPE_RE.search(source):
                ocg_xrefs.append(xref)
        return ocg_xrefs
    except Exception as e:
        print(f"Error getting OCG xrefs: {e}")
        return []
def _layer_entry(xref, info) -> Dict[str, Any]:
    """Build one result entry from a single ``get_ocgs()`` item."""
    if not isinstance(info, dict):
        # Tolerate a bare layer name in place of the details mapping.
        info = {"name": info}

    name = info.get("name")
    intent = info.get("intent") or []
    if isinstance(intent, str):
        intent = [intent]

    # "usage" is normalised to a {label: True} mapping, the same shape the
    # layer_ui_configs fallback emits, so every entry can be consumed alike.
    usage = info.get("usage")
    if not isinstance(usage, dict):
        usage = {str(usage): True} if usage else {}

    return {
        "id": xref,
        "name": str(name) if name else f"Layer_{xref}",
        "on": bool(info.get("on", True)),
        "intent": list(intent),
        "usage": usage,
    }


def _layers_from_ocgs(doc) -> List[Dict[str, Any]]:
    """Collect layers through ``doc.get_ocgs()`` when it is available."""
    if not hasattr(doc, "get_ocgs"):
        return []
    ocgs = doc.get_ocgs()
    if not ocgs:
        return []
    # PyMuPDF documents get_ocgs() as a dict keyed by OCG xref; a sequence of
    # (xref, info) pairs is accepted as well.
    items = ocgs.items() if isinstance(ocgs, dict) else ocgs
    return [_layer_entry(xref, info) for xref, info in items]


def _ocg_name(doc, xref) -> str:
    """Read the /Name of OCG *xref*, falling back to a placeholder."""
    placeholder = f"Layer_{xref}"
    if not hasattr(doc, "xref_get_key"):
        return placeholder
    try:
        kind, value = doc.xref_get_key(xref, "Name")
    except Exception:
        return placeholder
    return value if kind == "string" and value else placeholder


def _layers_from_xref_scan(doc) -> List[Dict[str, Any]]:
    """Collect layers by scanning the xref table for OCG objects."""
    return [
        {"id": xref, "name": _ocg_name(doc, xref)}
        for xref in get_ocg_xrefs(doc)
    ]


def _layers_from_ui_configs(doc) -> List[Dict[str, Any]]:
    """Collect layers from ``doc.layer_ui_configs()`` when it is available."""
    if not hasattr(doc, "layer_ui_configs"):
        return []
    return [
        {
            "id": cfg.get("number", 0),
            "name": cfg.get("text", "Unnamed"),
            "on": cfg.get("on", False),
            "locked": cfg.get("locked", False),
            "usage": {"view": True},
        }
        for cfg in doc.layer_ui_configs()
    ]


def get_layer_info(pdf_path: str) -> Dict[str, Any]:
    """Get layer (optional content group) information from a PDF.

    Three sources are tried in order and the first one that yields any
    layers wins: ``get_ocgs()``, a scan of the xref table for OCG objects,
    and finally ``layer_ui_configs()``.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        ``{"ocgs": [...]}`` where each item is a dict with at least ``"id"``
        and ``"name"``; depending on the source it may also carry ``"on"``,
        ``"locked"``, ``"intent"`` and ``"usage"``. The list is empty when no
        layers are found or when reading them fails (the error is printed).

    Raises:
        Whatever ``fitz.open`` raises when the file cannot be opened; only
        errors that occur after opening are caught here.
    """
    doc = fitz.open(pdf_path)
    try:
        layers = (
            _layers_from_ocgs(doc)
            or _layers_from_xref_scan(doc)
            or _layers_from_ui_configs(doc)
        )
        return {"ocgs": layers}
    except Exception as e:
        print(f"Error getting layer info: {e}")
        return {"ocgs": []}
    finally:
        # Always release the document, whichever path returned.
        doc.close()
def print_layer_info(pdf_path: str, show_details: bool = False) -> None:
    """Print a summary table of the layers found in a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.
        show_details: When true, the full layer data is printed as indented
            JSON before the summary table.

    The table has the columns ID, Name, State, Locked and Intent. The Intent
    column lists the entry's ``"intent"`` values (if any) followed by the
    keys of its ``"usage"`` mapping whose value is truthy.
    """
    oc_info = get_layer_info(pdf_path)
    ocgs = oc_info.get("ocgs") if oc_info else None
    if not ocgs:
        print(f"No layer information found in {pdf_path}")
        return

    rule = "-" * 70
    print(f"Found {len(ocgs)} layers in {pdf_path}:")
    print(rule)

    if show_details:
        # Print detailed JSON output
        print(json.dumps(oc_info, indent=2))
        print(rule)

    # Print summary table
    print(f"{'ID':<10} {'Name':<30} {'State':<10} {'Locked':<10} {'Intent'}")
    print(rule)

    for ocg in ocgs:
        ocg_id = ocg.get("id")
        if ocg_id is None:
            ocg_id = "N/A"
        name = ocg.get("name")
        if name is None:
            name = "Unnamed"
        state = "On" if ocg.get("on", True) else "Off"
        locked = "Yes" if ocg.get("locked", False) else "No"

        labels = []
        intent = ocg.get("intent")
        if isinstance(intent, str):
            labels.append(intent)
        elif intent:
            labels.extend(str(item) for item in intent)

        # "usage" is normally a mapping of label -> flag; a bare value is
        # shown as a single label instead of failing on .items().
        usage = ocg.get("usage")
        if isinstance(usage, dict):
            labels.extend(str(key) for key, flag in usage.items() if flag)
        elif usage:
            labels.append(str(usage))

        intent_str = ", ".join(labels) if labels else "None"
        # str() keeps the width specs valid for values of any type: a format
        # spec applied directly to None raises TypeError.
        print(
            f"{str(ocg_id):<10} {str(name):<30} {state:<10} "
            f"{locked:<10} {intent_str}"
        )

    print(rule)
def main():
    """Parse the command line and print layer information for the given PDF."""
    cli = argparse.ArgumentParser(
        description="Display layer information from a PDF file"
    )
    cli.add_argument("pdf_path", help="Path to the PDF file")
    cli.add_argument(
        "--details",
        "-d",
        action="store_true",
        help="Show full layer configuration details",
    )
    options = cli.parse_args()
    print_layer_info(options.pdf_path, options.details)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()