#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Partition a PDF file with the ``unstructured`` library and print each element.

https://unstructured.io/
https://github.com/Unstructured-IO/unstructured
"""
import argparse

# Neither of the next two packages is referenced by name in this script.
# NOTE(review): presumably kept so that a missing installation fails fast at
# start-up -- confirm before removing.
import unstructured  # noqa: F401
import unstructured_inference  # noqa: F401
from unstructured.partition.pdf import partition_pdf

# Default input document; used when --filename is not given, so running the
# script without arguments processes the same file as before.
DEFAULT_PDF_FILENAME = r"E:\Users\tianx\intelli-zen\document_loaders\data\files\pdf\2024.naacl-long.35.pdf"


def get_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace`` with a ``filename`` attribute.
    """
    parser = argparse.ArgumentParser(
        description="Partition a PDF with unstructured and print its elements.",
    )
    parser.add_argument(
        "--filename",
        default=DEFAULT_PDF_FILENAME,
        type=str,
        help="path of the PDF file to partition",
    )
    return parser.parse_args(argv)


def main(argv=None):
    """Partition the selected PDF and print every returned element.

    Args:
        argv: Optional argument list forwarded to :func:`get_args`.
    """
    args = get_args(argv)

    elements = partition_pdf(filename=args.filename)
    for element in elements:
        print(element)


if __name__ == "__main__":
    # Run only when executed as a script, so importing this module no longer
    # triggers PDF processing as a side effect.
    main()