Update mdr_pdf_parser.py
Browse files
- mdr_pdf_parser.py +7 -8
mdr_pdf_parser.py
CHANGED
|
@@ -1813,23 +1813,22 @@ class MDRLayoutReader:
|
|
| 1813 |
# In class MDRLayoutReader:
|
| 1814 |
def _get_model(self) -> LayoutLMv3ForTokenClassification | None:
|
| 1815 |
if self._model is None:
|
| 1816 |
-
|
| 1817 |
-
#
|
| 1818 |
-
layoutreader_cache_dir = Path(self.
|
| 1819 |
-
mdr_ensure_directory(str(layoutreader_cache_dir))
|
| 1820 |
|
| 1821 |
name = "microsoft/layoutlmv3-base"
|
| 1822 |
|
| 1823 |
print(f"MDRLayoutReader: Attempting to load LayoutLMv3 model '{name}'. Cache dir: {layoutreader_cache_dir}")
|
| 1824 |
try:
|
| 1825 |
self._model = LayoutLMv3ForTokenClassification.from_pretrained(
|
| 1826 |
-
name,
|
| 1827 |
cache_dir=str(layoutreader_cache_dir),
|
| 1828 |
-
local_files_only=False,
|
| 1829 |
num_labels=_MDR_MAX_LEN+1
|
| 1830 |
)
|
| 1831 |
-
|
| 1832 |
-
self._model.to(torch.device(self._device)) # ENSURE THIS LINE IS PRESENT AND CORRECT
|
| 1833 |
self._model.eval()
|
| 1834 |
print(f"MDR LayoutReader model '{name}' loaded successfully on device: {self._model.device}.")
|
| 1835 |
except Exception as e:
|
|
|
|
| 1813 |
# In class MDRLayoutReader:
|
| 1814 |
def _get_model(self) -> LayoutLMv3ForTokenClassification | None:
|
| 1815 |
if self._model is None:
|
| 1816 |
+
# MODIFIED: Use self._model_path for the layoutreader's specific cache,
|
| 1817 |
+
# and ensure it's a directory. self._model_path is passed during MDRLayoutReader init.
|
| 1818 |
+
layoutreader_cache_dir = Path(self._model_path) # self._model_path is like "./mdr_models/layoutreader"
|
| 1819 |
+
mdr_ensure_directory(str(layoutreader_cache_dir)) # Ensure this specific directory exists
|
| 1820 |
|
| 1821 |
name = "microsoft/layoutlmv3-base"
|
| 1822 |
|
| 1823 |
print(f"MDRLayoutReader: Attempting to load LayoutLMv3 model '{name}'. Cache dir: {layoutreader_cache_dir}")
|
| 1824 |
try:
|
| 1825 |
self._model = LayoutLMv3ForTokenClassification.from_pretrained(
|
| 1826 |
+
name,
|
| 1827 |
cache_dir=str(layoutreader_cache_dir),
|
| 1828 |
+
local_files_only=False,
|
| 1829 |
num_labels=_MDR_MAX_LEN+1
|
| 1830 |
)
|
| 1831 |
+
self._model.to(torch.device(self._device))
|
|
|
|
| 1832 |
self._model.eval()
|
| 1833 |
print(f"MDR LayoutReader model '{name}' loaded successfully on device: {self._model.device}.")
|
| 1834 |
except Exception as e:
|