SimpliAI
/

LlamaPDF

Yiming Qian committed on May 30, 2024

Commit

356589e

verified ·

1 Parent(s): 3b1a72a

Update README.md

Files changed (1) hide show

README.md CHANGED Viewed

@@ -8,8 +8,9 @@ pipeline_tag: feature-extraction
 It is a model based on quantized LLAMA 3 8B. This model is designed to parse PDF files into Markdown-format documents. It provides an initial parsing service to the RAG system.
 Please use the following code to parse a PDF.
 '''
-import pymupdf  # PyMuPDF
 from bs4 import BeautifulSoup
 import pickle
 import torch
@@ -20,7 +21,7 @@ torch.random.manual_seed(0)
 model_kwargs = dict(
     use_cache=False,
     trust_remote_code=True,
-    attn_implementation="flash_attention_2",  # loading the model with flash-attenstion support
     torch_dtype=torch.bfloat16,
     device_map="cuda",
     load_in_4bit=True
@@ -89,7 +90,7 @@ generation_args = {
     "do_sample": False,
 }
-# %%
 filename ='2023071000529.pdf'
 elements=[]
 with pymupdf.open(filename) as doc:

 It is a model based on quantized LLAMA 3 8B. This model is designed to parse PDF files into Markdown-format documents. It provides an initial parsing service to the RAG system.
 Please use the following code to parse a PDF.
 '''
+import pymupdf
 from bs4 import BeautifulSoup
 import pickle
 import torch
 model_kwargs = dict(
     use_cache=False,
     trust_remote_code=True,
+    attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
     device_map="cuda",
     load_in_4bit=True
     "do_sample": False,
 }
 filename ='2023071000529.pdf'
 elements=[]
 with pymupdf.open(filename) as doc: