Yiming Qian
committed on
Update README.md
Browse files
README.md
CHANGED
|
@@ -8,8 +8,9 @@ pipeline_tag: feature-extraction
|
|
| 8 |
This model is based on a quantized Llama 3 8B. It is designed to parse PDF files into Markdown-format documents, and it provides an initial parsing service for a RAG system.
|
| 9 |
|
| 10 |
Please use the following code to parse a PDF.
|
|
|
|
| 11 |
'''
|
| 12 |
-
import pymupdf
|
| 13 |
from bs4 import BeautifulSoup
|
| 14 |
import pickle
|
| 15 |
import torch
|
|
@@ -20,7 +21,7 @@ torch.random.manual_seed(0)
|
|
| 20 |
model_kwargs = dict(
|
| 21 |
use_cache=False,
|
| 22 |
trust_remote_code=True,
|
| 23 |
-
attn_implementation="flash_attention_2",
|
| 24 |
torch_dtype=torch.bfloat16,
|
| 25 |
device_map="cuda",
|
| 26 |
load_in_4bit=True
|
|
@@ -89,7 +90,7 @@ generation_args = {
|
|
| 89 |
"do_sample": False,
|
| 90 |
}
|
| 91 |
|
| 92 |
-
|
| 93 |
filename ='2023071000529.pdf'
|
| 94 |
elements=[]
|
| 95 |
with pymupdf.open(filename) as doc:
|
|
|
|
| 8 |
This model is based on a quantized Llama 3 8B. It is designed to parse PDF files into Markdown-format documents, and it provides an initial parsing service for a RAG system.
|
| 9 |
|
| 10 |
Please use the following code to parse a PDF.
|
| 11 |
+
|
| 12 |
'''
|
| 13 |
+
import pymupdf
|
| 14 |
from bs4 import BeautifulSoup
|
| 15 |
import pickle
|
| 16 |
import torch
|
|
|
|
| 21 |
model_kwargs = dict(
|
| 22 |
use_cache=False,
|
| 23 |
trust_remote_code=True,
|
| 24 |
+
attn_implementation="flash_attention_2",
|
| 25 |
torch_dtype=torch.bfloat16,
|
| 26 |
device_map="cuda",
|
| 27 |
load_in_4bit=True
|
|
|
|
| 90 |
"do_sample": False,
|
| 91 |
}
|
| 92 |
|
| 93 |
+
|
| 94 |
filename ='2023071000529.pdf'
|
| 95 |
elements=[]
|
| 96 |
with pymupdf.open(filename) as doc:
|