KevinHuSh
committed on
Commit
·
e7e8c6b
1
Parent(s):
e31db28
continue add layout model for 'laws' (#292)
Browse files
### What problem does this PR solve?
Issue link: #289
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- rag/app/laws.py +5 -2
rag/app/laws.py
CHANGED
|
@@ -25,8 +25,7 @@ from rag.settings import cron_logger
|
|
| 25 |
|
| 26 |
class Docx(DocxParser):
|
| 27 |
def __init__(self):
|
| 28 |
-
|
| 29 |
-
super().__init__()
|
| 30 |
|
| 31 |
def __clean(self, line):
|
| 32 |
line = re.sub(r"\u3000", " ", line).strip()
|
|
@@ -52,6 +51,10 @@ class Docx(DocxParser):
|
|
| 52 |
|
| 53 |
|
| 54 |
class Pdf(PdfParser):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def __call__(self, filename, binary=None, from_page=0,
|
| 56 |
to_page=100000, zoomin=3, callback=None):
|
| 57 |
callback(msg="OCR is running...")
|
|
|
|
| 25 |
|
| 26 |
class Docx(DocxParser):
|
| 27 |
def __init__(self):
|
| 28 |
+
pass
|
|
|
|
| 29 |
|
| 30 |
def __clean(self, line):
|
| 31 |
line = re.sub(r"\u3000", " ", line).strip()
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
class Pdf(PdfParser):
|
| 54 |
+
def __init__(self):
|
| 55 |
+
self.model_speciess = ParserType.LAWS.value
|
| 56 |
+
super().__init__()
|
| 57 |
+
|
| 58 |
def __call__(self, filename, binary=None, from_page=0,
|
| 59 |
to_page=100000, zoomin=3, callback=None):
|
| 60 |
callback(msg="OCR is running...")
|