manabb committed on
Commit
5d69f2a
·
verified ·
1 Parent(s): d538593

Update manabCQgenetaion.py

Browse files
Files changed (1) hide show
  1. manabCQgenetaion.py +3 -2
manabCQgenetaion.py CHANGED
@@ -36,9 +36,10 @@ manual_rules=NRLimportRules1()
36
  def compliance_import_OEM(manabfile: str, client):
37
 
38
  pages = convert_from_path(manabfile, dpi=300)
39
- doc_text = ""
40
  for page in pages:
41
- doc_text += pytesseract.image_to_string(page) + "\n"
 
42
  # Extract full PDF text (handles layout/tables well)
43
  #loader = PyMuPDFLoader(manabfile)
44
  #docs = loader.load()
 
36
  def compliance_import_OEM(manabfile: str, client):
37
 
38
  pages = convert_from_path(manabfile, dpi=300)
39
+ text = ""
40
  for page in pages:
41
+ text += pytesseract.image_to_string(page) + "\n"
42
+ doc_text=normalize_text(text)
43
  # Extract full PDF text (handles layout/tables well)
44
  #loader = PyMuPDFLoader(manabfile)
45
  #docs = loader.load()