ReneeHWT committed on
Commit
f69b8ba
·
verified ·
1 Parent(s): 40ca645

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -4
app.py CHANGED
@@ -16,11 +16,8 @@ def extract_text(file):
16
  doc = fitz.open(stream=pdf_bytes, filetype="pdf")
17
  full_text = ""
18
  for page in doc:
19
- # 以 300 dpi 渲染
20
  pix = page.get_pixmap(dpi=300)
21
- img_data = pix.tobytes("png")
22
- img = Image.open(io.BytesIO(img_data))
23
- # OCR,支援英文與繁體中文
24
  page_text = pytesseract.image_to_string(img, lang='eng+chi_tra')
25
  full_text += page_text + "\n\n"
26
  return full_text
 
16
  doc = fitz.open(stream=pdf_bytes, filetype="pdf")
17
  full_text = ""
18
  for page in doc:
 
19
  pix = page.get_pixmap(dpi=300)
20
+ img = Image.open(io.BytesIO(pix.tobytes("png")))
 
 
21
  page_text = pytesseract.image_to_string(img, lang='eng+chi_tra')
22
  full_text += page_text + "\n\n"
23
  return full_text