Zaious committed on
Commit
c164738
·
verified ·
1 Parent(s): 8f660bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import openai
4
  from openai import OpenAI
5
  import logging
6
-
7
  import pdfminer.high_level
8
  import docx
9
  import numpy as np
@@ -29,9 +29,11 @@ def generate_answer(brand_name,question, files):
29
 
30
  # 函数:解析PDF文件
31
  def extract_text_from_pdf(file_path):
32
- text = pdfminer.high_level.extract_text(file_path)
33
- print("=======ppa=======")
34
- print(text)
 
 
35
  return text
36
 
37
  # 函数:解析DOCX文件
 
3
  import openai
4
  from openai import OpenAI
5
  import logging
6
+ import fitz # PyMuPDF
7
  import pdfminer.high_level
8
  import docx
9
  import numpy as np
 
29
 
30
  # 函数:解析PDF文件
31
  def extract_text_from_pdf(file_path):
32
+ doc = fitz.open(file_path)
33
+ text = ""
34
+ for page_num in range(len(doc)):
35
+ page = doc.load_page(page_num)
36
+ text += page.get_text()
37
  return text
38
 
39
  # 函数:解析DOCX文件