Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,9 +29,18 @@ def generate_answer(brand_name,question, files):
|
|
| 29 |
|
| 30 |
# 函数:解析PDF文件
|
| 31 |
def extract_text_from_pdf(file_path):
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
text = ""
|
| 34 |
-
for page_num in range(len(
|
| 35 |
page = doc.load_page(page_num)
|
| 36 |
text += page.get_text()
|
| 37 |
return text
|
|
|
|
| 29 |
|
| 30 |
# 函数:解析PDF文件
|
| 31 |
def extract_text_from_pdf(file_path):
|
| 32 |
+
pdf_document = fitz.open(file_path)
|
| 33 |
+
|
| 34 |
+
total_pages = pdf_document.page_count
|
| 35 |
+
print(f"总页数: {total_pages}")
|
| 36 |
+
|
| 37 |
+
# 读取文本
|
| 38 |
+
page = pdf_document.load_page(0) # 读取第一页
|
| 39 |
+
text = page.get_text("text")
|
| 40 |
+
print(f"第一页文本:\n{text}")
|
| 41 |
+
|
| 42 |
text = ""
|
| 43 |
+
for page_num in range(len(pdf_document)):
|
| 44 |
page = doc.load_page(page_num)
|
| 45 |
text += page.get_text()
|
| 46 |
return text
|