Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
import openai
|
| 4 |
from openai import OpenAI
|
| 5 |
import logging
|
| 6 |
-
|
| 7 |
import pdfminer.high_level
|
| 8 |
import docx
|
| 9 |
import numpy as np
|
|
@@ -29,9 +29,11 @@ def generate_answer(brand_name,question, files):
|
|
| 29 |
|
| 30 |
# 函数:解析PDF文件
|
| 31 |
def extract_text_from_pdf(file_path):
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
return text
|
| 36 |
|
| 37 |
# 函数:解析DOCX文件
|
|
|
|
| 3 |
import openai
|
| 4 |
from openai import OpenAI
|
| 5 |
import logging
|
| 6 |
+
import fitz # PyMuPDF
|
| 7 |
import pdfminer.high_level
|
| 8 |
import docx
|
| 9 |
import numpy as np
|
|
|
|
| 29 |
|
| 30 |
# 函数:解析PDF文件
|
| 31 |
def extract_text_from_pdf(file_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages. An empty string if the document has no pages.
    """
    # Opening the document as a context manager guarantees it is closed
    # (releasing the underlying file handle) even if text extraction raises.
    with fitz.open(file_path) as doc:
        # Iterating the document yields its pages in order; join once instead
        # of growing a string with repeated ``+=``.
        return "".join(page.get_text() for page in doc)
|
| 38 |
|
| 39 |
# 函数:解析DOCX文件
|