Spaces:
Sleeping
Sleeping
Commit ·
dcbd743
1
Parent(s): 5ae8277
Update app.py
Browse files
app.py
CHANGED
|
@@ -70,7 +70,8 @@ def parse_pdf(file_path):
|
|
| 70 |
def preprocess_pdf_text(pdf_file): #(list_of_text):
|
| 71 |
global page_num
|
| 72 |
pdf_txt, page_num = parse_pdf(pdf_file.name)
|
| 73 |
-
|
|
|
|
| 74 |
page_docs = [Document(page_content=page) for page in pdf_txt]
|
| 75 |
|
| 76 |
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=250, chunk_overlap=50)
|
|
@@ -92,8 +93,6 @@ def summarize_pdf(pdf_file, api_key,
|
|
| 92 |
# pdf_txt, page_num = parse_pdf(pdf_file.name)
|
| 93 |
# pdf_doc = preprocess_pdf_text(pdf_txt)
|
| 94 |
|
| 95 |
-
file_check(pdf_file)
|
| 96 |
-
|
| 97 |
# Build LLM Model
|
| 98 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 99 |
if model_list[model_name] == 'chat':
|
|
|
|
| 70 |
def preprocess_pdf_text(pdf_file): #(list_of_text):
|
| 71 |
global page_num
|
| 72 |
pdf_txt, page_num = parse_pdf(pdf_file.name)
|
| 73 |
+
file_check(pdf_file)
|
| 74 |
+
|
| 75 |
page_docs = [Document(page_content=page) for page in pdf_txt]
|
| 76 |
|
| 77 |
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=250, chunk_overlap=50)
|
|
|
|
| 93 |
# pdf_txt, page_num = parse_pdf(pdf_file.name)
|
| 94 |
# pdf_doc = preprocess_pdf_text(pdf_txt)
|
| 95 |
|
|
|
|
|
|
|
| 96 |
# Build LLM Model
|
| 97 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 98 |
if model_list[model_name] == 'chat':
|