madankn79 committed on
Commit
a085c86
·
1 Parent(s): fe9658d

Initial Commit 5.1.0

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -14,6 +14,7 @@ import docx
14
  from docx.shared import Inches
15
  import logging
16
  import base64
 
17
 
18
  # Setup
19
  API_KEY = os.getenv("PDF_API_KEY")
@@ -131,6 +132,7 @@ def extract_pdf_to_html(file) -> tuple[str, docx.Document]:
131
 
132
  with pdfplumber.open(file) as pdf:
133
  for page_num, page in enumerate(pdf.pages):
 
134
  page_title = f"Page {page_num + 1}"
135
  toc.append(f"<li><a href='#page{page_num+1}'>{page_title}</a></li>")
136
  html_output += f"<h2 id='page{page_num+1}'>{page_title}</h2>\n"
@@ -167,6 +169,6 @@ def extract_pdf_to_html(file) -> tuple[str, docx.Document]:
167
  docx_output.add_picture(buffer, width=Inches(5))
168
  except Exception:
169
  pass
170
-
171
  full_html = f"<ul>{''.join(toc)}</ul>\n" + html_output
172
  return full_html, docx_output
 
14
  from docx.shared import Inches
15
  import logging
16
  import base64
17
+ import time
18
 
19
  # Setup
20
  API_KEY = os.getenv("PDF_API_KEY")
 
132
 
133
  with pdfplumber.open(file) as pdf:
134
  for page_num, page in enumerate(pdf.pages):
135
+ start = time.time()
136
  page_title = f"Page {page_num + 1}"
137
  toc.append(f"<li><a href='#page{page_num+1}'>{page_title}</a></li>")
138
  html_output += f"<h2 id='page{page_num+1}'>{page_title}</h2>\n"
 
169
  docx_output.add_picture(buffer, width=Inches(5))
170
  except Exception:
171
  pass
172
+ logger.info(f"Processed page {page_num + 1} in {time.time() - start:.2f}s")
173
  full_html = f"<ul>{''.join(toc)}</ul>\n" + html_output
174
  return full_html, docx_output