AzizWazir committed on
Commit
00bba6b
·
verified ·
1 Parent(s): 72c095c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -6,9 +6,6 @@ import pytesseract
6
  from docx import Document
7
  import tempfile
8
 
9
- # Folder path for PDFs
10
- pdf_folder_path = "D:/General"
11
-
12
  # Function to convert PDF to image
13
  def pdf_to_image(pdf_path):
14
  try:
@@ -37,18 +34,21 @@ def save_to_word(text, file_name):
37
 
38
  # Streamlit UI
39
  st.title("PDF to Word Converter")
40
- st.write("Converting PDFs from the D:/General folder")
 
 
 
41
 
42
- # Get all PDFs in the specified folder
43
- pdf_files = [f for f in os.listdir(pdf_folder_path) if f.lower().endswith('.pdf')]
 
 
 
 
44
 
45
- if pdf_files:
46
- for pdf_file in pdf_files:
47
- pdf_path = os.path.join(pdf_folder_path, pdf_file)
48
-
49
  # Convert PDF to images
50
- images = pdf_to_image(pdf_path)
51
-
52
  if images:
53
  # Extract text from images
54
  extracted_text = ""
@@ -59,8 +59,8 @@ if pdf_files:
59
 
60
  # Save the extracted text to Word
61
  if extracted_text:
62
- word_file = save_to_word(extracted_text, pdf_file)
63
- st.success(f"Conversion of {pdf_file} complete! Download the Word file below.")
64
- st.download_button(f"Download {pdf_file} as Word", word_file, file_name=f"{pdf_file}.docx")
65
  else:
66
- st.write("No PDFs found in the specified folder.")
 
6
  from docx import Document
7
  import tempfile
8
 
 
 
 
9
  # Function to convert PDF to image
10
  def pdf_to_image(pdf_path):
11
  try:
 
34
 
35
  # Streamlit UI
36
  st.title("PDF to Word Converter")
37
+ st.write("Upload a PDF to convert it to a Word document")
38
+
39
+ # File upload feature
40
+ uploaded_files = st.file_uploader("Choose PDF files", type="pdf", accept_multiple_files=True)
41
 
42
+ if uploaded_files:
43
+ for uploaded_file in uploaded_files:
44
+ # Save the uploaded PDF to a temporary file
45
+ temp_pdf_path = tempfile.mktemp(suffix=".pdf")
46
+ with open(temp_pdf_path, "wb") as temp_pdf:
47
+ temp_pdf.write(uploaded_file.getbuffer())
48
 
 
 
 
 
49
  # Convert PDF to images
50
+ images = pdf_to_image(temp_pdf_path)
51
+
52
  if images:
53
  # Extract text from images
54
  extracted_text = ""
 
59
 
60
  # Save the extracted text to Word
61
  if extracted_text:
62
+ word_file = save_to_word(extracted_text, uploaded_file.name)
63
+ st.success(f"Conversion of {uploaded_file.name} complete! Download the Word file below.")
64
+ st.download_button(f"Download {uploaded_file.name} as Word", word_file, file_name=f"{uploaded_file.name}.docx")
65
  else:
66
+ st.write("Please upload PDF files to convert.")