Uneedbest committed on
Commit
2e4a47b
·
verified ·
1 Parent(s): 4aebbe9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- import fitz # pymupdf
3
  import numpy as np
4
  from PIL import Image
5
  import pytesseract
@@ -76,8 +76,11 @@ The output should be a complete, ready-to-use exam paper with the same structure
76
  def extract_text_from_pdf(uploaded_file):
77
  """Extract text from PDF files"""
78
  try:
79
- doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
80
- return "\n".join([page.get_text() for page in doc])
 
 
 
81
  except Exception as e:
82
  st.error(f"Error extracting text from PDF: {str(e)}")
83
  return ""
 
1
  import streamlit as st
2
+ import PyPDF2 # Using PyPDF2 instead of PyMuPDF
3
  import numpy as np
4
  from PIL import Image
5
  import pytesseract
 
def extract_text_from_pdf(uploaded_file):
    """Extract text from PDF files.

    Args:
        uploaded_file: The PDF source handed directly to
            ``PyPDF2.PdfReader`` (presumably a Streamlit uploaded-file
            object; confirm against the caller).

    Returns:
        The text of every page, each followed by a newline, or an empty
        string if anything goes wrong while reading the PDF (the error is
        reported to the user through ``st.error``).
    """
    try:
        reader = PyPDF2.PdfReader(uploaded_file)
        # Build the result with a single join instead of growing a string
        # page by page. ``or ''`` keeps one page without extractable text
        # (an empty or None result) from aborting the whole document.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
    except Exception as e:
        # UI boundary: surface the failure in the app rather than crashing.
        st.error(f"Error extracting text from PDF: {str(e)}")
        return ""