PRASHANTH REDDY committed on
Commit
496da0f
·
1 Parent(s): ea63e3b

actual Labels

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -1,14 +1,15 @@
1
  import gradio as gr
2
- from PyMuPDF import fitz
 
3
  import os
4
  import re
5
  from joblib import load
6
 
7
  def extract_text_from_pdf(file_path):
8
  text = ""
9
- doc = fitz.open(file_path)
10
- for page in doc:
11
- text += page.get_text()
12
  return text
13
 
14
  def preprocess_text(text):
 
1
  import gradio as gr
2
+ # from PyMuPDF import fitz
3
+ import pdfplumber
4
  import os
5
  import re
6
  from joblib import load
7
 
8
  def extract_text_from_pdf(file_path):
9
  text = ""
10
+ with pdfplumber.open(file_path) as pdf:
11
+ for page in pdf.pages:
12
+ text += page.extract_text()
13
  return text
14
 
15
  def preprocess_text(text):