PirateXX committed on
Commit
3dfc25f
·
1 Parent(s): 12ca948

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -22
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import os
6
  import re
7
  import PyPDF2
 
8
 
9
  app = Flask(__name__)
10
 
@@ -66,29 +67,30 @@ def findRealProb(text):
66
 
67
  def upload_file(file):
68
 
69
- # if 'pdfFile' in request.files:
70
- # pdf_file = request.files['pdfFile']
71
- # text = ""
72
- # with pdfplumber.open(pdf_file) as pdf:
73
- # cnt = 0
74
- # for page in pdf.pages:
75
- # cnt+=1
76
- # text+=(page.extract_text(x_tolerance = 1))
77
- # print(text)
78
- # if cnt>5:
79
- # break
80
- # return findRealProb(text)
81
- # # return jsonify({'text': text})
82
- if file:
83
-
84
- with open(file.name, 'rb') as pdf_file:
85
- pdf_reader = PyPDF2.PdfReader(pdf_file)
86
- text = ''
87
- for page_num in range(len(pdf_reader.pages)):
88
- page = pdf_reader.pages[page_num]
89
- text += page.extract_text()
90
- text = text.replace('\n', ' ')
91
  return findRealProb(text)
 
 
 
 
 
 
 
 
 
 
 
92
  # pdf_file = file.name
93
  # print(file, pdf_file)
94
  # text = ""
 
5
  import os
6
  import re
7
  import PyPDF2
8
+ import pdfplumber
9
 
10
  app = Flask(__name__)
11
 
 
67
 
68
  def upload_file(file):
69
 
70
+ if 'pdfFile' in request.files:
71
+ pdf_file = request.files['pdfFile']
72
+ text = ""
73
+ with pdfplumber.open(pdf_file) as pdf:
74
+ cnt = 0
75
+ for page in pdf.pages:
76
+ cnt+=1
77
+ text+=(page.extract_text(x_tolerance = 1))
78
+ print(text)
79
+ if cnt>5:
80
+ break
81
+ text = text.replace('\n', ' ')
 
 
 
 
 
 
 
 
 
 
82
  return findRealProb(text)
83
+ # return jsonify({'text': text})
84
+ # if file:
85
+
86
+ # with open(file.name, 'rb') as pdf_file:
87
+ # pdf_reader = PyPDF2.PdfReader(pdf_file)
88
+ # text = ''
89
+ # for page_num in range(len(pdf_reader.pages)):
90
+ # page = pdf_reader.pages[page_num]
91
+ # text += page.extract_text()
92
+ # text = text.replace('\n', ' ')
93
+ # return findRealProb(text)
94
  # pdf_file = file.name
95
  # print(file, pdf_file)
96
  # text = ""