Spaces:
Sleeping
Sleeping
Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pdfplumber
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def extract_text_from_pdf(file_obj):
    """Extract the text of every page of a PDF as a single string.

    Args:
        file_obj: The PDF to read. It is passed unchanged to
            ``pdfplumber.open``.

    Returns:
        The text of all pages that yielded any text, joined with a newline
        between pages. An empty string is returned when no page yields text.
    """
    with pdfplumber.open(file_obj) as pdf:
        # extract_text() may give back nothing for a page (e.g. an image-only
        # page); normalise that to "" so it can be filtered out below.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Separate pages with a newline so the last word of one page does not fuse
    # with the first word of the next, and so whitespace-based sentence
    # splitting can still see a boundary at the page break.
    return "\n".join(chunk for chunk in page_texts if chunk)
|
| 10 |
+
|
| 11 |
+
def simple_clause_split(text):
    """Split text into sentence-like clauses.

    A split happens at any run of whitespace that directly follows ``.``,
    ``?`` or ``!``; the punctuation stays attached to the clause before it.
    This is purely pattern-based, so an abbreviation such as ``"e.g. "`` also
    produces a split.

    Args:
        text: The string to split. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of clauses with surrounding whitespace stripped and empty
        fragments removed.
    """
    if not text:
        return []

    # The lookbehind keeps the terminating punctuation on the left-hand piece
    # instead of consuming it as part of the separator.
    fragments = re.split(r'(?<=[.?!])\s+', text)
    stripped = (fragment.strip() for fragment in fragments)
    return [clause for clause in stripped if clause]
|