Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
| 3 |
|
|
|
|
|
|
|
| 4 |
text = ""
|
| 5 |
with open(pdf_path, "rb") as file:
|
| 6 |
reader = PdfReader(file)
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from lingtrain_aligner import preprocessor, splitter, aligner, resolver, reader, vis_helper
|
| 4 |
+
from PyPDF2 import PdfReader
|
| 5 |
|
| 6 |
+
def pdf_to_text(pdf_path: str) -> str:
|
| 7 |
+
|
| 8 |
text = ""
|
| 9 |
with open(pdf_path, "rb") as file:
|
| 10 |
reader = PdfReader(file)
|