Spaces:
Build error
Build error
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import pdfplumber
|
| 2 |
from docx import Document
|
| 3 |
-
import
|
|
|
|
| 4 |
|
| 5 |
def extract_text(file):
|
| 6 |
if file.name.endswith(".pdf"):
|
|
@@ -12,12 +13,12 @@ def extract_text(file):
|
|
| 12 |
elif file.name.endswith(".txt"):
|
| 13 |
return file.read().decode("utf-8")
|
| 14 |
elif file.name.endswith(".html"):
|
| 15 |
-
|
|
|
|
| 16 |
else:
|
| 17 |
return file.read().decode("utf-8")
|
| 18 |
|
| 19 |
def render_template(template_path, context):
|
| 20 |
-
from jinja2 import Environment, FileSystemLoader
|
| 21 |
env = Environment(loader=FileSystemLoader("templates"))
|
| 22 |
template = env.get_template(template_path)
|
| 23 |
return template.render(context)
|
|
|
|
| 1 |
import pdfplumber
|
| 2 |
from docx import Document
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
from jinja2 import Environment, FileSystemLoader
|
| 5 |
|
| 6 |
def extract_text(file):
|
| 7 |
if file.name.endswith(".pdf"):
|
|
|
|
| 13 |
elif file.name.endswith(".txt"):
|
| 14 |
return file.read().decode("utf-8")
|
| 15 |
elif file.name.endswith(".html"):
|
| 16 |
+
soup = BeautifulSoup(file.read(), "html.parser")
|
| 17 |
+
return soup.get_text()
|
| 18 |
else:
|
| 19 |
return file.read().decode("utf-8")
|
| 20 |
|
| 21 |
def render_template(template_path, context):
|
|
|
|
| 22 |
env = Environment(loader=FileSystemLoader("templates"))
|
| 23 |
template = env.get_template(template_path)
|
| 24 |
return template.render(context)
|