Spaces:
Sleeping
Sleeping
Update document_scrapped.py
Browse files
- document_scrapped.py +6 -5
document_scrapped.py
CHANGED
|
@@ -9,6 +9,7 @@ from io import BytesIO
|
|
| 9 |
import chardet
|
| 10 |
from docx import Document
|
| 11 |
import pandas as pd
|
|
|
|
| 12 |
from io import BytesIO
|
| 13 |
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
| 14 |
from pdfminer.converter import TextConverter
|
|
@@ -195,17 +196,17 @@ def get_data(url):
|
|
| 195 |
ext = jo.split(".")[-1]
|
| 196 |
if ext == 'xlsx' or ext == 'xls' or ext == 'xlsm':
|
| 197 |
rs = excel(jo)
|
| 198 |
-
return rs
|
| 199 |
elif ext == 'pdf':
|
| 200 |
rs = pdf(jo)
|
| 201 |
-
return rs
|
| 202 |
elif ext == 'docx' or ext == 'doc':
|
| 203 |
rs = docx(jo)
|
| 204 |
-
return rs
|
| 205 |
elif ext == 'csv':
|
| 206 |
rs = csv(jo)
|
| 207 |
-
return rs
|
| 208 |
elif ext == 'pptx' or ext == 'ppt':
|
| 209 |
rs = pptx(jo)
|
| 210 |
-
return rs
|
| 211 |
return "No data returned"
|
|
|
|
| 9 |
import chardet
|
| 10 |
from docx import Document
|
| 11 |
import pandas as pd
|
| 12 |
+
from sumarize import summarize
|
| 13 |
from io import BytesIO
|
| 14 |
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
| 15 |
from pdfminer.converter import TextConverter
|
|
|
|
| 196 |
ext = jo.split(".")[-1]
|
| 197 |
if ext == 'xlsx' or ext == 'xls' or ext == 'xlsm':
|
| 198 |
rs = excel(jo)
|
| 199 |
+
return summarize.invoke({"input":rs})
|
| 200 |
elif ext == 'pdf':
|
| 201 |
rs = pdf(jo)
|
| 202 |
+
return summarize.invoke({"input":rs})
|
| 203 |
elif ext == 'docx' or ext == 'doc':
|
| 204 |
rs = docx(jo)
|
| 205 |
+
return summarize.invoke({"input":rs})
|
| 206 |
elif ext == 'csv':
|
| 207 |
rs = csv(jo)
|
| 208 |
+
return summarize.invoke({"input":rs})
|
| 209 |
elif ext == 'pptx' or ext == 'ppt':
|
| 210 |
rs = pptx(jo)
|
| 211 |
+
return summarize.invoke({"input":rs})
|
| 212 |
return "No data returned"
|