Spaces:
Sleeping
Sleeping
Update document_scrapped.py
Browse files - document_scrapped.py +2 -1
document_scrapped.py
CHANGED
|
@@ -5,6 +5,7 @@ import json
|
|
| 5 |
import io
|
| 6 |
import fitz
|
| 7 |
from pptx import Presentation
|
|
|
|
| 8 |
import chardet
|
| 9 |
from docx import Document
|
| 10 |
import pandas as pd
|
|
@@ -96,7 +97,7 @@ def excel(link : str) -> str:
|
|
| 96 |
response = requests.get(link)
|
| 97 |
if response.status_code == 200:
|
| 98 |
file_content = response.content
|
| 99 |
-
df = pd.read_excel(file_content)
|
| 100 |
if df.shape[0] > 50:
|
| 101 |
sample_size = 50
|
| 102 |
sample_df = df.sample(n=sample_size, random_state=42)
|
|
|
|
| 5 |
import io
|
| 6 |
import fitz
|
| 7 |
from pptx import Presentation
|
| 8 |
+
from io import BytesIO
|
| 9 |
import chardet
|
| 10 |
from docx import Document
|
| 11 |
import pandas as pd
|
|
|
|
| 97 |
response = requests.get(link)
|
| 98 |
if response.status_code == 200:
|
| 99 |
file_content = response.content
|
| 100 |
+
df = pd.read_excel(BytesIO(file_content))
|
| 101 |
if df.shape[0] > 50:
|
| 102 |
sample_size = 50
|
| 103 |
sample_df = df.sample(n=sample_size, random_state=42)
|