Spaces:
Build error
Build error
File size: 5,245 Bytes
13e4835 986cd95 13e4835 c650b65 13e4835 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | import streamlit as st
import pandas as pd
import pymupdf
from helper.button_behaviour import hide_button, unhide_button
class uploadFile:
    """Parse uploaded PDF/CSV files and publish the results in ``st.session_state``.

    Every ``upload_*`` method follows the same steps: empty its target
    dictionary in place, parse each supported upload, store the result under
    the upload's ``name`` as ``{'type': 'pdf' | 'csv', 'content': ...}``, and
    assign the dictionary to a session-state key.  Uploads whose ``type`` is
    neither ``application/pdf`` nor ``text/csv`` are ignored.  An upload that
    cannot be parsed is reported with ``print`` and skipped, so one bad file
    does not abort the whole batch.

    The upload objects must expose ``type``, ``name`` and ``read()`` (and
    ``seek()`` for the comment-skipping CSV fallback); presumably they are
    Streamlit ``UploadedFile`` objects -- confirm against the callers.

    Attributes:
        file_dict: Results of ``multiple_upload_file``,
            ``upload_website_audience`` and ``upload_file_seo``.
        file_gt: Results of ``upload_gt``.

    NOTE(review): the three methods that share ``file_dict`` hand that very
    object to session state, so calling one of them empties and refills what
    the others published earlier.  Confirm this sharing is intended.
    """

    # PDF pages are joined with a form feed (chr(12)) between page texts.
    _PAGE_SEPARATOR = chr(12)

    def __init__(self):
        self.file_dict = {}
        self.file_gt = {}

    # ------------------------------------------------------------------ #
    # Private readers: each takes one upload and returns its parsed content.
    # ------------------------------------------------------------------ #

    @classmethod
    def _pdf_text(cls, uploaded_file):
        """Return the text of every page of a PDF upload as one string."""
        with pymupdf.open(stream=uploaded_file.read(), filetype="pdf") as doc:
            return cls._PAGE_SEPARATOR.join([page.get_text() for page in doc])

    @staticmethod
    def _csv_as_dataframe(uploaded_file):
        """Return a CSV upload parsed into a pandas DataFrame."""
        return pd.read_csv(uploaded_file)

    @staticmethod
    def _csv_as_text(uploaded_file):
        """Return a CSV upload as its raw UTF-8 decoded text."""
        return uploaded_file.read().decode("utf-8")

    @staticmethod
    def _csv_skipping_comments(uploaded_file):
        """Return a CSV upload as a DataFrame, ignoring ``#`` comment lines.

        First lets pandas skip comments itself; if that fails, the upload is
        rewound, comment lines are filtered out by hand and the remainder is
        parsed.  Each failure is printed.  Returns ``None`` when both attempts
        fail, which tells the caller the problem has already been reported.
        """
        try:
            return pd.read_csv(
                uploaded_file,
                comment='#',      # Treat lines starting with # as comments
                engine='python',  # Use the more flexible parsing engine
            )
        except Exception as e:
            print(f"Error processing CSV: {str(e)}")

        try:
            # Local import: the file's import section does not provide StringIO.
            from io import StringIO

            uploaded_file.seek(0)  # the failed attempt may have consumed the stream
            raw_text = uploaded_file.read().decode('utf-8')
            data_lines = [
                line for line in raw_text.split('\n')
                if not line.strip().startswith('#')
            ]
            return pd.read_csv(StringIO('\n'.join(data_lines)))
        except Exception as e:
            print(f"Second attempt failed: {str(e)}")
            return None

    def _load_into(self, target, uploaded_files, csv_reader):
        """Empty *target* in place and refill it from *uploaded_files*.

        Args:
            target: Dictionary to clear and populate, keyed by upload name.
            uploaded_files: Iterable of uploads; ``None`` is treated as empty.
            csv_reader: Callable used to parse uploads of type ``text/csv``.
        """
        # Clear in place (not rebind): session state may already hold this
        # very dictionary object from a previous call.
        target.clear()
        for uploaded_file in uploaded_files or ():
            if uploaded_file.type == "application/pdf":
                kind, reader = 'pdf', self._pdf_text
            elif uploaded_file.type == "text/csv":
                kind, reader = 'csv', csv_reader
            else:
                continue  # unsupported MIME type: ignored

            try:
                content = reader(uploaded_file)
            except Exception as exc:
                # Best effort: report the failure and carry on with the rest.
                print(f"Error processing {uploaded_file.name}: {exc}")
                continue
            if content is None:
                continue  # the reader already reported its own failure

            target[uploaded_file.name] = {'type': kind, 'content': content}

    # ------------------------------------------------------------------ #
    # Public entry points.
    # ------------------------------------------------------------------ #

    def multiple_upload_file(self, uploaded_files):
        """Load uploads into ``file_dict`` (CSV as DataFrame) and publish them.

        The result is stored in ``st.session_state['uploaded_files']``.
        """
        self._load_into(self.file_dict, uploaded_files, self._csv_as_dataframe)
        st.session_state['uploaded_files'] = self.file_dict

    def upload_website_audience(self, uploaded_files):
        """Load uploads into ``file_dict``, skipping ``#`` comment lines in CSVs.

        The result is stored in ``st.session_state['upload_website_audience']``.
        """
        self._load_into(self.file_dict, uploaded_files, self._csv_skipping_comments)
        st.session_state['upload_website_audience'] = self.file_dict

    def upload_file_seo(self, uploaded_files):
        """Load uploads into ``file_dict`` (CSV as raw text) and publish them.

        The result is stored in ``st.session_state['uploaded_files']``.
        """
        self._load_into(self.file_dict, uploaded_files, self._csv_as_text)
        st.session_state['uploaded_files'] = self.file_dict

    def upload_gt(self, gtmetrix):
        """Load uploads into ``file_gt`` (CSV as raw text) and publish them.

        Both PDF and CSV uploads are stored in ``file_gt``; CSV uploads used to
        be written to ``file_dict`` by mistake and therefore never reached
        ``st.session_state['uploaded_gt']``.
        """
        self._load_into(self.file_gt, gtmetrix, self._csv_as_text)
        st.session_state['uploaded_gt'] = self.file_gt
if __name__ == "__main__":
    # Script entry point: build the upload helper, then switch the Streamlit
    # page to the wide layout.
    app = uploadFile()
    st.set_page_config(layout="wide")
|