Spaces:
Build error
Build error
Ronio Jerico Roque
Add WebsiteAudienceAcquisition class and integrate into analysis workflow; refactor upload handling in uploadFile
c650b65 | import streamlit as st | |
| import pandas as pd | |
| import pymupdf | |
| from helper.button_behaviour import hide_button, unhide_button | |
class uploadFile:
    """Turn uploaded PDF/CSV files into dict entries and publish them via Streamlit.

    Every public method follows the same recipe: empty the target dictionary
    in place, add one ``{'type': ..., 'content': ...}`` entry per readable
    upload (keyed by the upload's ``name``), then store that dictionary in
    ``st.session_state``. Uploads whose ``type`` is neither
    ``"application/pdf"`` nor ``"text/csv"`` are ignored. An upload that
    cannot be parsed is reported on stdout and skipped, so one bad file never
    aborts the rest of the batch.

    Attributes:
        file_dict: Entries produced by ``multiple_upload_file``,
            ``upload_website_audience`` and ``upload_file_seo``.
        file_gt: Entries produced by ``upload_gt``.
    """

    def __init__(self):
        # NOTE(review): ``file_dict`` is shared by three methods and is always
        # emptied in place, so the very same dict object is what gets assigned
        # to both 'uploaded_files' and 'upload_website_audience'. If the
        # session state keeps a reference (presumably it does -- confirm), a
        # later upload call also rewrites what an earlier call published.
        self.file_dict = {}
        self.file_gt = {}

    @staticmethod
    def _pdf_text(upload):
        """Return the text of every page of a PDF upload, joined by form feeds."""
        with pymupdf.open(stream=upload.read(), filetype="pdf") as doc:
            # chr(12) is the form-feed character, used here as a page separator.
            return chr(12).join(page.get_text() for page in doc)

    @staticmethod
    def _csv_frame(upload):
        """Parse a CSV upload into a pandas DataFrame."""
        return pd.read_csv(upload)

    @staticmethod
    def _csv_text(upload):
        """Return the raw content of a CSV upload decoded as UTF-8 text."""
        return upload.read().decode("utf-8")

    @staticmethod
    def _csv_frame_skipping_comments(upload):
        """Parse a CSV upload that may contain ``#`` comment lines.

        The first attempt lets pandas drop the comments. If that raises, the
        upload is rewound, lines starting with ``#`` are removed by hand and
        the remainder is parsed again.

        Returns:
            The parsed DataFrame, or ``None`` when both attempts fail (each
            failure is reported on stdout).
        """
        try:
            return pd.read_csv(upload, comment="#", engine="python")
        except Exception as e:
            print(f"Error processing CSV: {e}")

        try:
            from io import StringIO

            # The failed first attempt may have consumed the stream.
            upload.seek(0)
            raw_text = upload.read().decode("utf-8")
            data_lines = [
                line
                for line in raw_text.split("\n")
                if not line.strip().startswith("#")
            ]
            return pd.read_csv(StringIO("\n".join(data_lines)))
        except Exception as e:
            print(f"Second attempt failed: {e}")
            return None

    def _collect(self, uploaded_files, target, read_csv):
        """Empty *target* in place and refill it from *uploaded_files*.

        Args:
            uploaded_files: Iterable of upload objects exposing ``type``,
                ``name`` and ``read()``; ``None`` is treated as no uploads.
            target: Dictionary to reset and populate (mutated in place).
            read_csv: Callable turning a CSV upload into the stored content;
                a ``None`` result means "already reported, skip this file".

        Returns:
            The same *target* object, for convenience.
        """
        # Clear in place so existing references to the dict stay valid.
        target.clear()
        readers = {
            "application/pdf": ("pdf", self._pdf_text),
            "text/csv": ("csv", read_csv),
        }
        for upload in uploaded_files or ():
            handler = readers.get(upload.type)
            if handler is None:
                continue  # unsupported MIME type
            kind, reader = handler
            try:
                content = reader(upload)
            except Exception as e:
                # Best effort: report the failure and carry on with the batch.
                print(f"Skipping {upload.name!r} ({kind}): {e}")
                continue
            if content is None:
                continue
            target[upload.name] = {"type": kind, "content": content}
        return target

    def multiple_upload_file(self, uploaded_files):
        """Store PDFs as text and CSVs as DataFrames under ``'uploaded_files'``."""
        self._collect(uploaded_files, self.file_dict, self._csv_frame)
        st.session_state['uploaded_files'] = self.file_dict

    def upload_website_audience(self, uploaded_files):
        """Store PDFs as text and ``#``-commented CSVs as DataFrames.

        The result is published under ``'upload_website_audience'``.
        """
        self._collect(
            uploaded_files, self.file_dict, self._csv_frame_skipping_comments
        )
        st.session_state['upload_website_audience'] = self.file_dict

    def upload_file_seo(self, uploaded_files):
        """Store PDFs as text and CSVs as raw UTF-8 text under ``'uploaded_files'``."""
        self._collect(uploaded_files, self.file_dict, self._csv_text)
        st.session_state['uploaded_files'] = self.file_dict

    def upload_gt(self, gtmetrix):
        """Store PDFs as text and CSVs as raw UTF-8 text under ``'uploaded_gt'``.

        Both file kinds go into ``file_gt``. CSV entries used to be written to
        ``file_dict`` by mistake, so they never reached ``'uploaded_gt'``.
        """
        self._collect(gtmetrix, self.file_gt, self._csv_text)
        st.session_state['uploaded_gt'] = self.file_gt
if __name__ == "__main__":
    # Only runs when this file is executed as a script, not when imported:
    # build the upload helper, then switch the page to the wide layout.
    app = uploadFile()
    st.set_page_config(layout="wide")