Spaces:

roniorque
/

df_ai_int

Build error

Ronio Jerico Roque

Add WebsiteAudienceAcquisition class and integrate into analysis workflow; refactor upload handling in uploadFile

c650b65 12 months ago

5.25 kB

	import streamlit as st
	import pandas as pd
	import pymupdf
	from helper.button_behaviour import hide_button, unhide_button

	class uploadFile:
	    """Collect uploaded PDF/CSV files and publish them to Streamlit session state.

	    Every public method empties one of the instance dictionaries in place,
	    refills it from the given uploads and stores that same dictionary under
	    a ``st.session_state`` key.  Entries are keyed by file name and look like
	    ``{'type': 'pdf' | 'csv', 'content': ...}``.  Uploads with any other MIME
	    type are ignored.  A file that cannot be parsed is skipped and a message
	    is printed, so one bad file never aborts the rest of the batch.

	    NOTE(review): ``multiple_upload_file``, ``upload_website_audience`` and
	    ``upload_file_seo`` all refill ``self.file_dict``, so the session-state
	    entries they publish from one instance are the same dictionary object;
	    running one of them rewrites what the others published.  Confirm with
	    the callers whether that sharing is intended.
	    """

	    def __init__(self):
	        # Backing store shared by the general, website-audience and SEO uploads.
	        self.file_dict = {}
	        # Backing store for the GTmetrix uploads only.
	        self.file_gt = {}

	    def multiple_upload_file(self, uploaded_files):
	        """Store PDFs as text and CSVs as DataFrames under ``'uploaded_files'``.

	        Args:
	            uploaded_files: Iterable of upload objects exposing ``type``,
	                ``name`` and ``read()``; ``None`` is treated as empty.
	        """
	        self._collect(uploaded_files, self.file_dict, self._read_csv_frame)
	        st.session_state['uploaded_files'] = self.file_dict

	    def upload_website_audience(self, uploaded_files):
	        """Store PDFs as text and ``#``-commented CSVs as DataFrames.

	        The result is published under ``'upload_website_audience'``.

	        Args:
	            uploaded_files: Iterable of upload objects exposing ``type``,
	                ``name``, ``read()`` and ``seek()``; ``None`` is treated
	                as empty.
	        """
	        self._collect(
	            uploaded_files,
	            self.file_dict,
	            self._read_csv_frame_skipping_comments,
	        )
	        st.session_state['upload_website_audience'] = self.file_dict

	    def upload_file_seo(self, uploaded_files):
	        """Store PDFs as text and CSVs as raw UTF-8 text under ``'uploaded_files'``.

	        Args:
	            uploaded_files: Iterable of upload objects exposing ``type``,
	                ``name`` and ``read()``; ``None`` is treated as empty.
	        """
	        self._collect(uploaded_files, self.file_dict, self._read_csv_text)
	        st.session_state['uploaded_files'] = self.file_dict

	    def upload_gt(self, gtmetrix):
	        """Store GTmetrix PDFs and CSVs as text under ``'uploaded_gt'``.

	        CSV uploads are written to ``self.file_gt`` like the PDFs.  They used
	        to be written to ``self.file_dict``, which meant they never reached
	        the published ``'uploaded_gt'`` entry.

	        Args:
	            gtmetrix: Iterable of upload objects exposing ``type``, ``name``
	                and ``read()``; ``None`` is treated as empty.
	        """
	        self._collect(gtmetrix, self.file_gt, self._read_csv_text)
	        st.session_state['uploaded_gt'] = self.file_gt

	    def _collect(self, files, target, read_csv):
	        """Refill *target* in place from *files* and return it.

	        Args:
	            files: Iterable of upload objects, or ``None`` for "no uploads".
	            target: Dictionary to empty and repopulate.
	            read_csv: Callable turning one CSV upload into the content to
	                store; it returns ``None`` when the file is unreadable.

	        Returns:
	            The same *target* dictionary, now holding only this batch.
	        """
	        # Clear in place (not rebind) so references handed out earlier keep
	        # pointing at the live dictionary, exactly as the popitem loop did.
	        target.clear()
	        for uploaded_file in files or ():
	            if uploaded_file.type == "application/pdf":
	                kind, content = 'pdf', self._read_pdf_text(uploaded_file)
	            elif uploaded_file.type == "text/csv":
	                kind, content = 'csv', read_csv(uploaded_file)
	            else:
	                # Any other MIME type is ignored.
	                continue
	            if content is not None:
	                target[uploaded_file.name] = {'type': kind, 'content': content}
	        return target

	    @staticmethod
	    def _read_pdf_text(uploaded_file):
	        """Return the PDF's page texts joined by form feeds, or ``None`` on failure."""
	        try:
	            with pymupdf.open(stream=uploaded_file.read(), filetype="pdf") as doc:
	                # chr(12) is the form-feed character, used as page separator.
	                return chr(12).join(page.get_text() for page in doc)
	        except Exception as exc:
	            # Best effort: report the unreadable file and carry on.
	            print(f"Error processing PDF {uploaded_file.name}: {exc}")
	            return None

	    @staticmethod
	    def _read_csv_frame(uploaded_file):
	        """Return the CSV parsed into a DataFrame, or ``None`` on failure."""
	        try:
	            return pd.read_csv(uploaded_file)
	        except Exception as exc:
	            print(f"Error processing CSV {uploaded_file.name}: {exc}")
	            return None

	    @staticmethod
	    def _read_csv_text(uploaded_file):
	        """Return the CSV decoded as UTF-8 text, or ``None`` on failure."""
	        try:
	            return uploaded_file.read().decode("utf-8")
	        except Exception as exc:
	            print(f"Error processing CSV {uploaded_file.name}: {exc}")
	            return None

	    @staticmethod
	    def _read_csv_frame_skipping_comments(uploaded_file):
	        """Return a DataFrame from a CSV containing ``#`` comments, or ``None``.

	        First lets pandas drop the comments itself; if that fails, the file
	        is rewound, lines starting with ``#`` are removed by hand and the
	        remainder is parsed with the default settings.
	        """
	        try:
	            return pd.read_csv(
	                uploaded_file,
	                comment='#',      # treat '#' as the start of a comment
	                engine='python',  # the more flexible parser
	            )
	        except Exception as e:
	            print(f"Error processing CSV: {str(e)}")

	        try:
	            from io import StringIO

	            # The failed attempt may have consumed the stream; start over.
	            uploaded_file.seek(0)
	            raw_text = uploaded_file.read().decode('utf-8')
	            data_lines = [
	                line
	                for line in raw_text.split('\n')
	                if not line.strip().startswith('#')
	            ]
	            return pd.read_csv(StringIO('\n'.join(data_lines)))
	        except Exception as e:
	            print(f"Second attempt failed: {str(e)}")
	            return None

	if __name__ == "__main__":
	    # Runs only when this module is executed directly, not when imported.
	    # Switch the Streamlit page to the wide layout.
	    st.set_page_config(layout="wide")
	    # Create the upload helper; its constructor only builds two empty dicts.
	    app = uploadFile()