Spaces:

roniorque
/

df_ai_int

Build error

Ronio Jerico Roque

Add SEO analysis modules and refactor existing code

b438494 about 1 year ago

5.01 kB

	from io import StringIO
	from urllib.parse import urlparse
	import streamlit as st
	import requests
	from dotenv import load_dotenv
	import os
	import time
	from helper.telemetry import collect_telemetry
	from helper.upload_File import uploadFile
	from helper.button_behaviour import hide_button, unhide_button
	from helper.initialize_analyze_session import initialize_analyze_session
	import pandas as pd
	import asyncio
	import json

	class SeoOffPageAnalyst:
	def __init__(self, model_url):
	self.uploaded_files = []
	self.file_dict = {}
	self.model_url = model_url
	#self.analyst_name = analyst_name
	#self.data_src = data_src
	#self.analyst_description = analyst_description
	self.initialize()
	self.row1()

	def initialize(self):
	# FOR ENV
	load_dotenv()

	# AGENT NAME
	#st.header(self.analyst_name)
	if 'off_page_file_uploaded' not in st.session_state:
	st.session_state['off_page_file_uploaded'] = ''

	def request_model(self, payload_txt, headers):
	response = requests.post(self.model_url, json=payload_txt, headers=headers)
	response.raise_for_status()
	output = response.json()
	text = output["outputs"][0]["outputs"][0]["results"]["text"]["data"]["text"]
	text = json.loads(text)

	backlinks = text[0]
	referring_domains = text[1]

	return text

	def process(self):
	start_time = time.time()
	session = st.session_state['analyze']
	if self.uploaded_files and session == 'clicked':
	combined_text = ""
	with st.spinner('SEO Off Page Analyst...', show_time=True):
	st.write('')
	for file_info in st.session_state['uploaded_files'].values():
	'''
	if file_info['type'] == 'pdf':
	combined_text += file_info['content'] + "\n"
	'''
	try:
	if file_info['type'] == 'csv':
	# Load CSV
	df = pd.read_csv(StringIO(file_info['content'].to_csv(index=True)))

	# Count total rows
	num_rows = len(df)

	# Extract unique domains from 'Source url'
	df['Source Domain'] = df['Source url'].apply(lambda x: urlparse(x).netloc)
	unique_domains = df['Source Domain'].nunique()

	combined_text += f"Total Backlinks Count: {num_rows}\n"
	combined_text += f"Referring Domain: {unique_domains}"
	st.info("Backlinks - SEMRush Uploaded Successfuly", icon="ℹ️")
	except KeyError:
	st.info("Incorrect CSV format. Please upload a valid CSV file.")
	# OUTPUT FOR SEO ANALYST
	payload_txt = {"question": combined_text}
	headers = {
	"Content-Type": "application/json",
	"x-api-key": f"{os.getenv('x-api-key')}"
	}
	#result = self.request_model(payload_txt, headers)

	#end_time = time.time()
	#time_lapsed = end_time - start_time

	debug_info = {'data_field' : 'Backlinks', 'result': payload_txt}
	#debug_info = {'url_uuid': self.model_url.split("-")[-1],'time_lapsed' : time_lapsed, 'files': [*st.session_state['uploaded_files']],'payload': payload_txt, 'result': result}
	collect_telemetry(debug_info)

	st.session_state["off_page_file_uploaded"] = 'uploaded'

	#with st.expander("Debug information", icon="⚙"):
	# st.write(debug_info)
	st.session_state['analyzing'] = False
	def row1(self):
	#st.write(self.data_src)
	self.uploaded_files = st.file_uploader('Backlinks - SEMRush', type='csv', accept_multiple_files=True, key="seo_off")
	if self.uploaded_files:
	upload.multiple_upload_file(self.uploaded_files)

	#st.write("") # FOR THE HIDE BUTTON
	#st.write("") # FOR THE HIDE BUTTON

	st.session_state['analyzing'] = False

	self.process()


	if __name__ == "__main__":
	st.set_page_config(layout="wide")

	upload = uploadFile()