# Sales CoPilot Chatbot — Streamlit app (Hugging Face Space)
| import streamlit as st | |
| from openai import OpenAI | |
| import pandas as pd | |
| from simple_salesforce import Salesforce | |
| from langchain.agents import load_tools | |
| from langchain.agents import initialize_agent | |
| from langchain.agents import AgentType | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain import hub | |
| from langchain.agents.format_scratchpad import format_log_to_str | |
| from langchain.agents.output_parsers import ReActSingleInputOutputParser | |
| from langchain.tools.render import render_text_description | |
| from langchain.tools import BaseTool, StructuredTool, tool | |
| from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, ServiceContext | |
| from llama_index.experimental.query_engine import PandasQueryEngine | |
| from llama_index.core import PromptTemplate | |
| import os | |
| import random | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
# Sidebar: collect the OpenAI API key from the user.
with st.sidebar:
    openai_api_key = st.text_input(
        "Enter your OpenAI API key:", key="chatbot_api_key", type="password"
    )
# Only export the key when one was actually provided; unconditionally
# assigning an empty string would mask a missing key behind a confusing
# downstream auth error.
if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key

# Streamlit app
st.title("Sales CoPilot Chatbot")
st.write("Connecting to SalesForce")

# SECURITY FIX: the Salesforce username, password, and security token were
# previously hard-coded in this file. Read them from the environment instead
# (populate via Streamlit secrets / the Space's secret settings), and rotate
# the leaked credentials immediately.
sf = Salesforce(
    username=os.environ["SF_USERNAME"],
    password=os.environ["SF_PASSWORD"],
    security_token=os.environ["SF_SECURITY_TOKEN"],
    domain='login',
)
# Query Opportunities: pull the raw opportunity rows from Salesforce.
opportunity_columns = sf.query_all("""
SELECT
Id, Name, Amount,
AccountId, StageName, ExpectedRevenue, CloseDate,
Probability, OwnerId, LeadSource,
CreatedDate,
ForecastCategory
FROM Opportunity
""")

# Query Accounts: pull the account rows that opportunities reference.
account_columns = sf.query_all("""
SELECT Id, Name, Industry, AnnualRevenue
FROM Account
""")

# Each query_all() result holds its rows under 'records'; every record also
# carries an 'attributes' metadata dict, which is dropped after tabulating.
opportunities_df = pd.DataFrame.from_records(opportunity_columns['records']).drop(columns='attributes')
accounts_df = pd.DataFrame.from_records(account_columns['records']).drop(columns='attributes')
# (The structured merge of opportunities with accounts is currently unused.)
# df_structured = pd.merge(opportunities_df, accounts_df, left_on='AccountId',
#                          right_on='Id', suffixes=('_Opportunity', '_Account'))

# Mock OwnerId -> candidate owner names. Each value is a (single-element)
# list so assign_random_name below can random.choice over it.
owner_mapping = {
    '005ak000007B1b5AAC': ['Alice Johnson'],
    '005ak000007B2g5AAC': ['Eva Green'],
    '005ak000007C3h6AAC': ['Grace Black'],
    '005ak000007D4j7AAC': ['Ivy Smith'],
}

# Mock e-mail addresses for the owner names above.
contact_details = {
    'Alice Johnson': 'alice.johnson@salescrm.com',
    'Eva Green': 'eva.green@salescrm.com',
    'Grace Black': 'grace.black@salescrm.com',
    'Ivy Smith': 'ivy.smith@salescrm.com',
}

# Overwrite OwnerId with one random pick from the mock owner ids per row.
unique_owner_ids = list(owner_mapping)
opportunities_df['OwnerId'] = random.choices(unique_owner_ids, k=len(opportunities_df))
def assign_random_name(owner_id):
    """Return a randomly chosen owner name for the given mock OwnerId."""
    candidates = owner_mapping[owner_id]
    return random.choice(candidates)

# Derive the rep's display name and mock e-mail from the assigned OwnerId.
opportunities_df['OwnerName'] = opportunities_df['OwnerId'].apply(assign_random_name)
opportunities_df['Email'] = opportunities_df['OwnerName'].apply(lambda owner: contact_details[owner])
# Mock data: overwrite CreatedDate with a random date in Jan-Mar 2024.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 3, 31)
_created_span_days = (end_date - start_date).days
opportunities_df['CreatedDate'] = opportunities_df['CreatedDate'].apply(
    lambda _ignored: start_date + timedelta(days=random.randint(0, _created_span_days))
)
def random_date(start_date, end_date):
    """Return a uniformly random datetime at a whole-day offset within
    [start_date, end_date], both endpoints inclusive."""
    span_days = (end_date - start_date).days
    return start_date + timedelta(days=random.randint(0, span_days))
# CloseDate window used for mock closed deals.
close_start_date = datetime(2024, 4, 1)
close_end_date = datetime(2024, 7, 31)

def _mock_close_date(row):
    # Only already-won deals get a concrete close date; every other stage
    # stays open (NaN).
    if row['StageName'] == 'Closed Won':
        return random_date(close_start_date, close_end_date)
    return np.nan

opportunities_df['CloseDate'] = opportunities_df.apply(_mock_close_date, axis=1)
def set_closed_lost(row):
    """With 7% probability, rewrite the row as a Closed Lost deal (mock data).

    Sets probability to 0, assigns a close date in the mock window, and
    moves the forecast category to 'Closed'. Rows outside the 7% draw are
    returned unchanged.
    """
    if random.random() >= 0.07:
        return row  # the vast majority of rows stay untouched
    row['StageName'] = 'Closed Lost'
    row['Probability'] = 0
    row['CloseDate'] = random_date(close_start_date, close_end_date)
    row['ForecastCategory'] = 'Closed'
    return row
# Randomly flag ~7% of the rows as Closed Lost.
opportunities_df = opportunities_df.apply(set_closed_lost, axis=1)

# Mock CustomerType: an independent coin flip per row. The StageName input
# is ignored on purpose — .apply is used only as a per-row iterator here.
opportunities_df['CustomerType'] = opportunities_df['StageName'].apply(
    lambda _stage: random.choice(['Existing Customer Upgrade', 'New Customer Acquisition'])
)

# Top-deal flag: any opportunity strictly above this Amount threshold.
high_value_threshold = 200000
# IDIOM FIX: the former row-wise apply with `True if amount > t else False`
# is a redundant ternary inside a needless axis=1 loop; the vectorized
# comparison produces the same booleans (NaN amounts compare False either way).
opportunities_df['Topdealflag'] = opportunities_df['Amount'] > high_value_threshold
# Drop duplicate opportunity names, keeping the first occurrence of each.
opportunities_df = opportunities_df.drop_duplicates(subset='Name')

# Remove the stock "United Oil ..." demo opportunities entirely.
_excluded_names = [
    'United Oil Installations',
    'United Oil Emergency Generators',
    'United Oil Plant Standby Generators',
    'United Oil Standby Generators',
    'United Oil Refinery Generators',
]
opportunities_df = opportunities_df[~opportunities_df['Name'].isin(_excluded_names)]
def assign_product_tier(row):
    """Map a deal's Amount / ExpectedRevenue onto a Sales Copilot tier.

    Enterprise: Amount > 500k or ExpectedRevenue > 110k
    Premium:    Amount > 200k or ExpectedRevenue > 50k
    Basic:      everything else (NaN amounts fail every comparison and
                therefore land here too)
    """
    amount = row['Amount']
    expected = row['ExpectedRevenue']
    if amount > 500000 or expected > 110000:
        return 'Sales Copilot Enterprise'
    if amount > 200000 or expected > 50000:
        return 'Sales Copilot Premium'
    return 'Sales Copilot Basic'
# Derive the product tier for every opportunity row.
opportunities_df['Product'] = opportunities_df.apply(assign_product_tier, axis=1)
# Canonical column order for the opportunities table.
columns_order = [
    'Id',                # unique opportunity identifier
    'Name',              # opportunity / deal title
    'AccountId',         # related account id (when available)
    'OwnerId',           # sales-rep identifier
    'OwnerName',         # sales-rep display name
    'Email',             # sales-rep e-mail
    'Amount',            # potential deal value
    'Probability',       # likelihood of winning
    'ExpectedRevenue',   # Amount * Probability
    'StageName',         # current pipeline stage
    'CloseDate',         # expected or actual close date
    'CreatedDate',       # creation date
    'CustomerType',      # new vs existing customer
    'LeadSource',        # origin of the opportunity
    'ForecastCategory',  # pipeline vs closed
    'Topdealflag',       # top-deal indicator
    'Product',           # Sales Copilot product tier
]
# Reorder (and implicitly restrict) the DataFrame to these columns.
opportunities_df = opportunities_df[columns_order]
# Hand-crafted demo opportunities that will replace their live counterparts
# (one dict per row; column order follows the first row's keys).
additional_rows = pd.DataFrame([
    {
        'Id': '006ak000004gjrLAAQ', 'Name': 'Delta Machineries',
        'AccountId': '001ak00000UEdzaAAD', 'OwnerId': '005ak000007B2g5AAC',
        'OwnerName': 'Eva Green', 'Email': 'eva.green@salescrm.com',
        'Amount': 125000, 'Probability': 90, 'ExpectedRevenue': 112500,
        'StageName': 'Negotiation/Review', 'CloseDate': pd.NaT,
        'CreatedDate': '2024-01-28', 'CustomerType': 'Existing Customer Upgrade',
        'LeadSource': pd.NaT, 'ForecastCategory': 'Pipeline',
        'Topdealflag': True, 'Product': 'Sales Copilot Enterprise',
    },
    {
        'Id': '006ak000004gjrIAAQ', 'Name': 'United Oil SLA',
        'AccountId': '001ak00000UEdzaAAD', 'OwnerId': '005ak000007D4j7AAC',
        'OwnerName': 'Ivy Smith', 'Email': 'ivy.smith@salescrm.com',
        'Amount': 120000, 'Probability': 80, 'ExpectedRevenue': 96000,
        'StageName': 'Value Proposition', 'CloseDate': '2024-09-30',
        'CreatedDate': '2024-02-11', 'CustomerType': 'New Customer Acquisition',
        'LeadSource': 'Partner', 'ForecastCategory': 'Pipeline',
        'Topdealflag': True, 'Product': 'Sales Copilot Enterprise',
    },
    {
        'Id': '006ak000004gjrFAAQ', 'Name': 'Dickenson Mobile Generators',
        'AccountId': '001ak00000UEdzYAAT', 'OwnerId': '005ak000007C3h6AAC',
        'OwnerName': 'Grace Black', 'Email': 'grace.black@salescrm.com',
        'Amount': 15000, 'Probability': 10, 'ExpectedRevenue': 1500,
        'StageName': 'Qualification', 'CloseDate': pd.NaT,
        'CreatedDate': '2024-02-03', 'CustomerType': 'Existing Customer Upgrade',
        'LeadSource': 'Purchased List', 'ForecastCategory': 'Pipeline',
        'Topdealflag': False, 'Product': 'Sales Copilot Basic',
    },
    {
        'Id': '006ak000004gjrGAAQ', 'Name': 'Grand Hotels Kitchen Generator',
        'AccountId': '001ak00000UEdzZAAT', 'OwnerId': '005ak000007B1b5AAC',
        'OwnerName': 'Alice Johnson', 'Email': 'alice.johnson@salescrm.com',
        'Amount': 15000, 'Probability': 60, 'ExpectedRevenue': 9000,
        'StageName': 'Closed Lost', 'CloseDate': pd.NaT,
        'CreatedDate': '2024-01-08', 'CustomerType': 'Existing Customer Upgrade',
        'LeadSource': pd.NaT, 'ForecastCategory': 'Pipeline',
        'Topdealflag': False, 'Product': 'Sales Copilot Basic',
    },
])
# Swap in the hand-crafted rows: drop their live counterparts, then append.
ids_to_remove = ['006ak000004gjrLAAQ', '006ak000004gjrIAAQ', '006ak000004gjrFAAQ', '006ak000004gjrGAAQ']
opportunities_df = opportunities_df.loc[~opportunities_df['Id'].isin(ids_to_remove)]
opportunities_df = pd.concat([opportunities_df, additional_rows], ignore_index=True)

# Friendlier column names for display and for the query engine.
df_salesforce = opportunities_df.rename(columns={
    "Name": "Opportunity",
    "OwnerName": "Sales_Rep",
    "Amount": "Value",
    "StageName": "Stage",
    "Topdealflag": "Top_deal",
})
st.write("connected live with Salesforce")
st.dataframe(df_salesforce)
#Build query engines
#from llama_index.experimental.query_engine import PandasQueryEngine
#from llama_index.core import PromptTemplate
#df_salesforce = pd.read_csv('opportunities.csv')
#df_salesforce = opportunities_df
# Natural-language -> pandas query engine over the live Salesforce frame.
# verbose=True makes it log the pandas code it generates for each query.
structured_query_engine = PandasQueryEngine(df=df_salesforce, verbose=True)
#df_salesforce.head()
#prompts = structured_query_engine.get_prompts()
#print(prompts["pandas_prompt"].template)
#from langchain.tools import BaseTool, StructuredTool, tool
# Creating a query engine that will interact with the pandas DataFrame to perform queries
# Defining a custom tool for querying the pandas DataFrame using natural language
class PandasTool(BaseTool):
    """LangChain tool that answers questions by running them through the
    natural-language pandas query engine over the Salesforce DataFrame."""

    name: str = "PandasTool"
    description: str = """
    Use this tool to collect and analyze a dataset using Pandas.
    """

    def _run(self, question_to_answer: str) -> str:
        # Entry point invoked by the agent; delegates to the query helper.
        return self.pandas_query_engine(question_to_answer)

    def pandas_query_engine(self, question_to_answer: str) -> str:
        # Uses the module-level PandasQueryEngine built over df_salesforce.
        return structured_query_engine.query(question_to_answer)
# Instantiating the PandasTool for later use by agents
pandas_tool = PandasTool()

# Load the combined internal/external PDF for semantic search.
# BUG FIX: SimpleDirectoryReader.load_data() takes no file-name argument —
# the old call load_data("Combined-documents-internal-external.pdf") passed
# the name into the show_progress parameter and silently ingested EVERY
# file in "./". Restricting the reader via input_files loads just the PDF.
documents = SimpleDirectoryReader(
    input_files=["./Combined-documents-internal-external.pdf"]
).load_data()

# Build an in-memory vector index over the documents (embedding each chunk
# for semantic matching) and expose it as a query engine that maps natural-
# language questions to the most relevant passages.
index = VectorStoreIndex.from_documents(documents)
document_query_engine = index.as_query_engine()
# Custom tool that performs semantic search over the indexed documents.
class DocumentsTool(BaseTool):
    """LangChain tool that answers questions or produces summaries by
    querying the document vector index."""

    name: str = "DocumentsTool"
    description: str = """
    Use this tool to search the documents to find answers to questions or to summarize documents
    """

    def _run(self, question_to_answer: str) -> str:
        # Entry point invoked by the agent; delegates to the search helper.
        return self.unstructured_query_engine(question_to_answer)

    def unstructured_query_engine(self, question_to_answer: str) -> str:
        # Uses the module-level semantic query engine over the vector index.
        return document_query_engine.query(question_to_answer)
# Instantiating the DocumentsTool for later use by agents
documents_tool = DocumentsTool()
# NOTE(review): mid-file import; it shadows the langchain.chat_models
# ChatOpenAI imported at the top — move this into the top import block.
from langchain_openai import ChatOpenAI
# load the language model we're using to control the agent
# (temperature=0 keeps the agent's tool-selection reasoning deterministic)
llm = ChatOpenAI(model = "gpt-4",temperature=0)
#llm = ChatOpenAI(model = "o1-preview",temperature=0)
# load custom tools and a default tool
tools = [pandas_tool, documents_tool]
# initialize agent with the tools, language model, and the type of agent
# ZERO_SHOT_REACT_DESCRIPTION: a ReAct agent that chooses among the tools
# based solely on their description strings.
agent = initialize_agent(tools,
                         llm=llm,
                         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         verbose=True
                         )
# Seed the chat history with a greeting on first page load.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How can I help you?"}
    ]

# Replay the conversation so far (Streamlit reruns the script per event).
for message in st.session_state.messages:
    st.chat_message(message["role"]).write(message["content"])

# Handle a newly submitted user prompt.
if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    # Let the ReAct agent pick the tool(s) and compose the answer.
    answer = agent.run(prompt)
    st.session_state.messages.append({"role": "assistant", "content": answer})
    st.chat_message("assistant").write(answer)
| # Get OpenAI API key from user | |
| #api_key = st.text_input("Enter your OpenAI API key:", type="password") | |
| #client = OpenAI(api_key=api_key) | |