# tech16_demo / app.py
# (Hugging Face Spaces page header — "sandsh's picture / Update app.py /
# f614198 verified" — converted to a comment: the raw text is not valid Python.)
import streamlit as st
from openai import OpenAI
import pandas as pd
from simple_salesforce import Salesforce
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain import hub
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActSingleInputOutputParser
from langchain.tools.render import render_text_description
from langchain.tools import BaseTool, StructuredTool, tool
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, ServiceContext
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.core import PromptTemplate
import os
import random
import numpy as np
from datetime import datetime, timedelta
# Sidebar: collect the user's OpenAI API key (masked input) and expose it to
# the LangChain / LlamaIndex clients via the environment.
with st.sidebar:
    openai_api_key = st.text_input(
        "Enter your OpenAI API key:", key="chatbot_api_key", type="password"
    )
    # Only export the key once the user has entered one; exporting an empty
    # string would make every downstream OpenAI call fail with a confusing
    # authentication error instead of the friendly st.info prompt below.
    if openai_api_key:
        os.environ["OPENAI_API_KEY"] = openai_api_key
# Streamlit app
st.title("Sales CoPilot Chatbot")
st.write("Connecting to SalesForce")
# SECURITY: Salesforce username, password, and security token are hard-coded
# and committed with the source. Anyone who can read this file can log into
# the org — move these to st.secrets / environment variables and rotate the
# token. domain='login' targets a production (non-sandbox) org.
sf= Salesforce(
username='salescopilot@stanford.ai',
password='Salestech16',
security_token='7MeFPnJLh8VMXkDo94XL7gio',
domain='login')
# Query Opportunities. sf.query_all auto-pages through every matching record
# and returns a dict whose 'records' key holds one dict per row (each row also
# carries an 'attributes' metadata entry that is dropped below).
opportunity_columns = sf.query_all("""
SELECT
Id, Name, Amount,
AccountId, StageName, ExpectedRevenue, CloseDate,
Probability, OwnerId, LeadSource,
CreatedDate,
ForecastCategory
FROM Opportunity
""")
# Query Accounts (identity, industry, and revenue only).
account_columns = sf.query_all("""
SELECT Id, Name, Industry, AnnualRevenue
FROM Account
""")
# Turn each Salesforce query result into a DataFrame, discarding the
# per-record 'attributes' metadata column that simple_salesforce adds.
def _records_to_df(query_result):
    """Return the 'records' of a simple_salesforce result as a DataFrame, minus metadata."""
    return pd.DataFrame(query_result['records']).drop(columns='attributes')

opportunities_df = _records_to_df(opportunity_columns)
accounts_df = _records_to_df(account_columns)
# (A merged opportunities/accounts view was prototyped here but is unused.)
# --- Mock ownership data -------------------------------------------------
# Salesforce OwnerId -> candidate sales-rep name(s). Each entry is a
# single-element list so random.choice can pick a name per opportunity.
owner_mapping = {
    '005ak000007B1b5AAC': ['Alice Johnson'],
    '005ak000007B2g5AAC': ['Eva Green'],
    '005ak000007C3h6AAC': ['Grace Black'],
    '005ak000007D4j7AAC': ['Ivy Smith'],
}

# Mock e-mail address for each rep name.
contact_details = {
    'Alice Johnson': 'alice.johnson@salescrm.com',
    'Eva Green': 'eva.green@salescrm.com',
    'Grace Black': 'grace.black@salescrm.com',
    'Ivy Smith': 'ivy.smith@salescrm.com',
}

# Scatter the opportunities randomly across the mock owners.
unique_owner_ids = list(owner_mapping.keys())
opportunities_df['OwnerId'] = random.choices(unique_owner_ids, k=len(opportunities_df))

def assign_random_name(owner_id):
    """Pick one of the names registered for *owner_id* (each list holds one name)."""
    return random.choice(owner_mapping[owner_id])

# Derive the display name and e-mail for every row from its OwnerId.
opportunities_df['OwnerName'] = opportunities_df['OwnerId'].apply(assign_random_name)
opportunities_df['Email'] = opportunities_df['OwnerName'].apply(contact_details.__getitem__)
## Mock CreatedDate: overwrite every row with a random date in Jan-Mar 2024.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 3, 31)
_created_span_days = (end_date - start_date).days
opportunities_df['CreatedDate'] = opportunities_df['CreatedDate'].apply(
    lambda _ignored: start_date + timedelta(days=random.randint(0, _created_span_days))
)
def random_date(start_date, end_date):
    """Return a uniformly random datetime between *start_date* and *end_date* (both day-endpoints inclusive)."""
    span_days = (end_date - start_date).days
    return start_date + timedelta(days=random.randint(0, span_days))

# Window in which closed opportunities are given a CloseDate.
close_start_date = datetime(2024, 4, 1)
close_end_date = datetime(2024, 7, 31)
# Give 'Closed Won' rows a random CloseDate inside the close window; every
# other stage keeps NaN for now (Closed Lost rows get a date further below).
opportunities_df['CloseDate'] = opportunities_df['StageName'].apply(
    lambda stage: random_date(close_start_date, close_end_date)
    if stage == 'Closed Won'
    else np.nan
)
def set_closed_lost(row):
    """With ~7% probability, rewrite *row* in place as a 'Closed Lost' deal.

    Losing rows get Probability 0, a random CloseDate inside the close window,
    and a 'Closed' forecast category. The (possibly modified) row is returned
    so this can be used with DataFrame.apply(axis=1).
    """
    if random.random() >= 0.07:  # 93% of rows pass through untouched
        return row
    row['StageName'] = 'Closed Lost'
    row['Probability'] = 0
    row['CloseDate'] = random_date(close_start_date, close_end_date)
    row['ForecastCategory'] = 'Closed'
    return row

# Mark ~7% of all opportunities (regardless of current stage) as Closed Lost.
opportunities_df = opportunities_df.apply(set_closed_lost, axis=1)
# Assign a random customer type to every opportunity. (StageName is used only
# as a convenient per-row iterator; its value is ignored.)
_customer_types = ['Existing Customer Upgrade', 'New Customer Acquisition']
opportunities_df['CustomerType'] = opportunities_df['StageName'].apply(
    lambda _stage: random.choice(_customer_types)
)
# --- Top-deal flag and row cleanup --------------------------------------
# Deals whose Amount exceeds this threshold are flagged as top deals.
high_value_threshold = 200000
# Vectorized comparison replaces the original row-wise apply with its
# `True if x else False` lambda; NaN Amounts compare as False either way.
opportunities_df['Topdealflag'] = opportunities_df['Amount'] > high_value_threshold
# Remove any duplicate names in the Name column (keeps the first occurrence).
opportunities_df = opportunities_df.drop_duplicates(subset='Name')
# Drop the stock "United Oil" sample deals that should not appear in the demo.
opportunities_df = opportunities_df[~opportunities_df['Name'].isin([
    'United Oil Installations',
    'United Oil Emergency Generators',
    'United Oil Plant Standby Generators',
    'United Oil Standby Generators',
    'United Oil Refinery Generators',
])]
# Product tier is derived from deal size: Amount and ExpectedRevenue each have
# Enterprise/Premium cutoffs, and either metric alone can qualify a deal.
def assign_product_tier(row):
    """Map a row's Amount / ExpectedRevenue to a Sales Copilot product tier."""
    amount, expected = row['Amount'], row['ExpectedRevenue']
    if amount > 500000 or expected > 110000:
        return 'Sales Copilot Enterprise'
    if amount > 200000 or expected > 50000:
        return 'Sales Copilot Premium'
    return 'Sales Copilot Basic'
opportunities_df['Product'] = opportunities_df.apply(assign_product_tier, axis=1)

# Canonical column order for the demo dataset: identity, ownership, deal
# economics, lifecycle dates, then classification flags. Selecting with this
# list below raises a KeyError if any expected column is missing.
columns_order = [
    'Id',                # unique opportunity identifier
    'Name',              # opportunity / deal title
    'AccountId',         # related account identifier (if available)
    'OwnerId',           # sales representative identifier
    'OwnerName',         # sales representative name
    'Email',             # sales representative e-mail
    'Amount',            # potential revenue of the opportunity
    'Probability',       # likelihood of winning
    'ExpectedRevenue',   # anticipated revenue (Amount * Probability)
    'StageName',         # current pipeline stage
    'CloseDate',         # expected or actual close date
    'CreatedDate',       # creation date of the opportunity
    'CustomerType',      # new vs. existing customer
    'LeadSource',        # origin of the opportunity
    'ForecastCategory',  # pipeline vs. closed forecast bucket
    'Topdealflag',       # whether this is a top deal
    'Product',           # Sales Copilot product tier
]
opportunities_df = opportunities_df[columns_order]
# Hand-crafted demo opportunities: these four Ids are removed from the live
# data below and replaced with these fixed rows so the demo always contains
# the same showcase deals.
# NOTE(review): pd.NaT is a *datetime* missing-value sentinel; using it in the
# text column LeadSource is unconventional (None / np.nan would be typical) —
# confirm downstream code tolerates NaT there.
additional_rows = pd.DataFrame({
'Id': ['006ak000004gjrLAAQ', '006ak000004gjrIAAQ', '006ak000004gjrFAAQ', '006ak000004gjrGAAQ'],
'Name': ['Delta Machineries', 'United Oil SLA', 'Dickenson Mobile Generators', 'Grand Hotels Kitchen Generator'],
'AccountId': ['001ak00000UEdzaAAD', '001ak00000UEdzaAAD', '001ak00000UEdzYAAT', '001ak00000UEdzZAAT'],
'OwnerId': ['005ak000007B2g5AAC', '005ak000007D4j7AAC', '005ak000007C3h6AAC', '005ak000007B1b5AAC'],
'OwnerName': ['Eva Green', 'Ivy Smith', 'Grace Black', 'Alice Johnson'],
'Email': ['eva.green@salescrm.com', 'ivy.smith@salescrm.com', 'grace.black@salescrm.com', 'alice.johnson@salescrm.com'],
'Amount': [125000, 120000, 15000, 15000],
'Probability': [90, 80, 10, 60],
'ExpectedRevenue': [112500, 96000, 1500, 9000],
'StageName': ['Negotiation/Review', 'Value Proposition', 'Qualification', 'Closed Lost'],
'CloseDate': [pd.NaT, '2024-09-30', pd.NaT, pd.NaT],
'CreatedDate': ['2024-01-28', '2024-02-11', '2024-02-03', '2024-01-08'],
'CustomerType': ['Existing Customer Upgrade', 'New Customer Acquisition', 'Existing Customer Upgrade', 'Existing Customer Upgrade'],
'LeadSource': [pd.NaT, 'Partner', 'Purchased List', pd.NaT],
'ForecastCategory': ['Pipeline', 'Pipeline', 'Pipeline', 'Pipeline'],
'Topdealflag': [True, True, False, False],
'Product': ['Sales Copilot Enterprise', 'Sales Copilot Enterprise', 'Sales Copilot Basic', 'Sales Copilot Basic']
})
# Remove the live rows with the same Opportunity IDs, then append the fixed rows.
ids_to_remove = ['006ak000004gjrLAAQ', '006ak000004gjrIAAQ', '006ak000004gjrFAAQ', '006ak000004gjrGAAQ']
opportunities_df = opportunities_df[~opportunities_df['Id'].isin(ids_to_remove)]
opportunities_df = pd.concat([opportunities_df, additional_rows], ignore_index=True)
# Rename to the friendlier column names the query engine and UI use.
df_salesforce = opportunities_df.rename(columns={"Name": "Opportunity", "OwnerName": "Sales_Rep", "Amount": "Value", "StageName": "Stage", "Topdealflag": "Top_deal"})
st.write("connected live with Salesforce")
st.dataframe(df_salesforce)
# --- Query engines -------------------------------------------------------
# Natural-language-to-pandas engine over the live Salesforce DataFrame; it
# translates a question into pandas code and executes it against df_salesforce.
# verbose=True echoes the generated pandas code for debugging.
structured_query_engine = PandasQueryEngine(df=df_salesforce, verbose=True)
# Defining a custom tool for querying the pandas DataFrame using natural language
# Custom LangChain tool: lets the agent answer data questions by routing
# natural-language queries through the PandasQueryEngine built above.
class PandasTool(BaseTool):
    name: str = "PandasTool"
    description: str = """
Use this tool to collect and analyze a dataset using Pandas.
"""

    def _run(self, question_to_answer: str) -> str:
        """Entry point invoked by the agent framework."""
        return self.pandas_query_engine(question_to_answer)

    def pandas_query_engine(self, question_to_answer: str) -> str:
        """Delegate the question to the module-level structured_query_engine."""
        return structured_query_engine.query(question_to_answer)

# Instantiate once so the agent setup below can reference it.
pandas_tool = PandasTool()
# Read documents from the current directory for retrieval.
# NOTE(review): in current llama_index releases SimpleDirectoryReader.load_data()
# takes no file-name argument — selecting a single file is done with
# SimpleDirectoryReader(input_files=[...]). Confirm against the installed
# version; the positional string here may be ignored or rejected.
documents = SimpleDirectoryReader("./").load_data("Combined-documents-internal-external.pdf")
# Create a VectorStoreIndex object from the documents. This will involve processing the documents
# and creating a vector representation for each of them, suitable for semantic searching.
index = VectorStoreIndex.from_documents(documents)
# Convert the VectorStoreIndex object into a query engine. This query engine can be used to
# perform semantic searches on the index, matching natural language queries to the most relevant
# documents in the index.
document_query_engine = index.as_query_engine()
# Custom LangChain tool: semantic search / summarization over the vector
# index of the loaded PDF, via the document_query_engine built above.
class DocumentsTool(BaseTool):
    name: str = "DocumentsTool"
    description: str = """
Use this tool to search the documents to find answers to questions or to summarize documents
"""

    def _run(self, question_to_answer: str) -> str:
        """Entry point invoked by the agent framework."""
        return self.unstructured_query_engine(question_to_answer)

    def unstructured_query_engine(self, question_to_answer: str) -> str:
        """Delegate the question to the module-level document_query_engine."""
        return document_query_engine.query(question_to_answer)

# Instantiate once so the agent setup below can reference it.
documents_tool = DocumentsTool()
# This import intentionally shadows the deprecated langchain.chat_models
# ChatOpenAI imported at the top of the file with the maintained
# langchain_openai implementation.
from langchain_openai import ChatOpenAI
# load the language model we're using to control the agent
# (temperature=0 keeps the ReAct tool-selection reasoning as stable as possible)
llm = ChatOpenAI(model = "gpt-4",temperature=0)
# load custom tools and a default tool
tools = [pandas_tool, documents_tool]
# initialize agent with the tools, language model, and the type of agent
# NOTE(review): initialize_agent / AgentType are deprecated in newer LangChain
# releases in favor of create_react_agent — fine while pinned to this version.
agent = initialize_agent(tools,
llm=llm,
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
verbose=True
)
# --- Chat UI -------------------------------------------------------------
# Seed the conversation history on first load.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How can I help you?"}
    ]

# Replay the conversation so far.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

# Handle a newly submitted user prompt.
if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    # Route the prompt through the ReAct agent (PandasTool + DocumentsTool).
    msg = agent.run(prompt)
    st.session_state.messages.append({"role": "assistant", "content": msg})
    st.chat_message("assistant").write(msg)
# Get OpenAI API key from user
#api_key = st.text_input("Enter your OpenAI API key:", type="password")
#client = OpenAI(api_key=api_key)