# Sales CoPilot Chatbot — Streamlit app (Hugging Face Space)
| import streamlit as st | |
| from openai import OpenAI | |
| import pandas as pd | |
| from simple_salesforce import Salesforce | |
| from langchain.agents import load_tools | |
| from langchain.agents import initialize_agent | |
| from langchain.agents import AgentType | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain import hub | |
| from langchain.agents.format_scratchpad import format_log_to_str | |
| from langchain.agents.output_parsers import ReActSingleInputOutputParser | |
| from langchain.tools.render import render_text_description | |
| from langchain.tools import BaseTool, StructuredTool, tool | |
| from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, ServiceContext | |
| from llama_index.experimental.query_engine import PandasQueryEngine | |
| from llama_index.core import PromptTemplate | |
| import os | |
| import random | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
# Sidebar: collect the OpenAI API key from the user.
with st.sidebar:
    openai_api_key = st.text_input(
        "Enter your OpenAI API key:", key="chatbot_api_key", type="password"
    )
# Only export the key when one was actually provided; unconditionally
# assigning an empty string would mask a missing key behind a confusing
# downstream auth error.
if openai_api_key:
    os.environ["OPENAI_API_KEY"] = openai_api_key

# Streamlit app
st.title("Sales CoPilot Chatbot")
st.write("Connecting to SalesForce")

# SECURITY FIX: the Salesforce username, password, and security token were
# previously hard-coded in this file. Read them from the environment instead
# (populate via Streamlit secrets / the Space's secret settings), and rotate
# the leaked credentials immediately.
sf = Salesforce(
    username=os.environ["SF_USERNAME"],
    password=os.environ["SF_PASSWORD"],
    security_token=os.environ["SF_SECURITY_TOKEN"],
    domain='login',
)
# Query Opportunities: pull the raw opportunity rows from Salesforce.
opportunity_columns = sf.query_all("""
SELECT
Id, Name, Amount,
AccountId, StageName, ExpectedRevenue, CloseDate,
Probability, OwnerId, LeadSource,
CreatedDate,
ForecastCategory
FROM Opportunity
""")

# Query Accounts: pull the account rows that opportunities reference.
account_columns = sf.query_all("""
SELECT Id, Name, Industry, AnnualRevenue
FROM Account
""")

# Each query_all() result holds its rows under 'records'; every record also
# carries an 'attributes' metadata dict, which is dropped after tabulating.
opportunities_df = pd.DataFrame.from_records(opportunity_columns['records']).drop(columns='attributes')
accounts_df = pd.DataFrame.from_records(account_columns['records']).drop(columns='attributes')
# (The structured merge of opportunities with accounts is currently unused.)
# df_structured = pd.merge(opportunities_df, accounts_df, left_on='AccountId',
#                          right_on='Id', suffixes=('_Opportunity', '_Account'))

# Mock OwnerId -> candidate owner names. Each value is a (single-element)
# list so assign_random_name below can random.choice over it.
owner_mapping = {
    '005ak000007B1b5AAC': ['Alice Johnson'],
    '005ak000007B2g5AAC': ['Eva Green'],
    '005ak000007C3h6AAC': ['Grace Black'],
    '005ak000007D4j7AAC': ['Ivy Smith'],
}

# Mock e-mail addresses for the owner names above.
contact_details = {
    'Alice Johnson': 'alice.johnson@salescrm.com',
    'Eva Green': 'eva.green@salescrm.com',
    'Grace Black': 'grace.black@salescrm.com',
    'Ivy Smith': 'ivy.smith@salescrm.com',
}

# Overwrite OwnerId with one random pick from the mock owner ids per row.
unique_owner_ids = list(owner_mapping)
opportunities_df['OwnerId'] = random.choices(unique_owner_ids, k=len(opportunities_df))
def assign_random_name(owner_id):
    """Return a randomly chosen owner name for the given mock OwnerId."""
    candidates = owner_mapping[owner_id]
    return random.choice(candidates)

# Derive the rep's display name and mock e-mail from the assigned OwnerId.
opportunities_df['OwnerName'] = opportunities_df['OwnerId'].apply(assign_random_name)
opportunities_df['Email'] = opportunities_df['OwnerName'].apply(lambda owner: contact_details[owner])
# Mock data: overwrite CreatedDate with a random date in Jan-Mar 2024.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 3, 31)
_created_span_days = (end_date - start_date).days
opportunities_df['CreatedDate'] = opportunities_df['CreatedDate'].apply(
    lambda _ignored: start_date + timedelta(days=random.randint(0, _created_span_days))
)
def random_date(start_date, end_date):
    """Return a uniformly random datetime at a whole-day offset within
    [start_date, end_date], both endpoints inclusive."""
    span_days = (end_date - start_date).days
    return start_date + timedelta(days=random.randint(0, span_days))
# CloseDate window used for mock closed deals.
close_start_date = datetime(2024, 4, 1)
close_end_date = datetime(2024, 7, 31)

def _mock_close_date(row):
    # Only already-won deals get a concrete close date; every other stage
    # stays open (NaN).
    if row['StageName'] == 'Closed Won':
        return random_date(close_start_date, close_end_date)
    return np.nan

opportunities_df['CloseDate'] = opportunities_df.apply(_mock_close_date, axis=1)
def set_closed_lost(row):
    """With 7% probability, rewrite the row as a Closed Lost deal (mock data).

    Sets probability to 0, assigns a close date in the mock window, and
    moves the forecast category to 'Closed'. Rows outside the 7% draw are
    returned unchanged.
    """
    if random.random() >= 0.07:
        return row  # the vast majority of rows stay untouched
    row['StageName'] = 'Closed Lost'
    row['Probability'] = 0
    row['CloseDate'] = random_date(close_start_date, close_end_date)
    row['ForecastCategory'] = 'Closed'
    return row
# Randomly flag ~7% of the rows as Closed Lost.
opportunities_df = opportunities_df.apply(set_closed_lost, axis=1)

# Mock CustomerType: an independent coin flip per row. The StageName input
# is ignored on purpose — .apply is used only as a per-row iterator here.
opportunities_df['CustomerType'] = opportunities_df['StageName'].apply(
    lambda _stage: random.choice(['Existing Customer Upgrade', 'New Customer Acquisition'])
)

# Top-deal flag: any opportunity strictly above this Amount threshold.
high_value_threshold = 200000
# IDIOM FIX: the former row-wise apply with `True if amount > t else False`
# is a redundant ternary inside a needless axis=1 loop; the vectorized
# comparison produces the same booleans (NaN amounts compare False either way).
opportunities_df['Topdealflag'] = opportunities_df['Amount'] > high_value_threshold
# Drop duplicate opportunity names, keeping the first occurrence of each.
opportunities_df = opportunities_df.drop_duplicates(subset='Name')

# Remove the stock "United Oil ..." demo opportunities entirely.
_excluded_names = [
    'United Oil Installations',
    'United Oil Emergency Generators',
    'United Oil Plant Standby Generators',
    'United Oil Standby Generators',
    'United Oil Refinery Generators',
]
opportunities_df = opportunities_df[~opportunities_df['Name'].isin(_excluded_names)]
def assign_product_tier(row):
    """Map a deal's Amount / ExpectedRevenue onto a Sales Copilot tier.

    Enterprise: Amount > 500k or ExpectedRevenue > 110k
    Premium:    Amount > 200k or ExpectedRevenue > 50k
    Basic:      everything else (NaN amounts fail every comparison and
                therefore land here too)
    """
    amount = row['Amount']
    expected = row['ExpectedRevenue']
    if amount > 500000 or expected > 110000:
        return 'Sales Copilot Enterprise'
    if amount > 200000 or expected > 50000:
        return 'Sales Copilot Premium'
    return 'Sales Copilot Basic'
# Derive the product tier for every opportunity row.
opportunities_df['Product'] = opportunities_df.apply(assign_product_tier, axis=1)
# Canonical column order for the opportunities table.
columns_order = [
    'Id',                # unique opportunity identifier
    'Name',              # opportunity / deal title
    'AccountId',         # related account id (when available)
    'OwnerId',           # sales-rep identifier
    'OwnerName',         # sales-rep display name
    'Email',             # sales-rep e-mail
    'Amount',            # potential deal value
    'Probability',       # likelihood of winning
    'ExpectedRevenue',   # Amount * Probability
    'StageName',         # current pipeline stage
    'CloseDate',         # expected or actual close date
    'CreatedDate',       # creation date
    'CustomerType',      # new vs existing customer
    'LeadSource',        # origin of the opportunity
    'ForecastCategory',  # pipeline vs closed
    'Topdealflag',       # top-deal indicator
    'Product',           # Sales Copilot product tier
]
# Reorder (and implicitly restrict) the DataFrame to these columns.
opportunities_df = opportunities_df[columns_order]
# Hand-crafted demo opportunities that will replace their live counterparts
# (one dict per row; column order follows the first row's keys).
additional_rows = pd.DataFrame([
    {
        'Id': '006ak000004gjrLAAQ', 'Name': 'Delta Machineries',
        'AccountId': '001ak00000UEdzaAAD', 'OwnerId': '005ak000007B2g5AAC',
        'OwnerName': 'Eva Green', 'Email': 'eva.green@salescrm.com',
        'Amount': 125000, 'Probability': 90, 'ExpectedRevenue': 112500,
        'StageName': 'Negotiation/Review', 'CloseDate': pd.NaT,
        'CreatedDate': '2024-01-28', 'CustomerType': 'Existing Customer Upgrade',
        'LeadSource': pd.NaT, 'ForecastCategory': 'Pipeline',
        'Topdealflag': True, 'Product': 'Sales Copilot Enterprise',
    },
    {
        'Id': '006ak000004gjrIAAQ', 'Name': 'United Oil SLA',
        'AccountId': '001ak00000UEdzaAAD', 'OwnerId': '005ak000007D4j7AAC',
        'OwnerName': 'Ivy Smith', 'Email': 'ivy.smith@salescrm.com',
        'Amount': 120000, 'Probability': 80, 'ExpectedRevenue': 96000,
        'StageName': 'Value Proposition', 'CloseDate': '2024-09-30',
        'CreatedDate': '2024-02-11', 'CustomerType': 'New Customer Acquisition',
        'LeadSource': 'Partner', 'ForecastCategory': 'Pipeline',
        'Topdealflag': True, 'Product': 'Sales Copilot Enterprise',
    },
    {
        'Id': '006ak000004gjrFAAQ', 'Name': 'Dickenson Mobile Generators',
        'AccountId': '001ak00000UEdzYAAT', 'OwnerId': '005ak000007C3h6AAC',
        'OwnerName': 'Grace Black', 'Email': 'grace.black@salescrm.com',
        'Amount': 15000, 'Probability': 10, 'ExpectedRevenue': 1500,
        'StageName': 'Qualification', 'CloseDate': pd.NaT,
        'CreatedDate': '2024-02-03', 'CustomerType': 'Existing Customer Upgrade',
        'LeadSource': 'Purchased List', 'ForecastCategory': 'Pipeline',
        'Topdealflag': False, 'Product': 'Sales Copilot Basic',
    },
    {
        'Id': '006ak000004gjrGAAQ', 'Name': 'Grand Hotels Kitchen Generator',
        'AccountId': '001ak00000UEdzZAAT', 'OwnerId': '005ak000007B1b5AAC',
        'OwnerName': 'Alice Johnson', 'Email': 'alice.johnson@salescrm.com',
        'Amount': 15000, 'Probability': 60, 'ExpectedRevenue': 9000,
        'StageName': 'Closed Lost', 'CloseDate': pd.NaT,
        'CreatedDate': '2024-01-08', 'CustomerType': 'Existing Customer Upgrade',
        'LeadSource': pd.NaT, 'ForecastCategory': 'Pipeline',
        'Topdealflag': False, 'Product': 'Sales Copilot Basic',
    },
])
# Swap in the hand-crafted rows: drop their live counterparts, then append.
ids_to_remove = ['006ak000004gjrLAAQ', '006ak000004gjrIAAQ', '006ak000004gjrFAAQ', '006ak000004gjrGAAQ']
opportunities_df = opportunities_df.loc[~opportunities_df['Id'].isin(ids_to_remove)]
opportunities_df = pd.concat([opportunities_df, additional_rows], ignore_index=True)

# Friendlier column names for display and for the query engine.
df_salesforce = opportunities_df.rename(columns={
    "Name": "Opportunity",
    "OwnerName": "Sales_Rep",
    "Amount": "Value",
    "StageName": "Stage",
    "Topdealflag": "Top_deal",
})
st.write("connected live with Salesforce")
st.dataframe(df_salesforce)
#Build query engines
#from llama_index.experimental.query_engine import PandasQueryEngine
#from llama_index.core import PromptTemplate
#df_salesforce = pd.read_csv('opportunities.csv')
#df_salesforce = opportunities_df
# Natural-language -> pandas query engine over the live Salesforce frame.
# verbose=True makes it log the pandas code it generates for each query.
structured_query_engine = PandasQueryEngine(df=df_salesforce, verbose=True)
#df_salesforce.head()
#prompts = structured_query_engine.get_prompts()
#print(prompts["pandas_prompt"].template)
#from langchain.tools import BaseTool, StructuredTool, tool
# Creating a query engine that will interact with the pandas DataFrame to perform queries
# Defining a custom tool for querying the pandas DataFrame using natural language
class PandasTool(BaseTool):
    """LangChain tool that answers questions by running them through the
    natural-language pandas query engine over the Salesforce DataFrame."""

    name: str = "PandasTool"
    description: str = """
    Use this tool to collect and analyze a dataset using Pandas.
    """

    def _run(self, question_to_answer: str) -> str:
        # Entry point invoked by the agent; delegates to the query helper.
        return self.pandas_query_engine(question_to_answer)

    def pandas_query_engine(self, question_to_answer: str) -> str:
        # Uses the module-level PandasQueryEngine built over df_salesforce.
        return structured_query_engine.query(question_to_answer)
# Instantiating the PandasTool for later use by agents
pandas_tool = PandasTool()

# Load the combined internal/external PDF for semantic search.
# BUG FIX: SimpleDirectoryReader.load_data() takes no file-name argument —
# the old call load_data("Combined-documents-internal-external.pdf") passed
# the name into the show_progress parameter and silently ingested EVERY
# file in "./". Restricting the reader via input_files loads just the PDF.
documents = SimpleDirectoryReader(
    input_files=["./Combined-documents-internal-external.pdf"]
).load_data()

# Build an in-memory vector index over the documents (embedding each chunk
# for semantic matching) and expose it as a query engine that maps natural-
# language questions to the most relevant passages.
index = VectorStoreIndex.from_documents(documents)
document_query_engine = index.as_query_engine()
# Custom tool that performs semantic search over the indexed documents.
class DocumentsTool(BaseTool):
    """LangChain tool that answers questions or produces summaries by
    querying the document vector index."""

    name: str = "DocumentsTool"
    description: str = """
    Use this tool to search the documents to find answers to questions or to summarize documents
    """

    def _run(self, question_to_answer: str) -> str:
        # Entry point invoked by the agent; delegates to the search helper.
        return self.unstructured_query_engine(question_to_answer)

    def unstructured_query_engine(self, question_to_answer: str) -> str:
        # Uses the module-level semantic query engine over the vector index.
        return document_query_engine.query(question_to_answer)
# Instantiating the DocumentsTool for later use by agents
documents_tool = DocumentsTool()
# NOTE(review): mid-file import; it shadows the langchain.chat_models
# ChatOpenAI imported at the top — move this into the top import block.
from langchain_openai import ChatOpenAI
# load the language model we're using to control the agent
# (temperature=0 keeps the agent's tool-selection reasoning deterministic)
llm = ChatOpenAI(model = "gpt-4",temperature=0)
#llm = ChatOpenAI(model = "o1-preview",temperature=0)
# load custom tools and a default tool
tools = [pandas_tool, documents_tool]
# initialize agent with the tools, language model, and the type of agent
# ZERO_SHOT_REACT_DESCRIPTION: a ReAct agent that chooses among the tools
# based solely on their description strings.
agent = initialize_agent(tools,
                         llm=llm,
                         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         verbose=True
                         )
# Seed the chat history with a greeting on first page load.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How can I help you?"}
    ]

# Replay the conversation so far (Streamlit reruns the script per event).
for message in st.session_state.messages:
    st.chat_message(message["role"]).write(message["content"])

# Handle a newly submitted user prompt.
if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    # Let the ReAct agent pick the tool(s) and compose the answer.
    answer = agent.run(prompt)
    st.session_state.messages.append({"role": "assistant", "content": answer})
    st.chat_message("assistant").write(answer)
| # Get OpenAI API key from user | |
| #api_key = st.text_input("Enter your OpenAI API key:", type="password") | |
| #client = OpenAI(api_key=api_key) | |