Spaces:
Runtime error
Runtime error
Commit
·
4eaf3da
0
Parent(s):
first logic
Browse files- .env_example +2 -0
- .gitignore +2 -0
- __pycache__/database.cpython-311.pyc +0 -0
- __pycache__/preprocess.cpython-311.pyc +0 -0
- __pycache__/utilities.cpython-311.pyc +0 -0
- chatbot.py +76 -0
- database.py +15 -0
- preprocess.py +48 -0
- readme.md +3 -0
- requirements.txt +5 -0
- utilities.py +32 -0
.env_example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
REDIS_KEY = ''
|
| 2 |
+
OPENAI_API_KEY = ''
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
product_data.csv
.env
# Interpreter bytecode caches are build artefacts, not source.
__pycache__/
|
__pycache__/database.cpython-311.pyc
ADDED
|
Binary file (610 Bytes). View file
|
|
|
__pycache__/preprocess.cpython-311.pyc
ADDED
|
Binary file (1.35 kB). View file
|
|
|
__pycache__/utilities.cpython-311.pyc
ADDED
|
Binary file (2.19 kB). View file
|
|
|
chatbot.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.prompts import PromptTemplate
|
| 2 |
+
from langchain.llms import OpenAI
|
| 3 |
+
from langchain.embeddings import OpenAIEmbeddings
|
| 4 |
+
from langchain.chains import LLMChain
|
| 5 |
+
from langchain.memory import ConversationBufferMemory
|
| 6 |
+
from redis.commands.search.query import Query
|
| 7 |
+
import time
|
| 8 |
+
import os
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
import numpy as np
|
| 11 |
+
from database import redis_conn
|
| 12 |
+
|
| 13 |
+
# Prompt that turns a free-form shopper request into search keywords.
KEYWORD_PROMPT_TEMPLATE = "Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}"

# Prompt used for the conversational answer; {chat_history} is filled in by
# the ConversationBufferMemory attached to the answering chain.
ANSWER_PROMPT_TEMPLATE = """You are a chatbot. Be kind, detailed and nice. Present the given queried search result in a nice way as answer to the user input. dont ask questions back! just take the given context

{chat_history}
Human: {user_msg}
Chatbot:"""

ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
TOP_K = 5

# Hash fields rendered for each hit. preprocess.py drops the raw ``item_id``
# column and stores ``primary_key`` (item id + domain) instead, so asking the
# index for ``item_id`` returned nothing and ``product.item_id`` raised
# AttributeError.
PRODUCT_TEXT_FIELDS = ('item_name', 'item_keywords', 'primary_key')

# Inputs that end the follow-up loop.
EXIT_COMMANDS = {'exit', 'quit', 'q'}


def format_products(docs):
    """Render search hits as the text block handed to the answering LLM.

    Each hit becomes ``"<item_name> <item_keywords> <primary_key>"`` followed
    by three newlines. A field missing from a hit is rendered as an empty
    string instead of aborting the whole conversation.
    """
    entries = []
    for doc in docs:
        values = (getattr(doc, field, '') or '' for field in PRODUCT_TEXT_FIELDS)
        entries.append(' '.join(values) + "\n\n\n")
    return ''.join(entries)


def search_products(keywords, api_key):
    """Embed ``keywords`` and return the ``TOP_K`` nearest products from Redis."""
    embedding_model = OpenAIEmbeddings(openai_api_key=api_key)
    # The KNN query parameter must be the raw float32 bytes of the vector.
    query_vector = np.array(embedding_model.embed_query(keywords), dtype=np.float32).tobytes()

    q = (
        Query(f'*=>[KNN {TOP_K} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]')
        .sort_by('vector_score')
        .paging(0, TOP_K)
        .return_fields('vector_score', *PRODUCT_TEXT_FIELDS)
        .dialect(2)
    )
    results = redis_conn.ft().search(q, query_params={"vec_param": query_vector})
    return results.docs


def main():
    """Run one interactive session: search once, then answer follow-ups."""
    load_dotenv()
    api_key = os.getenv('OPENAI_API_KEY')

    # NOTE(review): gpt-3.5-turbo is a chat model; LangChain appears to
    # recommend ChatOpenAI for it rather than the completion-style OpenAI
    # wrapper. Left unchanged here - confirm before switching.
    keyword_chain = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key=api_key),
        prompt=PromptTemplate(
            input_variables=["product_description"],
            template=KEYWORD_PROMPT_TEMPLATE,
        ),
    )

    userinput = input("Hey im a E-commerce Chatbot, how can i help you today? ")
    print("User:", userinput)
    # Run the chain only specifying the input variable.
    keywords = keyword_chain.run(userinput)

    full_result_string = format_products(search_products(keywords, api_key))

    llm_chain = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=api_key),
        prompt=PromptTemplate(
            input_variables=["chat_history", "user_msg"],
            template=ANSWER_PROMPT_TEMPLATE,
        ),
        verbose=False,
        memory=ConversationBufferMemory(memory_key="chat_history"),
    )

    # The search results are prepended to the first user message so the
    # memory carries them into every later turn.
    answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
    print("Bot:", answer)
    time.sleep(0.5)

    print("(Type 'exit' or press Ctrl-D to leave.)")
    while True:
        try:
            follow_up = input("Anything else you want to ask about this topic?")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session instead of a traceback.
            print()
            break
        if follow_up.strip().lower() in EXIT_COMMANDS:
            break
        print("User:", follow_up)
        answer = llm_chain.predict(user_msg=follow_up)
        print("Bot:", answer)
        time.sleep(0.5)


if __name__ == "__main__":
    main()
|
database.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import redis
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()

# Connection settings. Host and port may be overridden through the
# environment (.env); the defaults are the values previously hard-coded here.
REDIS_HOST = os.getenv('REDIS_HOST', 'redis-10923.c10.us-east-1-4.ec2.cloud.redislabs.com')
REDIS_PORT = int(os.getenv('REDIS_PORT', '10923'))
redis_key = os.getenv('REDIS_KEY')

# Shared client imported by chatbot.py and preprocess.py.
redis_conn = redis.Redis(
    host=REDIS_HOST,
    port=REDIS_PORT,
    password=redis_key,
)

# redis.Redis() is lazy: no socket is opened until the first command. Ping so
# that a wrong host or password fails here, and the message below is true.
redis_conn.ping()
print('connected to redis')
|
preprocess.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.embeddings import OpenAIEmbeddings
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from database import redis_conn
|
| 7 |
+
from utilities import create_flat_index, load_vectors
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
openai_api_key = os.getenv("OPENAI_API_KEY")
|
| 11 |
+
|
| 12 |
+
# Upper bound, in characters, applied to text columns as the CSV is parsed.
MAX_TEXT_LENGTH = 512


def auto_truncate(text:str):
    """Return ``text`` cut down to at most ``MAX_TEXT_LENGTH`` leading characters."""
    limit = MAX_TEXT_LENGTH
    return text[:limit]
|
| 17 |
+
|
| 18 |
+
# Upper bound on how many cleaned products are embedded and indexed.
MAX_PRODUCTS = 500

data = pd.read_csv(
    'product_data.csv',
    converters={
        'bullet_point': auto_truncate,
        'item_keywords': auto_truncate,
        'item_name': auto_truncate,
    },
)
# item_id and domain_name are folded into one primary key and then dropped,
# so downstream code must use 'primary_key' rather than 'item_id'.
data['primary_key'] = data['item_id'] + '-' + data['domain_name']
data.drop(columns=['item_id', 'domain_name'], inplace=True)
# Rows with empty keywords have nothing to embed. Assign the result instead of
# calling replace(..., inplace=True) on a column selection.
data['item_keywords'] = data['item_keywords'].replace('', np.nan)
data.dropna(subset=['item_keywords'], inplace=True)
data.reset_index(drop=True, inplace=True)
data_metadata = data.head(MAX_PRODUCTS).to_dict(orient='index')

# Generate embeddings (vectors) for the item keywords.
embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Row order matches the 0..n-1 keys of data_metadata (index was reset above),
# which is how load_vectors pairs each vector with its product.
item_keywords = [row['item_keywords'] for row in data_metadata.values()]
item_keywords_vectors = [embedding_model.embed_query(item) for item in item_keywords]

if not item_keywords_vectors:
    # Stop before flushall() so an empty or bad CSV cannot wipe the database.
    raise SystemExit('No products with item_keywords found in product_data.csv')

# Take the index dimension from the vectors themselves. The former constant
# 768 matched the sentence-transformers model this script used before, not the
# OpenAI embedding model, and Redis only indexes vectors whose size equals DIM.
TEXT_EMBEDDING_DIMENSION = len(item_keywords_vectors[0])
NUMBER_PRODUCTS = len(item_keywords_vectors)

print(f'Loading and Indexing {NUMBER_PRODUCTS} products')
# WARNING: flushall() deletes every key in every database of the connected
# Redis server, not only this project's products.
redis_conn.flushall()
# Create the flat index and load the vectors.
create_flat_index(redis_conn, NUMBER_PRODUCTS, TEXT_EMBEDDING_DIMENSION, 'COSINE')
load_vectors(redis_conn, data_metadata, item_keywords_vectors)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
readme.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
An ***e-commerce chatbot*** that searches the products in the Amazon dataset and suggests the goods best suited to the user's needs.
|
| 2 |
+
Built with LangChain and Redis, it combines product embeddings with large language models to act like a real salesperson: it understands the client's request, efficiently searches for relevant products based on the user's description, and presents its recommendations in an engaging and informative manner.
|
| 3 |
+
**link to download the Amazon product dataset** : https://drive.google.com/file/d/1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj/view
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain == 0.0.242
openai == 0.27.8
redis == 5.0.1
pandas == 2.0.3
sentence-transformers == 2.2.2
# Provides the `dotenv` module imported by chatbot.py, database.py and preprocess.py.
python-dotenv == 1.0.0
|
utilities.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from redis import Redis
|
| 2 |
+
from redis.commands.search.field import VectorField
|
| 3 |
+
from redis.commands.search.field import TextField
|
| 4 |
+
from redis.commands.search.field import TagField
|
| 5 |
+
from redis.commands.search.result import Result
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
def load_vectors(client: "Redis", product_metadata, vector_dict):
    """Store every product as a Redis hash that also holds its embedding.

    Commands are queued on a non-transactional pipeline and sent when
    ``execute()`` is called at the end.

    Args:
        client: Redis client used to create the pipeline.
        product_metadata: Mapping of row index -> dict of product fields.
            Each dict must contain a string ``'primary_key'``.
        vector_dict: Embeddings indexable by the same keys as
            ``product_metadata`` (a dict, or a list when the keys are
            0..n-1).

    The hash key is ``product:<index>:<primary_key>``. The embedding is
    stored under ``'item_vector'`` as raw float32 bytes. The caller's
    metadata dicts are left untouched; the vector goes into a copy.
    """
    pipe = client.pipeline(transaction=False)
    for index, item_metadata in product_metadata.items():
        # hash key
        key = 'product:' + str(index) + ':' + item_metadata['primary_key']

        # hash values: original fields plus the serialized vector, built as a
        # new dict so the input mapping is not mutated as a side effect
        vector_bytes = np.array(vector_dict[index], dtype=np.float32).tobytes()
        fields = {**item_metadata, 'item_vector': vector_bytes}

        # HSET
        pipe.hset(key, mapping=fields)

    pipe.execute()
|
| 23 |
+
|
| 24 |
+
def create_flat_index (redis_conn, number_of_vectors, vector_dimensions=512, distance_metric='L2'):
    """Create the search index over product hashes using a FLAT vector field.

    Args:
        redis_conn: Client whose ``ft()`` search interface creates the index.
        number_of_vectors: Used for both ``INITIAL_CAP`` and ``BLOCK_SIZE``
            of the vector field.
        vector_dimensions: ``DIM`` of the FLOAT32 ``item_vector`` field.
        distance_metric: ``DISTANCE_METRIC`` of the vector field.
    """
    vector_options = {
        "TYPE": "FLOAT32",
        "DIM": vector_dimensions,
        "DISTANCE_METRIC": distance_metric,
        "INITIAL_CAP": number_of_vectors,
        "BLOCK_SIZE": number_of_vectors,
    }
    schema = [
        VectorField('item_vector', "FLAT", vector_options),
        TagField("product_type"),
        TextField("item_name"),
        TextField("item_keywords"),
        TagField("country"),
    ]
    redis_conn.ft().create_index(schema)
|
| 32 |
+
|