Spaces:

jitubutwal1441
/

eshop-chat

Runtime error

jitendra.kasaudhan

Add product url instead of image

207907e over 2 years ago

6.68 kB

	from langchain import PromptTemplate, OpenAI, LLMChain
	from langchain.chat_models import ChatOpenAI
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import Chroma
	from langchain.chains import RetrievalQAWithSourcesChain
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.docstore.document import Document

	import chainlit as cl
	from chainlit import user_session
	import pandas as pd

	# Directory on disk where the Chroma vector store is persisted.
	persist_directory = "vector_db"

	# Prompt text with a single {question} placeholder.
	# NOTE(review): no live code in the visible part of this file reads this
	# value — confirm before removing it.
	template = """Question: {question}

	Answer: Let's think step by step."""

	# Get processed data from a JSON file (disabled: loaded a 3-record sample)
	# PRODUCTS_DATA = pd.read_json('data/bestbuy-dataset-products.json').sample(n=3).to_dict(orient='records')
	# Module-level list of product records; empty at import time.
	PRODUCTS_DATA = []

	@cl.on_chat_start
	def main():
	    """Index the product catalogue and register a QA chain for this chat session.

	    Reads product records from ``data/bestbuy-dataset-products.json``, wraps
	    each record's ``product_spec_in_natural_language`` text in a ``Document``
	    (keeping ``sku`` as ``source`` plus ``name``, ``url`` and ``image`` as
	    metadata), splits the documents into chunks, adds them to a Chroma store
	    persisted under ``persist_directory`` and stores a
	    ``RetrievalQAWithSourcesChain`` built on that store in the user session
	    under the key ``"llm_chain"``.

	    Raises:
	        KeyError: if a product record lacks one of the keys read below.
	    """
	    # NOTE(review): this handler runs on every chat start, so the whole
	    # dataset is embedded and inserted again per session; consider building
	    # the index once — confirm how Chroma treats the repeated inserts.
	    embeddings = OpenAIEmbeddings(
	        disallowed_special=(),
	    )

	    # Kept in a local name: nothing outside this handler needs the raw records.
	    products = pd.read_json("data/bestbuy-dataset-products.json").to_dict(orient="records")

	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=1000,
	        chunk_overlap=20,
	        length_function=len,
	    )

	    # One Document per product; the metadata travels with every chunk so the
	    # message handler can show the product name and url for each answer source.
	    docs = [
	        Document(
	            page_content=item["product_spec_in_natural_language"],
	            metadata={
	                "source": item["sku"],
	                "name": item["name"],
	                "url": item["url"],
	                "image": item["image"],
	            },
	        )
	        for item in products
	    ]
	    documents = text_splitter.split_documents(docs)

	    # Open the store once and add the chunks in bounded batches instead of
	    # re-creating the store for every product. This also leaves ``vectordb``
	    # defined when the dataset is empty.
	    vectordb = Chroma(embedding_function=embeddings, persist_directory=persist_directory)
	    batch_size = 1000
	    for start in range(0, len(documents), batch_size):
	        vectordb.add_documents(documents[start:start + batch_size])
	    vectordb.persist()

	    # Create a chain that answers from the Chroma vector store and reports
	    # which documents it used.
	    chain = RetrievalQAWithSourcesChain.from_chain_type(
	        ChatOpenAI(
	            model_name="gpt-3.5-turbo",
	            temperature=0,
	        ),
	        chain_type="stuff",
	        retriever=vectordb.as_retriever(),
	        return_source_documents=True,
	    )

	    # Store the chain in the user session
	    cl.user_session.set("llm_chain", chain)


	@cl.on_message
	async def main(message: str):
	    """Answer a user message with the session's chain and list the product urls used.

	    Args:
	        message: The text the user sent.

	    Sends a ``cl.Message`` whose content is the chain's ``answer``. One inline
	    ``cl.Text`` element per distinct source product (keyed by the ``source``
	    metadata) carries that product's url; the elements are left out when the
	    answer contains one of the "not found" phrases.

	    Raises:
	        KeyError: if the chain result lacks ``answer`` / ``source_documents``
	            or a source document has no ``source`` metadata.
	    """
	    # Retrieve the chain from the user session (the chat-start handler stores
	    # a RetrievalQAWithSourcesChain under this key).
	    llm_chain = cl.user_session.get("llm_chain")

	    # Call the chain asynchronously
	    res = await llm_chain.acall(message, callbacks=[cl.AsyncLangchainCallbackHandler()])

	    print(res)
	    answer = res["answer"]

	    # Several chunks can come from the same product: keep the first hit per
	    # product id so every product is listed once, in retrieval order.
	    products_by_id = {}
	    for source in res["source_documents"]:
	        doc_id = source.metadata["source"]
	        if doc_id not in products_by_id:
	            products_by_id[doc_id] = {
	                "url": source.metadata.get("url"),
	                "name": source.metadata.get("name"),
	            }

	    not_found_indicators = ("not mentioned", "no mention", "not specified", "no information")
	    answer_lower = answer.lower()
	    if any(indicator in answer_lower for indicator in not_found_indicators):
	        # If product not found, do not show any product urls
	        source_elements = []
	    else:
	        # One element per product (previously the list was overwritten on each
	        # iteration, so only the last product was ever shown).
	        source_elements = [
	            cl.Text(
	                content=f"Product url: {info['url']}\n",
	                name=info["name"],
	                display="inline",
	            )
	            for info in products_by_id.values()
	        ]

	    # This varies from chain to chain, you should check which key to read.
	    await cl.Message(content=answer, elements=source_elements).send()