import chainlit as cl
from dotenv import dotenv_values
from langchain.embeddings import CacheBackedEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
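
# Read API keys from a local env file; expects an OPEN_API_KEY entry in key.env.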
my_secrets = dotenv_values("key.env")
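
# Load the raw movie data. TextLoader reads the whole CSV as a single document;
# CSVLoader would yield one document per row instead.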
loader = TextLoader("data.csv")
documents = loader.load()
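
# Split the documents into overlapping chunks sized for embedding.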
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

docs = text_splitter.split_documents(documents)
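
# Cache embeddings on disk so repeated runs don't re-embed unchanged chunks.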
underlying_embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002", api_key=my_secrets["OPEN_API_KEY"]
)
store = LocalFileStore("./cache/")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=underlying_embeddings.model
)

# Build the FAISS index over the chunked documents.
db = FAISS.from_documents(docs, cached_embedder)
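
# Expose the index as a retriever returning the top-10 most similar chunks.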
retriever = db.as_retriever(search_kwargs={"k": 10})
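
# Build the prompt and chain once per chat session.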
@cl.on_chat_start
def start():
    template = """
You're a helpful AI assistant tasked with answering the user's questions about movies.
You may only answer from the provided context about movies. If an answer cannot be
formed strictly from the context, politely say under the 'ANSWER:' tag that you don't
have knowledge about that topic.

You must always return both an answer under the 'ANSWER:' tag and a bulleted list of
citations under the 'CITATIONS:' tag. A citation consists of a VERBATIM quote that
justifies the answer and the ID of the quoted article. Return a citation for every
quote, across all articles, that justifies the answer. Add a blank line after the
citations. Use the following format for your final output:

ANSWER:

CITATIONS:

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:
"""

    prompt = ChatPromptTemplate.from_messages([("system", template)])

    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125", temperature=0, api_key=my_secrets["OPEN_API_KEY"]
    )
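
    # Feed retrieved context and the raw question into the prompt variables.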
    inputs = RunnableMap({
        "context": lambda x: retriever.invoke(x["question"]),
        "question": lambda x: x["question"],
    })
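
    # Compose the LCEL pipeline: inputs -> prompt -> model -> string parser.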
    runnable_chain = (
        inputs
        | prompt
        | llm
        | StrOutputParser()
    )
    cl.user_session.set("runnable_chain", runnable_chain)
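
# Handle each incoming user message with the session's chain.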
@cl.on_message
async def on_message(message: cl.Message):
    runnable_chain = cl.user_session.get("runnable_chain")
    msg = message.content

    # Use the async API so the Chainlit event loop isn't blocked.
    result = await runnable_chain.ainvoke({"question": msg})

    await cl.Message(content=result).send()
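
# Assuming this script is saved as app.py, start the UI with:
#   chainlit run app.py -w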