import chainlit as cl
from dotenv import dotenv_values
from langchain.embeddings import CacheBackedEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
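
# Read API keys from a local env file; expects an OPEN_API_KEY entry in key.env.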
my_secrets = dotenv_values("key.env")
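
# Load the raw movie data. TextLoader reads the whole CSV as a single document;
# CSVLoader would yield one document per row instead.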
loader = TextLoader("data.csv")
documents = loader.load()
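
# Split the documents into overlapping chunks sized for embedding.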
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

docs = text_splitter.split_documents(documents)
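
# Cache embeddings on disk so repeated runs don't re-embed unchanged chunks.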
underlying_embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002", api_key=my_secrets["OPEN_API_KEY"]
)
store = LocalFileStore("./cache/")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings, store, namespace=underlying_embeddings.model
)

# Build the FAISS index over the chunked documents.
db = FAISS.from_documents(docs, cached_embedder)
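
# Expose the index as a retriever returning the top-10 most similar chunks.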
retriever = db.as_retriever(search_kwargs={"k": 10})
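
# Build the prompt and chain once per chat session.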
@cl.on_chat_start
def start():
    template = """
You're a helpful AI assistant tasked with answering the user's questions about movies.
You may only answer from the provided context about movies. If an answer cannot be
formed strictly from the context, politely say under the 'ANSWER:' tag that you don't
have knowledge about that topic.

You must always return both an answer under the 'ANSWER:' tag and a bulleted list of
citations under the 'CITATIONS:' tag. A citation consists of a VERBATIM quote that
justifies the answer and the ID of the quoted article. Return a citation for every
quote, across all articles, that justifies the answer. Add a blank line after the
citations. Use the following format for your final output:

ANSWER:

CITATIONS:

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:
"""

    prompt = ChatPromptTemplate.from_messages([("system", template)])

    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125", temperature=0, api_key=my_secrets["OPEN_API_KEY"]
    )
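
    # Feed retrieved context and the raw question into the prompt variables.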
    inputs = RunnableMap({
        "context": lambda x: retriever.invoke(x["question"]),
        "question": lambda x: x["question"],
    })
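
    # Compose the LCEL pipeline: inputs -> prompt -> model -> string parser.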
    runnable_chain = (
        inputs
        | prompt
        | llm
        | StrOutputParser()
    )
    cl.user_session.set("runnable_chain", runnable_chain)
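
# Handle each incoming user message with the session's chain.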
@cl.on_message
async def on_message(message: cl.Message):
    runnable_chain = cl.user_session.get("runnable_chain")
    msg = message.content

    # Use the async API so the Chainlit event loop isn't blocked.
    result = await runnable_chain.ainvoke({"question": msg})

    await cl.Message(content=result).send()
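
# Assuming this script is saved as app.py, start the UI with:
#   chainlit run app.py -w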