# deltav2 / chatbot.py
# rohanshaw's picture
# Upload 6 files
# 5479033 verified
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec
# Load environment variables from a .env file before anything below reads
# GEMINI_API_KEY / PINECONE_API_KEY via os.getenv / os.environ.
load_dotenv(find_dotenv())
class Chatbot:
    """Retrieval-augmented support chatbot ("Delta") for GDSC SOU.

    The whole pipeline is assembled in the class body, so it runs once, when
    the class is defined, and every intermediate object is exposed as a class
    attribute. ``rag_chain`` is the end product: it feeds the retriever output
    and the raw question into the prompt, then the LLM, then a string parser.
    """

    # Load the knowledge base and cut it into small chunks for embedding.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=256, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # NOTE(review): task_type="retrieval_query" is set on the one embeddings
    # object that is also used to embed the documents at indexing time --
    # confirm this is intended.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    index_name = "gdscsou-chatbot"
    if index_name in pinecone.list_indexes().names():
        # Index already exists: reuse it without re-uploading the documents.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
    else:
        # First run: create the index, then embed and upload every chunk.
        # dimension presumably has to match the embedding model's vector
        # size -- TODO confirm 768 for models/embedding-001.
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    # Built from adjacent literals (not backslash continuations inside a
    # triple-quoted string) so that every sentence break is explicit and the
    # CONTEXT / QUESTION / ANSWER labels each start on their own line.
    template = (
        "\n"
        "INSTRUCTION: Act as Delta a community support chatbot for Google Developer Student Clubs, "
        "Silver Oak University alias GDSC SOU, this is conversation "
        "to a community member. Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "Don't forget you are a Community support chatbot for Google Developer Student Clubs, "
        "Silver Oak University. "
        "If you don't know any ANSWER, say you don't know. "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to GDSC SOU. Keep your replies short "
        "compassionate and informative. "
        "Give the answer from the CONTEXT. "
        "You should help user to get his query solved and also try to increase engagement for "
        "GDSC SOU by also promoting GDSC SOU.\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

    # The chain input is passed through unchanged as "question" and is also
    # handed to the retriever, whose result fills "context".
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )