| from langchain.text_splitter import CharacterTextSplitter |
| from langchain_community.document_loaders import TextLoader |
| from langchain.schema.runnable import RunnablePassthrough |
| from langchain.schema.output_parser import StrOutputParser |
| from langchain_pinecone import PineconeVectorStore |
| from langchain.prompts import PromptTemplate |
| from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings |
| from dotenv import load_dotenv, find_dotenv |
| import os |
| from pinecone import Pinecone, PodSpec |
|
|
# Load variables from the .env file located by find_dotenv() into the process
# environment, so the API keys looked up later in this module are available.
load_dotenv(dotenv_path=find_dotenv())
|
|
class Chatbot:
    """Retrieval-augmented support chatbot ("Delta") for GDSC SOU.

    The whole pipeline is assembled once, while the class body executes, and
    is exposed through class attributes:

    * ``docs``       -- chunks of ``dataset.txt`` used as the knowledge base.
    * ``embeddings`` -- Google Generative AI embedding client.
    * ``docsearch``  -- Pinecone-backed vector store over ``docs``.
    * ``llm``        -- Gemini text model.
    * ``prompt``     -- prompt template with ``context`` and ``question`` slots.
    * ``rag_chain``  -- runnable: question string in, answer string out.

    Side effects when the class is defined: ``dataset.txt`` is read, the
    Pinecone index list is queried, and the index is created and filled with
    the document chunks if it does not exist yet.
    """

    # Load the knowledge base and cut it into small chunks for retrieval.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=256, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # NOTE(review): task_type="retrieval_query" is set on the shared embedding
    # client, so it presumably also applies when the documents are embedded
    # for indexing -- confirm this is intended before re-indexing.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

    index_name = "gdscsou-chatbot"

    if index_name not in pinecone.list_indexes().names():
        # First run: create the index, then embed and upload every chunk.
        # `dimension` has to match the size of the vectors produced by
        # `embeddings` (assumed to be 768 for this model -- TODO confirm).
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(
            docs, embeddings, index_name=index_name
        )
    else:
        # Index already exists: attach to it without uploading anything.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(
        model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY")
    )

    # Adjacent literals are concatenated into one string. Every instruction
    # ends with explicit punctuation and a space so that consecutive
    # sentences cannot run into each other.
    template = (
        "\n"
        "INSTRUCTION: Act as Delta a community support chatbot for Google Developer Student Clubs, "
        "Silver Oak University alias GDSC SOU, this is conversation "
        "to a community member. Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "Don't forget you are a Community support chatbot for Google Developer Student Clubs, "
        "Silver Oak University. "
        "If you don't know any ANSWER, say you don't know. "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to GDSC SOU. Keep your replies short "
        "compassionate and informative. "
        "Give the answer from the CONTEXT. "
        "You should help user to get his query solved and also try to increase engagement "
        "for GDSC SOU by also promoting GDSC SOU."
        "\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

    # The incoming question is fed both to the retriever (to fetch context)
    # and, unchanged, to the prompt's `question` slot.
    rag_chain = (
        {
            # Join the text of the retrieved documents so the prompt receives
            # plain passages instead of the repr of a list of Document objects.
            "context": docsearch.as_retriever()
            | (lambda found: "\n\n".join(doc.page_content for doc in found)),
            "question": RunnablePassthrough(),
        }
        | prompt
        | llm
        | StrOutputParser()
    )
|
|
|
|