rohanshaw committed on
Commit
9239b12
·
verified ·
1 Parent(s): 2b11763

Update chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +68 -68
chatbot.py CHANGED
@@ -1,68 +1,68 @@
1
- from langchain.text_splitter import CharacterTextSplitter
2
- from langchain_community.document_loaders import TextLoader
3
- from langchain.schema.runnable import RunnablePassthrough
4
- from langchain.schema.output_parser import StrOutputParser
5
- from langchain_pinecone import PineconeVectorStore
6
- from langchain.prompts import PromptTemplate
7
- from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
8
- from dotenv import load_dotenv, find_dotenv
9
- import os
10
- from pinecone import Pinecone, PodSpec
11
-
12
- load_dotenv(find_dotenv())
13
-
14
- class Chatbot():
15
-
16
- loader = TextLoader('dataset.txt', autodetect_encoding=True)
17
- documents = loader.load()
18
- text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
19
- docs = text_splitter.split_documents(documents)
20
-
21
- embeddings = GoogleGenerativeAIEmbeddings(
22
- model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY")
23
- )
24
-
25
- pinecone = Pinecone(
26
- api_key=os.environ.get("PINECONE_API_KEY")
27
- # host='gcp-starter'
28
- )
29
-
30
- index_name = "thehexatechchatbot"
31
-
32
- if index_name not in pinecone.list_indexes().names():
33
- pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
34
- docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
35
- else:
36
- docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
37
-
38
-
39
- llm = GoogleGenerativeAI(model="Gemini 2.5 Flash-Lite Preview 06-17", google_api_key=os.getenv("GEMINI_API_KEY"))
40
-
41
- template = """
42
- INSTRUCTION: Act as Customer Support chatbot of The Hexatech, an IT Startup that provides Daas \
43
- to a user who wants to get his query solved about The Hexatech. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
44
- If you don't know any ANSWER, say you don't know \
45
- Always follow general guardrails before generating any response. \
46
- Always try to keep the conversation in context to The Hexatech. Keep your replies short \
47
- compassionate and informative.\
48
- Give the answer from the CONTEXT\
49
- You should help user to get his query solved and also try to increase engagement and promoting The Hexatech & its services.\
50
-
51
- CONTEXT: {context}
52
- QUESTION: {question}
53
- ANSWER:
54
- """
55
-
56
- prompt = PromptTemplate(
57
- template=template,
58
- input_variables=["context", "question"]
59
- )
60
-
61
-
62
- rag_chain = (
63
- {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
64
- | prompt
65
- | llm
66
- | StrOutputParser()
67
- )
68
-
 
1
+ from langchain.text_splitter import CharacterTextSplitter
2
+ from langchain_community.document_loaders import TextLoader
3
+ from langchain.schema.runnable import RunnablePassthrough
4
+ from langchain.schema.output_parser import StrOutputParser
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
8
+ from dotenv import load_dotenv, find_dotenv
9
+ import os
10
+ from pinecone import Pinecone, PodSpec
11
+
12
+ load_dotenv(find_dotenv())
13
+
14
+ class Chatbot():
15
+
16
+ loader = TextLoader('dataset.txt', autodetect_encoding=True)
17
+ documents = loader.load()
18
+ text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
19
+ docs = text_splitter.split_documents(documents)
20
+
21
+ embeddings = GoogleGenerativeAIEmbeddings(
22
+ model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY")
23
+ )
24
+
25
+ pinecone = Pinecone(
26
+ api_key=os.environ.get("PINECONE_API_KEY")
27
+ # host='gcp-starter'
28
+ )
29
+
30
+ index_name = "thehexatechchatbot"
31
+
32
+ if index_name not in pinecone.list_indexes().names():
33
+ pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
34
+ docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
35
+ else:
36
+ docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
37
+
38
+
39
+ llm = GoogleGenerativeAI(model="gemini-2.5-flash-lite-preview-06-17", google_api_key=os.getenv("GEMINI_API_KEY"))
40
+
41
+ template = """
42
+ INSTRUCTION: Act as Customer Support chatbot of The Hexatech, an IT Startup that provides Daas \
43
+ to a user who wants to get his query solved about The Hexatech. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
44
+ If you don't know any ANSWER, say you don't know \
45
+ Always follow general guardrails before generating any response. \
46
+ Always try to keep the conversation in context to The Hexatech. Keep your replies short \
47
+ compassionate and informative.\
48
+ Give the answer from the CONTEXT\
49
+ You should help user to get his query solved and also try to increase engagement and promoting The Hexatech & its services.\
50
+
51
+ CONTEXT: {context}
52
+ QUESTION: {question}
53
+ ANSWER:
54
+ """
55
+
56
+ prompt = PromptTemplate(
57
+ template=template,
58
+ input_variables=["context", "question"]
59
+ )
60
+
61
+
62
+ rag_chain = (
63
+ {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
64
+ | prompt
65
+ | llm
66
+ | StrOutputParser()
67
+ )
68
+