jawad2412 committed on
Commit
1f289e4
·
verified ·
1 Parent(s): b90f533

Upload 3 files

Browse files
Files changed (3) hide show
  1. chatbot.py +67 -0
  2. company_data.pdf +0 -0
  3. requirements.txt +7 -1
chatbot.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain_groq import ChatGroq
9
+ import gradio as gr
10
+
11
# Groq API key, read once at import time from the GROQ_API_KEY environment
# variable. The value is None when the variable is not set.
groq_api_key = os.getenv("GROQ_API_KEY")
13
+
14
def load_and_index_pdf(
    pdf_path="company_data.pdf",
    index_path="company_faiss_index",
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
):
    """Build a FAISS index from a PDF and save it to disk.

    The PDF is loaded with ``PyPDFLoader`` (which relies on the ``pypdf``
    package being installed), split into overlapping chunks, embedded with a
    Hugging Face sentence-transformer model, and written with
    ``FAISS.save_local``.

    Args:
        pdf_path: Path of the PDF to index.
        index_path: Directory the FAISS index is saved to.
        embedding_model: Name of the Hugging Face embedding model.

    Returns:
        None. The index is persisted under ``index_path``.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.
    """
    pages = PyPDFLoader(pdf_path).load()

    # 500-character chunks with a 50-character overlap between neighbours.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    # An empty chunk list cannot be indexed; report it explicitly instead of
    # letting the vector store fail with a less helpful error.
    if not chunks:
        raise ValueError(
            f"No text could be extracted from {pdf_path!r}; nothing to index."
        )

    embedding = HuggingFaceEmbeddings(model_name=embedding_model)
    db = FAISS.from_documents(chunks, embedding)
    db.save_local(index_path)
22
+
23
def setup_qa():
    """Create the retrieval question-answering chain used by the chatbot.

    Loads the FAISS index from ``company_faiss_index`` (building it first via
    ``load_and_index_pdf()`` when the directory does not exist), wraps it in
    a top-3 similarity retriever, and combines it with a Groq-hosted chat
    model and a digital-marketing prompt in a ``RetrievalQA`` chain.

    Returns:
        The configured ``RetrievalQA`` chain.

    Raises:
        RuntimeError: If the ``GROQ_API_KEY`` environment variable is not set.
    """
    # Fail fast with a clear message before the embedding model and index
    # are loaded; the chat model cannot be used without a key.
    if not groq_api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "cannot initialise the Groq chat model."
        )

    embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Build the index on first run only; later runs reuse the saved copy.
    if not os.path.exists("company_faiss_index"):
        load_and_index_pdf()

    # SECURITY NOTE: allow_dangerous_deserialization=True lets FAISS unpickle
    # the stored docstore. Only load index directories produced by this
    # application; never point this at an index from an untrusted source.
    db = FAISS.load_local("company_faiss_index", embedding, allow_dangerous_deserialization=True)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

    llm = ChatGroq(model_name="llama3-70b-8192", api_key=groq_api_key)

    # {context} receives the retrieved chunks, {question} the user's query.
    prompt = PromptTemplate.from_template("""
You are a helpful assistant for a digital marketing company.
Try to answer the user's question based on the provided context from the company document.
If the answer is not found in the context, provide a helpful and accurate answer from your own knowledge, focusing on digital marketing topics.

Context:
{context}

Question:
{question}
""")

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": prompt},
    )
    return qa_chain
51
+
52
# Build the chain once at import time so every request reuses the same
# retriever and chat model instead of reconstructing them per question.
qa_chain = setup_qa()
53
+
54
def answer_question(query):
    """Answer a user question with the module-level QA chain.

    Args:
        query: The question text entered by the user.

    Returns:
        The chain's answer text (the ``'result'`` entry of its output), or a
        short prompt asking for input when ``query`` is empty, whitespace
        only, or not a string.
    """
    cleaned = query.strip() if isinstance(query, str) else ""

    # Skip the retrieval and LLM round trip for blank submissions.
    if not cleaned:
        return "Please enter a question."

    result = qa_chain.invoke(cleaned)
    return result["result"]
57
+
58
# Minimal Gradio UI: a two-line free-text question box in, plain text out.
iface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about digital marketing..."),
    outputs="text",
    title="CLick Media Lab Chatbot",
)

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the UI.
if __name__ == "__main__":
    iface.launch()
company_data.pdf ADDED
Binary file (2.87 kB). View file
 
requirements.txt CHANGED
@@ -1 +1,7 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
 
1
+ langchain==0.0.250
2
+ langchain-huggingface
3
+ langchain-community
4
+ langchain-groq
5
+ gradio
6
+ faiss-cpu
7
+ python-dotenv