Aryan619348 commited on
Commit
cfd509f
·
1 Parent(s): e5c5fd1
Files changed (4) hide show
  1. app.py +51 -0
  2. chain.py +30 -0
  3. ingest.py +48 -0
  4. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ingest import configure_retriever
3
+ from chain import my_chain
4
+
5
def chatbot(input_text, history, uploaded_file):
    """Answer `input_text` with the RAG chain built over the uploaded files.

    Arguments are supplied by gr.ChatInterface: the user message, the chat
    history (unused here), and the files from the additional File input.
    Returns the answer string to display in the chat.
    """
    # Bug fix: the original called configure_retriever("") when no file was
    # uploaded, which tried to build a FAISS index from zero documents and
    # crashed. Fail softly with an instruction to the user instead.
    if not uploaded_file:
        return "Please upload a PDF or CSV document first."

    retriever = configure_retriever(uploaded_files=uploaded_file)
    return my_chain(retriever, input_text)
15
+
16
# Chat UI: the File control shows up under the "Additional Inputs" accordion
# and is passed to `chatbot` as its third argument.
document_upload = gr.File(file_types=["pdf", "csv"], file_count="multiple")

demo = gr.ChatInterface(
    chatbot,
    additional_inputs=[document_upload],
    title="RAG chain built using Langchain",
    description="Upload your documents in the additional input section and enjoy",
)

demo.launch()
25
+
26
+ # import gradio as gr
27
+ # from ingest import configure_retriever
28
+ # from chain import my_chain
29
+
30
+ # def chatbot(input_text, uploaded_file):
31
+ # # Your chatbot logic here
32
+
33
+ # print("checkpoint1")
34
+ # if uploaded_file is not None:
35
+ # # Process the uploaded file (you can replace this with your own logic)
36
+ # ret=configure_retriever(uploaded_files=uploaded_file)
37
+
38
+ # response =my_chain(ret,input_text)
39
+
40
+ # return response
41
+
42
+ # iface = gr.Interface(
43
+ # fn=chatbot,
44
+ # inputs=[
45
+ # gr.Textbox(placeholder="Enter your text here"),
46
+ # gr.UploadButton("Click to Upload a File", file_types=["pdf", "csv"], file_count="multiple")
47
+ # ],
48
+ # outputs=gr.Textbox(label="Response")
49
+ # )
50
+
51
+ # iface.launch()
chain.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and anything else in a local .env) into the process
# environment before the langchain imports below read it.
load_dotenv()

# Bug fix: os.getenv returns None when the key is missing, and assigning
# None into os.environ raises TypeError. Only re-export when present.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is not None:
    os.environ["OPENAI_API_KEY"] = _api_key
5
+
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+
11
+
12
def my_chain(retriever, question):
    """Answer `question` via a RAG pipeline: retriever -> prompt -> LLM -> str.

    `retriever` supplies the context documents for the prompt; the chain is
    invoked once and the parsed answer text is returned.
    """
    template = """Answer the question based only on the following context:
    {context}
    Question: {question}
    """
    rag_prompt = ChatPromptTemplate.from_template(template)

    # streaming=True lets the model stream tokens; .invoke still returns
    # the full answer at once.
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)

    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | rag_prompt
        | chat_model
        | StrOutputParser()
    )
    return rag_chain.invoke(question)
ingest.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and anything else in a local .env) into the process
# environment before the langchain imports below read it.
load_dotenv()

# Bug fix: os.getenv returns None when the key is missing, and assigning
# None into os.environ raises TypeError. Only re-export when present.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is not None:
    os.environ["OPENAI_API_KEY"] = _api_key
5
+ import tempfile
6
+ from langchain.embeddings import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ import streamlit as st
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.document_loaders.csv_loader import CSVLoader
11
+ from langchain.document_loaders import PyPDFLoader
12
+
13
+
14
+
15
def check_file_type(file_path):
    """Classify a path by extension: 1 for PDF, 2 for CSV, None otherwise.

    Matching is case-insensitive; the codes are consumed by
    configure_retriever to pick a document loader.
    """
    extension = os.path.splitext(file_path)[1].lower()
    return {".pdf": 1, ".csv": 2}.get(extension)
25
+
26
+
27
def configure_retriever(uploaded_files):
    """Build an MMR FAISS retriever over the given PDF/CSV file paths.

    Parameters
    ----------
    uploaded_files : iterable of str
        Paths to documents to index. Unsupported extensions are skipped.

    Returns
    -------
    A vectorstore retriever over the embedded document chunks.

    Raises
    ------
    ValueError
        If no supported documents were provided — FAISS cannot build an
        index from zero documents, and failing early gives a clear message.
    """
    docs = []
    for file in uploaded_files:
        file_kind = check_file_type(file)
        # Bug fix: the original used two independent `if`s with no else, so an
        # unsupported extension left `loader` unbound (UnboundLocalError) or,
        # worse, reused the loader from the previous iteration and indexed
        # that document twice. Skip unsupported files explicitly.
        if file_kind == 1:
            loader = PyPDFLoader(file)
        elif file_kind == 2:
            loader = CSVLoader(file)
        else:
            continue
        docs.extend(loader.load())

    if not docs:
        raise ValueError("No supported (.pdf/.csv) documents to index.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Create embeddings and store in vectordb
    embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(splits, embeddings)

    # MMR search: fetch 4 candidate chunks, return the 2 most diverse.
    retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2, "fetch_k": 4})
    print("embeddings created")
    return retriever
48
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
openai
langchain
faiss-cpu
tiktoken
python-dotenv
pillow
langchain-core
langchain-experimental
tabulate
pypdf
gradio
streamlit