Aryan619348 commited on
Commit
cfd509f
·
1 Parent(s): e5c5fd1
Files changed (4) hide show
  1. app.py +51 -0
  2. chain.py +30 -0
  3. ingest.py +48 -0
  4. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ingest import configure_retriever
3
+ from chain import my_chain
4
+
5
def chatbot(input_text, history, uploaded_file):
    """Answer `input_text` with the RAG chain built over the uploaded files.

    Arguments are supplied by gr.ChatInterface: the user message, the chat
    history (unused here), and the files from the additional File input.
    Returns the answer string to display in the chat.
    """
    # Bug fix: the original called configure_retriever("") when no file was
    # uploaded, which tried to build a FAISS index from zero documents and
    # crashed. Fail softly with an instruction to the user instead.
    if not uploaded_file:
        return "Please upload a PDF or CSV document first."

    retriever = configure_retriever(uploaded_files=uploaded_file)
    return my_chain(retriever, input_text)
15
+
16
# Chat UI: the File control shows up under the "Additional Inputs" accordion
# and is passed to `chatbot` as its third argument.
document_upload = gr.File(file_types=["pdf", "csv"], file_count="multiple")

demo = gr.ChatInterface(
    chatbot,
    additional_inputs=[document_upload],
    title="RAG chain built using Langchain",
    description="Upload your documents in the additional input section and enjoy",
)

demo.launch()
25
+
26
+ # import gradio as gr
27
+ # from ingest import configure_retriever
28
+ # from chain import my_chain
29
+
30
+ # def chatbot(input_text, uploaded_file):
31
+ # # Your chatbot logic here
32
+
33
+ # print("checkpoint1")
34
+ # if uploaded_file is not None:
35
+ # # Process the uploaded file (you can replace this with your own logic)
36
+ # ret=configure_retriever(uploaded_files=uploaded_file)
37
+
38
+ # response =my_chain(ret,input_text)
39
+
40
+ # return response
41
+
42
+ # iface = gr.Interface(
43
+ # fn=chatbot,
44
+ # inputs=[
45
+ # gr.Textbox(placeholder="Enter your text here"),
46
+ # gr.UploadButton("Click to Upload a File", file_types=["pdf", "csv"], file_count="multiple")
47
+ # ],
48
+ # outputs=gr.Textbox(label="Response")
49
+ # )
50
+
51
+ # iface.launch()
chain.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and anything else in a local .env) into the process
# environment before the langchain imports below read it.
load_dotenv()

# Bug fix: os.getenv returns None when the key is missing, and assigning
# None into os.environ raises TypeError. Only re-export when present.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is not None:
    os.environ["OPENAI_API_KEY"] = _api_key
5
+
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+
11
+
12
def my_chain(retriever, question):
    """Answer `question` via a RAG pipeline: retriever -> prompt -> LLM -> str.

    `retriever` supplies the context documents for the prompt; the chain is
    invoked once and the parsed answer text is returned.
    """
    template = """Answer the question based only on the following context:
    {context}
    Question: {question}
    """
    rag_prompt = ChatPromptTemplate.from_template(template)

    # streaming=True lets the model stream tokens; .invoke still returns
    # the full answer at once.
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)

    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | rag_prompt
        | chat_model
        | StrOutputParser()
    )
    return rag_chain.invoke(question)
ingest.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv

# Pull OPENAI_API_KEY (and anything else in a local .env) into the process
# environment before the langchain imports below read it.
load_dotenv()

# Bug fix: os.getenv returns None when the key is missing, and assigning
# None into os.environ raises TypeError. Only re-export when present.
_api_key = os.getenv("OPENAI_API_KEY")
if _api_key is not None:
    os.environ["OPENAI_API_KEY"] = _api_key
5
+ import tempfile
6
+ from langchain.embeddings import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ import streamlit as st
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.document_loaders.csv_loader import CSVLoader
11
+ from langchain.document_loaders import PyPDFLoader
12
+
13
+
14
+
15
def check_file_type(file_path):
    """Classify a path by extension: 1 for PDF, 2 for CSV, None otherwise.

    Matching is case-insensitive; the codes are consumed by
    configure_retriever to pick a document loader.
    """
    extension = os.path.splitext(file_path)[1].lower()
    return {".pdf": 1, ".csv": 2}.get(extension)
25
+
26
+
27
def configure_retriever(uploaded_files):
    """Build an MMR FAISS retriever over the given PDF/CSV file paths.

    Parameters
    ----------
    uploaded_files : iterable of str
        Paths to documents to index. Unsupported extensions are skipped.

    Returns
    -------
    A vectorstore retriever over the embedded document chunks.

    Raises
    ------
    ValueError
        If no supported documents were provided — FAISS cannot build an
        index from zero documents, and failing early gives a clear message.
    """
    docs = []
    for file in uploaded_files:
        file_kind = check_file_type(file)
        # Bug fix: the original used two independent `if`s with no else, so an
        # unsupported extension left `loader` unbound (UnboundLocalError) or,
        # worse, reused the loader from the previous iteration and indexed
        # that document twice. Skip unsupported files explicitly.
        if file_kind == 1:
            loader = PyPDFLoader(file)
        elif file_kind == 2:
            loader = CSVLoader(file)
        else:
            continue
        docs.extend(loader.load())

    if not docs:
        raise ValueError("No supported (.pdf/.csv) documents to index.")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Create embeddings and store in vectordb
    embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(splits, embeddings)

    # MMR search: fetch 4 candidate chunks, return the 2 most diverse.
    retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2, "fetch_k": 4})
    print("embeddings created")
    return retriever
48
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
openai
langchain
faiss-cpu
tiktoken
python-dotenv
pillow
langchain-core
langchain-experimental
tabulate
pypdf
gradio
streamlit