Spaces:

mgreg555
/

docs_chat

Runtime error

App Files Files Community

mgreg555 committed on Mar 5, 2024

Commit

5bc88f0

verified ·

1 Parent(s): 051beb5

Upload 3 files

Browse files

Files changed (3) hide show

app.py +158 -0
constitution.pdf +0 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# -*- coding: utf-8 -*-
+"""Doc_chat_vegleges.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1G34ZCuupJZxNy-CFxjMNIa4_I3jynKqC
+# Setting up environment
+"""
+from PyPDF2 import PdfReader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
+# Get your API keys from openai, you will need to create an account.
+# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
+import os
+"""# Preprocessing document"""
+# location of the pdf file/files.
+reader = PdfReader('constitution.pdf')
+#reader = PdfReader('/content/WOW.pdf')
+#reader = PdfReader('/content/the_little_prince.pdf')
+# read data from the file
+raw_text = ''
+for i, page in enumerate(reader.pages):
+    text = page.extract_text()
+    if text:
+        raw_text += text
+# We need to split the text that we read into smaller chunks so that, during information retrieval, we don't hit the token size limits.
+text_splitter = CharacterTextSplitter(
+    separator = "\n",
+    chunk_size = 1000,
+    chunk_overlap  = 200,
+    length_function = len,
+)
+texts = text_splitter.split_text(raw_text)
+len(texts)
+"""## Setting up doc search"""
+embeddings = OpenAIEmbeddings()
+doc_search = FAISS.from_texts(texts, embeddings)
+"""# Setting up chatbot"""
+from langchain.chains.question_answering import load_qa_chain
+from langchain.memory import ConversationBufferWindowMemory
+from langchain.prompts import PromptTemplate
+from langchain_openai import OpenAI
+template = """You are a chatbot having a conversation with a human.
+Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
+Any questions outside of the document is irrelevant and you certanly dont know!
+{context}
+{chat_history}
+Human: {human_input}
+Chatbot:"""
+prompt = PromptTemplate(
+    input_variables=["chat_history", "human_input", "context"], template=template
+)
+memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
+chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
+"""## The chatbot should know the answer"""
+query = "Who wrote the constitution?"
+docs = doc_search.similarity_search(query)
+chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
+# query = "Acronyms?"
+# docs = doc_search.similarity_search(query)
+# chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
+# query = "Say 3 of them"
+# docs = doc_search.similarity_search(query)
+# chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
+"""## The chatbot should not know the answer."""
+query = "What is the capital of France?"
+docs = doc_search.similarity_search(query)
+chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
+"""## Previous chatbot (deprecated)"""
+#print(chain.memory.buffer)
+# from langchain.chains.question_answering import load_qa_chain
+# from langchain.llms import OpenAI
+# embeddings = OpenAIEmbeddings()
+# doc_search = FAISS.from_texts(texts, embeddings)
+# chain = load_qa_chain(OpenAI(), chain_type="stuff")
+# query = "Who wrote the constitution?"
+# answer = chain.run(input_documents=docs, question=query)
+# print(answer)
+# query = "What is the capital of france?"
+# answer = chain.run(input_documents=docs, question=query)
+# print(answer)
+"""# Demo
+## Setting up methods
+"""
+def chat(query,history):
+  docs = doc_search.similarity_search(query)
+  return chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
+"""## Setting up UI with gradio"""
+import gradio as gr
+css = """
+body {
+    background-color: #FFFFFF; /* White background */
+}
+.gradio-chat-interface, .gradio-chat-input {
+    background-color: #E0FFFF; /* Light blue-green background */
+}
+"""
+gr.ChatInterface(
+    chat,
+    chatbot=gr.Chatbot(height=500),
+    title="Doc-chat",
+    description="Ask about the constitution!",
+    theme="soft",
+    examples=["Who wrote the constitution?","What is the capital of France?"],
+    cache_examples=True,
+    retry_btn=None,
+    undo_btn="Delete Previous",
+    clear_btn="Clear",
+    css=css
+).launch()

constitution.pdf ADDED Viewed

Binary file (414 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+langchain
+openai
+PyPDF2
+faiss-cpu
+tiktoken
+langchain_openai
+gradio