mgreg555 committed on
Commit
fd9ca75
·
verified ·
1 Parent(s): 5bc88f0

Upload doc_chat_vegleges_like.py

Browse files
Files changed (1) hide show
  1. doc_chat_vegleges_like.py +138 -0
doc_chat_vegleges_like.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Doc_chat_vegleges_like.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV
8
+
9
+ # Setting up environment
10
+ """
11
+
12
+ from PyPDF2 import PdfReader
13
+ from langchain.embeddings.openai import OpenAIEmbeddings
14
+ from langchain.text_splitter import CharacterTextSplitter
15
+ from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
16
+
17
+ # Get your API keys from openai, you will need to create an account.
18
+ # Here is the link to get the keys: https://platform.openai.com/account/billing/overview
19
+ import os
20
+
21
+
22
"""# Preprocessing document"""

# Location of the pdf file/files.
reader = PdfReader('samu-en-567.pdf')
#reader = PdfReader('/content/WOW.pdf')
#reader = PdfReader('/content/the_little_prince.pdf')
#reader = PdfReader('/content/constitution.pdf')

# Read data from the file.
# Fix: the original accumulated with `raw_text += text`, which is quadratic
# on other interpreters; a single join is linear. Pages whose extraction
# yields an empty/None result are skipped, exactly as before.
raw_text = ''.join(
    text
    for page in reader.pages
    if (text := page.extract_text())
)

# We need to split the text that we read into smaller chunks so that during
# information retrieval we don't hit the token size limits.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

len(texts)  # notebook artifact: displayed the chunk count in Colab; no effect as a script
48
+
49
+ """## Setting up doc search"""
50
+
51
+ embeddings = OpenAIEmbeddings()
52
+ doc_search = FAISS.from_texts(texts, embeddings)
53
+
54
+ """# Setting up chatbot"""
55
+
56
+ from langchain.chains.question_answering import load_qa_chain
57
+ from langchain.memory import ConversationBufferWindowMemory
58
+ from langchain.prompts import PromptTemplate
59
+ from langchain_openai import OpenAI
60
+
61
+ template = """You are a chatbot having a conversation with a human.
62
+
63
+ Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
64
+ Any questions outside of the document is irrelevant and you certanly dont know! If You cannot find the answer say "The document does not contain that information."
65
+
66
+ {context}
67
+
68
+ {chat_history}
69
+ Human: {human_input}
70
+ Chatbot:"""
71
+
72
+ prompt = PromptTemplate(
73
+ input_variables=["chat_history", "human_input", "context"], template=template
74
+ )
75
+
76
+ memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
77
+ chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
78
+
79
+ """# Demo
80
+
81
+ ## Setting up methods
82
+ """
83
+
84
+ def chat(query,history):
85
+ docs = doc_search.similarity_search(query)
86
+ return chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
87
+
88
+ """## Setting up UI with gradio"""
89
+
90
+ import gradio as gr
91
+
92
+ def write_to_file(file_name, value):
93
+ with open(file_name, 'a', encoding='utf-8') as file:
94
+ file.write(find_previous_question(value) + ';' + str(value) + '\n')
95
+
96
+ def vote(tmp, index_state, data: gr.LikeData):
97
+ value_new = data.value
98
+ index_new = data.index
99
+ file_name = 'good.txt' if data.liked else 'bad.txt'
100
+ write_to_file(file_name, value_new)
101
+
102
+ def find_previous_question(answer_string):
103
+ # Split the chat string into lines
104
+ lines = chain.memory.buffer.split('\n')
105
+
106
+ # Initialize variables to keep track of the last question and the current question
107
+ last_question = None
108
+ current_question = None
109
+
110
+ for line in lines:
111
+ if line.startswith('Human:'):
112
+ current_question = line[7:].strip() # Extract the question by removing the 'Human:' prefix
113
+ elif line.startswith('AI:') and line[3:].strip() == answer_string:
114
+ return current_question # Return the previous question when the answer is found
115
+
116
+ return None
117
+
118
+ chatbot = gr.Chatbot(height=600, likeable=True)
119
+
120
+ # Use gradio.Blocks to create a context for your components and event listeners
121
+ with gr.Blocks() as demo:
122
+ index_state = gr.State(value=[])
123
+ tmp = gr.Textbox(visible=False, value="")
124
+ gr.ChatInterface(
125
+ chat,
126
+ chatbot=chatbot,
127
+ title="Doc-chat",
128
+ description="Ask about the constitution!",
129
+ theme="soft",
130
+ examples=["Who wrote the constitution?","What is the capital of France?"],
131
+ cache_examples=True,
132
+ retry_btn=None,
133
+ undo_btn="Delete Previous",
134
+ clear_btn="Clear",
135
+ )
136
+ chatbot.like(vote, [tmp, index_state], [tmp, index_state])
137
+
138
+ demo.launch()