# NOTE(review): the original file began with Hugging Face Spaces page-scrape
# residue ("Spaces:" / "Runtime error") that was never valid Python.
# -*- coding: utf-8 -*-
"""Doc_chat_vegleges_like.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV

# Setting up environment
"""
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import ElasticVectorSearch, Pinecone, Weaviate
from langchain_community.vectorstores import FAISS

# Get your API keys from openai, you will need to create an account.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
import os

# Fail fast if the key is missing — but never print the secret itself.
# (The original code printed os.environ["OPENAI_API_KEY"], which leaks the
# API key into logs; here we only verify its presence.)
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
| """# Preprocessing document""" | |
| # location of the pdf file/files. | |
| reader = PdfReader('The_Little_Prince.pdf') | |
| # read data from the file | |
| raw_text = '' | |
| for i, page in enumerate(reader.pages): | |
| text = page.extract_text() | |
| if text: | |
| raw_text += text | |
| # We need to split the text that we read into smaller chunks so that during information retreival we don't hit the token size limits. | |
| text_splitter = CharacterTextSplitter( | |
| separator = "\n", | |
| chunk_size = 800, | |
| chunk_overlap = 150, | |
| length_function = len, | |
| ) | |
| texts = text_splitter.split_text(raw_text) | |
| len(texts) | |
| """## Setting up doc search""" | |
| embeddings = OpenAIEmbeddings() | |
| doc_search = FAISS.from_texts(texts, embeddings) | |
| """# Setting up chatbot""" | |
| from langchain.chains.question_answering import load_qa_chain | |
| from langchain.memory import ConversationBufferWindowMemory | |
| from langchain.prompts import PromptTemplate | |
| from langchain_openai import OpenAI | |
| template = """You are a chatbot having a conversation with a human. | |
| Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else. | |
| If You cannot find the answer say "The document does not contain that information." | |
| {context} | |
| {chat_history} | |
| Human: {human_input} | |
| Chatbot:""" | |
| prompt = PromptTemplate( | |
| input_variables=["chat_history", "human_input", "context"], template=template | |
| ) | |
| memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3) | |
| chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt) | |
| """# Demo | |
| ## Setting up methods | |
| """ | |
| def chat(query,history): | |
| docs = doc_search.similarity_search(query) | |
| return chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text'] | |
| """## Setting up UI with gradio""" | |
| import gradio as gr | |
| from huggingface_hub import HfFileSystem | |
| fs = HfFileSystem(token=os.environ.get('DATASET_ACCES')) | |
def write_to_file(file_name, content):
    """Append `content` as a new line to `file_name` in the feedback dataset.

    HfFileSystem has no reliable append mode, so the file is read in full and
    rewritten with the new line added.

    Args:
        file_name: file name inside the dataset repo (e.g. 'good.txt').
        content: the line of feedback text to append.
    """
    # No placeholders, so a plain string (the original's f-prefix was inert).
    file_path = "datasets/mgreg555/Little_Prince/" + file_name
    try:
        with fs.open(file_path, "r") as file_old:
            content_old = file_old.read()
    except FileNotFoundError:
        # First feedback entry: the file does not exist yet — start empty
        # instead of crashing.
        content_old = ""
    with fs.open(file_path, "w") as file:
        file.write(f"{content_old}\n" + content)
def vote(tmp, index_state, data: gr.LikeData):
    """Persist a like/dislike on a chatbot answer to the feedback dataset.

    Args:
        tmp, index_state: hidden components required by the .like() event
            signature; passed through unchanged.
        data: gradio LikeData carrying the message text and the liked flag.
    """
    answer = data.value
    file_name = 'good.txt' if data.liked else 'bad.txt'
    question = find_previous_question(answer)
    # find_previous_question returns None when the answer is no longer in
    # the chain's (windowed) memory — the original crashed with TypeError
    # on `str + None` in that case; record an empty question instead.
    write_to_file(file_name, answer + ';' + (question or ''))
def find_previous_question(answer_string, buffer=None):
    """Return the Human question that preceded `answer_string` in chat memory.

    Scans the memory transcript line by line, remembering the most recent
    'Human:' line, and returns it when a matching 'AI:' answer is found.

    Args:
        answer_string: exact text of the AI answer to look up.
        buffer: optional transcript string; defaults to chain.memory.buffer
            (backward compatible — existing callers pass one argument).

    Returns:
        The question string, or None if the answer is not found.
    """
    if buffer is None:
        buffer = chain.memory.buffer
    current_question = None
    for line in buffer.split('\n'):
        if line.startswith('Human:'):
            # Slice by prefix length: the original used line[7:], which
            # silently dropped the first character whenever there was no
            # space after the colon ('Human:' is only 6 characters).
            current_question = line[len('Human:'):].strip()
        elif line.startswith('AI:') and line[len('AI:'):].strip() == answer_string:
            return current_question
    return None
# Chat widget with like/dislike buttons enabled on each bot message;
# likes fire the .like() event wired to vote() below.
chatbot = gr.Chatbot(height=600, likeable=True)
# Use gradio.Blocks to create a context for your components and event listeners
with gr.Blocks() as demo:
    # Hidden components only present to satisfy the .like() callback
    # signature (inputs/outputs); vote() ignores their values.
    index_state = gr.State(value=[])
    tmp = gr.Textbox(visible=False, value="")
    gr.ChatInterface(
        chat,
        chatbot=chatbot,
        title="Doc-chat",
        description="Ask about The Little Prince!",
        theme="soft",
        examples=["Who is the Little Prince?","What is the capital of France?"],
        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    # Record like/dislike feedback to the HF dataset via vote().
    chatbot.like(vote, [tmp, index_state], [tmp, index_state])
demo.launch()