# bme_prompt_eng / app.py — Hugging Face Space by mgreg555 (commit aef8e61)
# -*- coding: utf-8 -*-
"""Doc_chat_vegleges_like.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV
# Setting up environment
"""
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import ElasticVectorSearch, Pinecone, Weaviate
from langchain_community.vectorstores import FAISS
# Get your API keys from openai, you will need to create an account.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
import os
# Fail fast if the OpenAI key is missing — but never print the secret
# itself (the original echoed the full API key into the Space logs).
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
"""# Preprocessing document"""
# location of the pdf file/files.
reader = PdfReader('The_Little_Prince.pdf')
# read data from the file
raw_text = ''
for i, page in enumerate(reader.pages):
text = page.extract_text()
if text:
raw_text += text
# We need to split the text that we read into smaller chunks so that during information retreival we don't hit the token size limits.
text_splitter = CharacterTextSplitter(
separator = "\n",
chunk_size = 800,
chunk_overlap = 150,
length_function = len,
)
texts = text_splitter.split_text(raw_text)
len(texts)
"""## Setting up doc search"""
embeddings = OpenAIEmbeddings()
doc_search = FAISS.from_texts(texts, embeddings)
"""# Setting up chatbot"""
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI
template = """You are a chatbot having a conversation with a human.
Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
If You cannot find the answer say "The document does not contain that information."
{context}
{chat_history}
Human: {human_input}
Chatbot:"""
prompt = PromptTemplate(
input_variables=["chat_history", "human_input", "context"], template=template
)
memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
"""# Demo
## Setting up methods
"""
def chat(query, history):
    """Answer *query* using chunks retrieved from the indexed document.

    *history* is supplied by gr.ChatInterface but is not used here —
    conversation state is tracked by the LangChain memory object instead.
    """
    relevant_docs = doc_search.similarity_search(query)
    result = chain(
        {"input_documents": relevant_docs, "human_input": query},
        return_only_outputs=True,
    )
    return result['output_text']
"""## Setting up UI with gradio"""
import gradio as gr
from huggingface_hub import HfFileSystem

# Filesystem handle onto the Hub, used below to append like/dislike
# feedback into the mgreg555/Little_Prince dataset repo.
# NOTE(review): env var name 'DATASET_ACCES' looks like a typo of
# 'DATASET_ACCESS' — confirm against the Space's configured secrets
# before renaming, since the deployment may already use the misspelled key.
fs = HfFileSystem(token=os.environ.get('DATASET_ACCES'))
def write_to_file(file_name, content):
    """Append *content* as a new line to *file_name* in the feedback dataset.

    HfFileSystem has no append mode, so the existing file is read in
    full and rewritten with the new line attached.

    :param file_name: file name inside the dataset repo (e.g. 'good.txt').
    :param content: line of feedback text to append.
    """
    # Original used an f-string with no placeholders here; plain concat.
    file_path = "datasets/mgreg555/Little_Prince/" + file_name
    try:
        with fs.open(file_path, "r") as old_file:
            content_old = old_file.read()
    except FileNotFoundError:
        # First piece of feedback for this file — the original crashed
        # here instead of starting the file fresh.
        content_old = ""
    with fs.open(file_path, "w") as new_file:
        new_file.write(f"{content_old}\n" + content)
def vote(tmp, index_state, data: gr.LikeData):
    """Persist a like/dislike on a chatbot answer to the feedback dataset.

    The answer is stored together with the question that produced it
    (recovered from the conversation memory), separated by ';'.

    :param tmp: hidden textbox state (unused, required by the event wiring).
    :param index_state: hidden list state (unused, required by the wiring).
    :param data: gradio like-event payload (.value = answer, .liked = bool).
    """
    answer = data.value
    file_name = 'good.txt' if data.liked else 'bad.txt'
    # find_previous_question returns None when the answer has aged out of
    # the window memory; fall back to '' so the concatenation below does
    # not raise TypeError (the original crashed in that case).
    question = find_previous_question(answer) or ''
    write_to_file(file_name, answer + ';' + question)
def find_previous_question(answer_string, buffer=None):
    """Return the human question that preceded the AI answer *answer_string*.

    Scans a transcript of alternating "Human: ..." / "AI: ..." lines and
    returns the question whose following AI line equals *answer_string*,
    or None when no such answer is present.

    :param answer_string: exact text of the AI answer to look up.
    :param buffer: optional transcript string; defaults to the live
        ``chain.memory.buffer`` so existing callers are unchanged.
    """
    if buffer is None:
        buffer = chain.memory.buffer
    current_question = None
    for line in buffer.split('\n'):
        if line.startswith('Human:'):
            # Drop the 'Human: ' prefix (7 chars) plus surrounding whitespace.
            current_question = line[7:].strip()
        elif line.startswith('AI:') and line[3:].strip() == answer_string:
            # The most recently seen question produced this answer.
            return current_question
    return None
# --- Gradio UI --------------------------------------------------------------
# likeable=True adds thumbs-up/down buttons whose clicks feed vote().
chatbot = gr.Chatbot(height=600, likeable=True)

# gr.Blocks provides the context for components and event listeners.
with gr.Blocks() as demo:
    # Hidden state/textbox pair used as inputs/outputs of the like event.
    index_state = gr.State(value=[])
    tmp = gr.Textbox(visible=False, value="")
    gr.ChatInterface(
        chat,
        chatbot=chatbot,
        title="Doc-chat",
        description="Ask about The Little Prince!",
        theme="soft",
        examples=["Who is the Little Prince?", "What is the capital of France?"],
        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    # Route like/dislike clicks on the chatbot into the feedback writer.
    chatbot.like(vote, [tmp, index_state], [tmp, index_state])

demo.launch()