# docs_chat / app.py
# mgreg555's picture
# Update app.py
# 837390a verified
# -*- coding: utf-8 -*-
"""Doc_chat_vegleges_like.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV
# Setting up environment
"""
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import ElasticVectorSearch, Pinecone, Weaviate
from langchain_community.vectorstores import FAISS
# Get your API keys from openai, you will need to create an account.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
import os

# Confirm the required secrets are configured WITHOUT echoing their values:
# the original printed os.environ["OPENAI_API_KEY"] and the dataset token,
# which leaks credentials into the Space's public logs.
for _secret in ("OPENAI_API_KEY", "DATASET_ACCES"):
    print(f"{_secret} is {'set' if _secret in os.environ else 'MISSING'}")
"""# Preprocessing document"""
# location of the pdf file/files.
reader = PdfReader('samu-en-567.pdf')
#reader = PdfReader('/content/WOW.pdf')
#reader = PdfReader('/content/the_little_prince.pdf')
#reader = PdfReader('/content/constitution.pdf')
# read data from the file
raw_text = ''
for i, page in enumerate(reader.pages):
text = page.extract_text()
if text:
raw_text += text
# We need to split the text that we read into smaller chunks so that during information retreival we don't hit the token size limits.
text_splitter = CharacterTextSplitter(
separator = "\n",
chunk_size = 800,
chunk_overlap = 150,
length_function = len,
)
texts = text_splitter.split_text(raw_text)
len(texts)
"""## Setting up doc search"""
embeddings = OpenAIEmbeddings()
doc_search = FAISS.from_texts(texts, embeddings)
"""# Setting up chatbot"""
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI
template = """You are a chatbot having a conversation with a human.
Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
Any questions outside of the document is irrelevant and you certanly dont know! If You cannot find the answer say "The document does not contain that information."
{context}
{chat_history}
Human: {human_input}
Chatbot:"""
prompt = PromptTemplate(
input_variables=["chat_history", "human_input", "context"], template=template
)
memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
"""# Demo
## Setting up methods
"""
def chat(query, history):
    """Answer *query* using only the indexed document.

    *history* is supplied by gr.ChatInterface but is intentionally unused:
    conversational context is tracked by the LangChain memory object instead.
    """
    relevant_docs = doc_search.similarity_search(query)
    result = chain(
        {"input_documents": relevant_docs, "human_input": query},
        return_only_outputs=True,
    )
    return result['output_text']
"""## Setting up UI with gradio"""
import gradio as gr
from huggingface_hub import HfFileSystem
fs = HfFileSystem(token=os.environ.get('DATASET_ACCES'))
def write_to_file(file_name, content):
    """Append *content* as a new line to *file_name* in the feedback dataset.

    HfFileSystem append mode is unreliable, so the file is read in full and
    rewritten with the new line added.  The original crashed with
    FileNotFoundError on the very first feedback entry (opening a missing
    file in "r" mode); we now start from an empty string instead.
    """
    file_path = "datasets/mgreg555/samu_reference_book/" + file_name
    try:
        with fs.open(file_path, "r") as old_file:
            existing = old_file.read()
    except FileNotFoundError:
        existing = ""  # first feedback entry: the file does not exist yet
    with fs.open(file_path, "w") as out_file:
        out_file.write(f"{existing}\n" + content)
# Example usage
def vote(tmp, index_state, data: gr.LikeData):
    """Gradio ``.like`` callback: log the voted answer plus its question.

    Liked answers go to good.txt, disliked ones to bad.txt, each stored as
    ``answer;question``.
    """
    answer = data.value
    file_name = 'good.txt' if data.liked else 'bad.txt'
    question = find_previous_question(answer)
    # find_previous_question returns None when the answer is not in the
    # memory buffer; guard so the concatenation cannot raise TypeError.
    write_to_file(file_name, answer + ';' + (question or ''))
def find_previous_question(answer_string, buffer=None):
    """Return the Human question that immediately preceded *answer_string*.

    Scans *buffer* (a ``Human:``/``AI:`` transcript; defaults to the live
    ``chain.memory.buffer``) line by line, remembering the most recent
    ``Human:`` line, and returns it when an ``AI:`` line matches
    *answer_string* exactly.  Returns None when no match is found.

    The ``buffer`` parameter is a backward-compatible generalization: it
    decouples the function from the module-global chain and makes it
    unit-testable.
    """
    if buffer is None:
        buffer = chain.memory.buffer
    current_question = None
    for line in buffer.split('\n'):
        if line.startswith('Human:'):
            # Drop the 'Human: ' prefix (7 chars, matching LangChain's
            # "Human: <text>" transcript format).
            current_question = line[7:].strip()
        elif line.startswith('AI:') and line[3:].strip() == answer_string:
            return current_question
    return None
# likeable=True adds thumbs up/down buttons to each bot message, which
# fire the .like event wired to vote() below.
chatbot = gr.Chatbot(height=600, likeable=True)
# Use gradio.Blocks to create a context for your components and event listeners
with gr.Blocks() as demo:
    # Hidden state/textbox: .like() requires input/output components even
    # though vote() only uses the event's LikeData payload.
    index_state = gr.State(value=[])
    tmp = gr.Textbox(visible=False, value="")
    gr.ChatInterface(
        chat,
        chatbot=chatbot,
        title="Doc-chat",
        description="Ask about SAMU!",
        theme="soft",
        examples=["What is SAMU?","What is the capital of France?"],
        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    # Route like/dislike events on the chatbot into the feedback logger.
    chatbot.like(vote, [tmp, index_state], [tmp, index_state])
demo.launch()