# bme_prompt_eng / app.py — Hugging Face Space by mgreg555 (commit aef8e61)
# -*- coding: utf-8 -*-
"""Doc_chat_vegleges_like.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV
# Setting up environment
"""
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import ElasticVectorSearch, Pinecone, Weaviate
from langchain_community.vectorstores import FAISS
# Get your API keys from openai, you will need to create an account.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
import os
# Fail fast if the OpenAI key is missing — but never print the secret
# itself (the original echoed the full API key into the Space logs).
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")
"""# Preprocessing document"""
# location of the pdf file/files.
reader = PdfReader('The_Little_Prince.pdf')
# read data from the file
raw_text = ''
for i, page in enumerate(reader.pages):
text = page.extract_text()
if text:
raw_text += text
# We need to split the text that we read into smaller chunks so that during information retreival we don't hit the token size limits.
text_splitter = CharacterTextSplitter(
separator = "\n",
chunk_size = 800,
chunk_overlap = 150,
length_function = len,
)
texts = text_splitter.split_text(raw_text)
len(texts)
"""## Setting up doc search"""
embeddings = OpenAIEmbeddings()
doc_search = FAISS.from_texts(texts, embeddings)
"""# Setting up chatbot"""
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI
template = """You are a chatbot having a conversation with a human.
Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
If You cannot find the answer say "The document does not contain that information."
{context}
{chat_history}
Human: {human_input}
Chatbot:"""
prompt = PromptTemplate(
input_variables=["chat_history", "human_input", "context"], template=template
)
memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
"""# Demo
## Setting up methods
"""
def chat(query, history):
    """Answer *query* using chunks retrieved from the indexed document.

    *history* is supplied by gr.ChatInterface but is not used here —
    conversation state is tracked by the LangChain memory object instead.
    """
    relevant_docs = doc_search.similarity_search(query)
    result = chain(
        {"input_documents": relevant_docs, "human_input": query},
        return_only_outputs=True,
    )
    return result['output_text']
"""## Setting up UI with gradio"""
import gradio as gr
from huggingface_hub import HfFileSystem

# Filesystem handle onto the Hub, used below to append like/dislike
# feedback into the mgreg555/Little_Prince dataset repo.
# NOTE(review): env var name 'DATASET_ACCES' looks like a typo of
# 'DATASET_ACCESS' — confirm against the Space's configured secrets
# before renaming, since the deployment may already use the misspelled key.
fs = HfFileSystem(token=os.environ.get('DATASET_ACCES'))
def write_to_file(file_name, content):
    """Append *content* as a new line to *file_name* in the feedback dataset.

    HfFileSystem has no append mode, so the existing file is read in
    full and rewritten with the new line attached.

    :param file_name: file name inside the dataset repo (e.g. 'good.txt').
    :param content: line of feedback text to append.
    """
    # Original used an f-string with no placeholders here; plain concat.
    file_path = "datasets/mgreg555/Little_Prince/" + file_name
    try:
        with fs.open(file_path, "r") as old_file:
            content_old = old_file.read()
    except FileNotFoundError:
        # First piece of feedback for this file — the original crashed
        # here instead of starting the file fresh.
        content_old = ""
    with fs.open(file_path, "w") as new_file:
        new_file.write(f"{content_old}\n" + content)
def vote(tmp, index_state, data: gr.LikeData):
    """Persist a like/dislike on a chatbot answer to the feedback dataset.

    The answer is stored together with the question that produced it
    (recovered from the conversation memory), separated by ';'.

    :param tmp: hidden textbox state (unused, required by the event wiring).
    :param index_state: hidden list state (unused, required by the wiring).
    :param data: gradio like-event payload (.value = answer, .liked = bool).
    """
    answer = data.value
    file_name = 'good.txt' if data.liked else 'bad.txt'
    # find_previous_question returns None when the answer has aged out of
    # the window memory; fall back to '' so the concatenation below does
    # not raise TypeError (the original crashed in that case).
    question = find_previous_question(answer) or ''
    write_to_file(file_name, answer + ';' + question)
def find_previous_question(answer_string, buffer=None):
    """Return the human question that preceded the AI answer *answer_string*.

    Scans a transcript of alternating "Human: ..." / "AI: ..." lines and
    returns the question whose following AI line equals *answer_string*,
    or None when no such answer is present.

    :param answer_string: exact text of the AI answer to look up.
    :param buffer: optional transcript string; defaults to the live
        ``chain.memory.buffer`` so existing callers are unchanged.
    """
    if buffer is None:
        buffer = chain.memory.buffer
    current_question = None
    for line in buffer.split('\n'):
        if line.startswith('Human:'):
            # Drop the 'Human: ' prefix (7 chars) plus surrounding whitespace.
            current_question = line[7:].strip()
        elif line.startswith('AI:') and line[3:].strip() == answer_string:
            # The most recently seen question produced this answer.
            return current_question
    return None
# --- Gradio UI --------------------------------------------------------------
# likeable=True adds thumbs-up/down buttons whose clicks feed vote().
chatbot = gr.Chatbot(height=600, likeable=True)

# gr.Blocks provides the context for components and event listeners.
with gr.Blocks() as demo:
    # Hidden state/textbox pair used as inputs/outputs of the like event.
    index_state = gr.State(value=[])
    tmp = gr.Textbox(visible=False, value="")
    gr.ChatInterface(
        chat,
        chatbot=chatbot,
        title="Doc-chat",
        description="Ask about The Little Prince!",
        theme="soft",
        examples=["Who is the Little Prince?", "What is the capital of France?"],
        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    # Route like/dislike clicks on the chatbot into the feedback writer.
    chatbot.like(vote, [tmp, index_state], [tmp, index_state])

demo.launch()