File size: 4,395 Bytes
73635cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793fe1b
73635cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aef8e61
73635cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccd64f2
73635cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793fe1b
73635cb
793fe1b
73635cb
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# -*- coding: utf-8 -*-
"""Doc_chat_vegleges_like.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Igjhvd8GhC8qJf7syPEa2x0KKjroy7KV

# Setting up environment
"""

from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import ElasticVectorSearch, Pinecone, Weaviate
from langchain_community.vectorstores import FAISS

# Get your API keys from openai, you will need to create an account.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
import os

# Fail fast if the key is missing, but never echo the secret itself:
# the original printed the raw API key to stdout, leaking it into logs.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY environment variable is not set")

"""# Preprocessing document"""

# location of the pdf file/files.
reader = PdfReader('The_Little_Prince.pdf')

# read data from the file
raw_text = ''
for i, page in enumerate(reader.pages):
    text = page.extract_text()
    if text:
        raw_text += text

# We need to split the text that we read into smaller chunks so that during information retreival we don't hit the token size limits.

text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 800,
    chunk_overlap  = 150,
    length_function = len,
)
texts = text_splitter.split_text(raw_text)

len(texts)

"""## Setting up doc search"""

embeddings = OpenAIEmbeddings()
doc_search = FAISS.from_texts(texts, embeddings)

"""# Setting up chatbot"""

from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI

template = """You are a chatbot having a conversation with a human.

Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
If You cannot find the answer say "The document does not contain that information."

{context}

{chat_history}
Human: {human_input}
Chatbot:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input", "context"], template=template
)

memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input",k=3)
chain = load_qa_chain( OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)

"""# Demo

## Setting up methods
"""

def chat(query, history):
    """Answer *query* from the indexed document only.

    *history* is supplied by gr.ChatInterface but is unused here: the QA
    chain maintains its own windowed conversation memory.
    """
    relevant_docs = doc_search.similarity_search(query)
    result = chain(
        {"input_documents": relevant_docs, "human_input": query},
        return_only_outputs=True,
    )
    return result['output_text']

"""## Setting up UI with gradio"""

import gradio as gr
from huggingface_hub import HfFileSystem

fs = HfFileSystem(token=os.environ.get('DATASET_ACCES'))

def write_to_file(file_name, content):
    """Append *content* as a new line to *file_name* in the HF dataset repo.

    HfFileSystem has no true append mode, so the remote file is read in
    full, the new line appended, and the whole file written back.
    """
    file_path = "datasets/mgreg555/Little_Prince/" + file_name
    try:
        with fs.open(file_path, "r") as file_old:
            content_old = file_old.read()
    except FileNotFoundError:
        # First vote for this file: start from empty instead of crashing
        # (the original required the file to already exist).
        content_old = ""

    with fs.open(file_path, "w") as file:
        file.write(f"{content_old}\n" + content)

def vote(tmp, index_state, data: gr.LikeData):
    """Handle a thumbs-up/down click on a chatbot message.

    Persists the rated answer together with the question that produced it,
    as an 'answer;question' line in good.txt (liked) or bad.txt (disliked).
    *tmp* and *index_state* are placeholder components required by the
    .like() wiring; they are not used here.
    """
    answer = data.value
    file_name = 'good.txt' if data.liked else 'bad.txt'
    # find_previous_question may return None when the answer is not found
    # in memory (e.g. cached example replies); the original crashed with a
    # TypeError on str + None in that case.
    question = find_previous_question(answer) or ''
    write_to_file(file_name, answer + ';' + question)

def find_previous_question(answer_string):
    """Return the Human question that preceded the AI answer *answer_string*
    in the chain's conversation memory, or None if the answer is not found.

    The memory buffer is a plain-text transcript of alternating
    'Human: ...' and 'AI: ...' lines.
    """
    current_question = None

    for line in chain.memory.buffer.split('\n'):
        if line.startswith('Human:'):
            # Strip exactly the 'Human:' prefix, then whitespace. The
            # original sliced line[7:], which silently ate the first
            # character of the question whenever no space followed the
            # colon. (Also dropped: an unused 'last_question' local.)
            current_question = line[len('Human:'):].strip()
        elif line.startswith('AI:') and line[len('AI:'):].strip() == answer_string:
            return current_question  # the question immediately before this answer

    return None

# Chat display area; likeable=True adds thumbs-up/down buttons to every bot
# message so users can rate individual answers.
chatbot = gr.Chatbot(height=600, likeable=True)

# Use gradio.Blocks to create a context for your components and event listeners
with gr.Blocks() as demo:
    # Hidden placeholder components required by the .like() wiring below;
    # vote() ignores their values.
    index_state = gr.State(value=[])
    tmp = gr.Textbox(visible=False, value="")
    gr.ChatInterface(
        chat,
        chatbot=chatbot,
        title="Doc-chat",
        description="Ask about The Little Prince!",
        theme="soft",
        examples=["Who is the Little Prince?","What is the capital of France?"],
        cache_examples=True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    # Route thumb clicks to vote(), which logs the rated answer and the
    # question that produced it to the HF dataset.
    chatbot.like(vote, [tmp, index_state], [tmp, index_state])

demo.launch()