File size: 2,013 Bytes
5c6086a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import streamlit as st
from transformers import pipeline
import fitz

def _pick_device():
    """Return the device index to hand to `pipeline`: 0 when CUDA is usable, else -1 (CPU)."""
    try:
        import torch
    except ImportError:
        # PyTorch is not importable, so CUDA cannot be probed here; use the CPU.
        return -1
    return 0 if torch.cuda.is_available() else -1


@st.cache_resource
def _load_pipelines():
    """Build the question-answering and text2text-generation pipelines.

    Wrapped in `st.cache_resource` because Streamlit re-executes this script
    on every rerun; without the cache both models would be rebuilt each time.

    Returns:
        A `(qa_pipeline, text_gen_pipeline)` tuple.
    """
    device = _pick_device()
    qa_pipeline = pipeline(
        "question-answering", model="deepset/roberta-base-squad2", device=device
    )
    gen_pipeline = pipeline(
        "text2text-generation", model="google/flan-t5-base", device=device
    )
    return qa_pipeline, gen_pipeline


# Module-level names kept so the rest of the script can keep calling them.
qa, text_gen = _load_pipelines()


# extract text from uploaded document
def extract_PDF(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: An object with a `read()` method; whatever `read()` returns is
            passed to `fitz.open` as the `stream` with `filetype="pdf"`.

    Returns:
        A single string made of each page's `get_text()` output joined
        together with no separator ("" when the document has no pages).
    """
    pdf_bytes = file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [pg.get_text() for pg in document]  # type: ignore
    return "".join(page_texts)


# ------------------------------------------------------------------------------

# -----------------------------------Streamlit UI--------------------------------

# Streamlit re-executes this whole script on every user interaction, so all
# state that must survive a rerun lives in `st.session_state`.

st.title("Chatbot with Huggingface")

st.subheader("Upload file")
pdf_file = st.file_uploader("Upload", type="pdf")

# Conversation shown to the user: a list of {"user": ..., "bot": ...} dicts.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Text extracted from the current PDF (None until a PDF has been processed).
if "context" not in st.session_state:
    st.session_state.context = None

# (name, size) of the upload that `context` was extracted from, used to notice
# when the user swaps in a different file.
if "context_source" not in st.session_state:
    st.session_state.context_source = None

# Most recent question that already received an answer, used to avoid
# answering (and logging) the same input again on unrelated reruns.
if "last_question" not in st.session_state:
    st.session_state.last_question = None

# Extract text once per distinct upload. Comparing the source identity rather
# than only checking `context is None` lets a newly uploaded PDF replace the
# previous one, while the same file is not re-parsed on every rerun.
if pdf_file is not None:
    source_id = (pdf_file.name, pdf_file.size)
    if st.session_state.context_source != source_id:
        st.session_state.context = extract_PDF(pdf_file)
        st.session_state.context_source = source_id
        # Earlier answers referred to the previous document; start fresh.
        st.session_state.chat_history = []
        st.session_state.last_question = None


# Chat section

if st.session_state.context:
    st.subheader("Chat with the PDF")

    question = st.text_input("You", key="user_input")

    # Only answer input that has not been answered yet; the text box keeps its
    # value across reruns, so a bare `if question:` would append duplicates.
    if question and question != st.session_state.last_question:
        # Only the first 1500 characters of the document are put in the prompt.
        context_chunk = st.session_state.context[:1500]
        prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"

        generated = text_gen(prompt, max_length=100)[0]['generated_text']  # type: ignore

        # Save the exchange and remember that this question is done.
        st.session_state.chat_history.append(
            {"user": question, "bot": generated}
        )
        st.session_state.last_question = question

    # Display the whole conversation, oldest first.
    for chat in st.session_state.chat_history:
        st.markdown(f"**You:** {chat['user']}")
        st.markdown(f"**Bot:** {chat['bot']}")

elif pdf_file is not None and st.session_state.context is not None:
    # A PDF is uploaded but extraction produced an empty string.
    st.warning("No extractable text was found in this PDF.")
else:
    st.info("Please upload PDF to begin")