# Streamlit PDF chatbot: extract text from an uploaded PDF and answer
# questions about it with Hugging Face pipelines.
import fitz  # PyMuPDF
import streamlit as st
import torch
from transformers import pipeline
@st.cache_resource
def _load_pipelines():
    """Build the two Hugging Face pipelines once per server process.

    Streamlit re-executes the whole script on every user interaction, so
    without caching both models would be reloaded on each rerun;
    ``@st.cache_resource`` keeps the expensive loads from repeating.

    Returns:
        tuple: ``(qa, text_gen)`` — an extractive question-answering
        pipeline and a text2text-generation pipeline.
    """
    # BUG FIX: device=0 hard-coded a CUDA GPU and crashed on CPU-only
    # machines; fall back to CPU (-1) when CUDA is unavailable.
    device = 0 if torch.cuda.is_available() else -1
    return (
        pipeline("question-answering", model="deepset/roberta-base-squad2", device=device),
        pipeline("text2text-generation", model="google/flan-t5-base", device=device),
    )


qa, text_gen = _load_pipelines()
# extract text from uploaded document
# extract text from uploaded document
def extract_PDF(file) -> str:
    """Extract the plain text of every page from an uploaded PDF.

    Args:
        file: A file-like object whose ``.read()`` returns the raw PDF
            bytes (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages concatenated in page order ("" for an
        empty document).
    """
    # "".join avoids the quadratic cost of repeated string +=; the context
    # manager guarantees the fitz Document is closed on any exit path.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)  # type: ignore
# ------------------------------------------------------------------------------
# -----------------------------------Streamlit UI--------------------------------
st.title("Chatbot with Huggingface")
st.subheader("Upload file")
pdf_file = st.file_uploader("Upload", type="pdf")

# Initialize Session state for convo history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "context" not in st.session_state:
    st.session_state.context = None
if "last_question" not in st.session_state:
    st.session_state.last_question = None

# extract text and store in the session so later reruns reuse it
# NOTE(review): context is extracted only once — uploading a second PDF in
# the same session will not replace it; confirm whether that is intended.
if pdf_file is not None and st.session_state.context is None:
    st.session_state.context = extract_PDF(pdf_file)

# Chat section
if st.session_state.context:
    st.subheader("Chat with the PDF")
    question = st.text_input("You", key="user_input")
    # BUG FIX: Streamlit reruns the whole script on every interaction and
    # text_input keeps its value, so the original re-generated an answer and
    # appended a duplicate history entry on every rerun.  Only answer when
    # the question has actually changed.
    if question and question != st.session_state.last_question:
        st.session_state.last_question = question
        # BUG FIX: the original also ran the extractive `qa` pipeline here
        # and discarded its result — that dead (and expensive) inference is
        # removed.  Truncate the context so the FLAN-T5 prompt stays within
        # the model's input limit.
        context_chunk = st.session_state.context[:1500]
        prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"
        generated = text_gen(prompt, max_length=100)[0]['generated_text']  # type: ignore
        # save convo
        st.session_state.chat_history.append(
            {"user": question, "bot": generated}
        )
    # Display chat
    for chat in st.session_state.chat_history:
        st.markdown(f"**You:** {chat['user']}")
        st.markdown(f"**Bot:** {chat['bot']}")
else:
    st.info("Please upload PDF to begin")