|
|
import streamlit as st
|
|
|
from transformers import pipeline
|
|
|
import fitz
|
|
|
|
|
|
@st.cache_resource
def _load_pipelines():
    """Build the extractive-QA and text-generation pipelines once.

    Streamlit re-executes this script on every user interaction; without
    caching, both models would be re-instantiated on each rerun.
    ``st.cache_resource`` keeps a single shared instance alive instead.

    Returns:
        A ``(qa, text_gen)`` tuple: the "question-answering" pipeline backed
        by ``deepset/roberta-base-squad2`` and the "text2text-generation"
        pipeline backed by ``google/flan-t5-base``.
    """
    # NOTE(review): device=0 pins both models to the first accelerator;
    # confirm the deployment host actually has one, otherwise this needs a
    # CPU fallback.
    extractive = pipeline(
        "question-answering",
        model="deepset/roberta-base-squad2",
        device=0,
    )
    generative = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        device=0,
    )
    return extractive, generative


qa, text_gen = _load_pipelines()
|
|
|
|
|
|
|
|
|
|
|
|
def extract_PDF(file):
    """Return the text of every page of a PDF as one string.

    Args:
        file: Binary file-like object; its ``read()`` result is handed to
            ``fitz.open`` as an in-memory PDF stream.

    Returns:
        The ``get_text()`` output of each page, concatenated in page order
        with no separator. An empty document yields ``""``.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        # join() avoids repeatedly re-allocating a growing string.
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page header, followed by the upload widget. The uploader only accepts
# files with the "pdf" type; `pdf_file` is None until something is uploaded.
st.title("Chatbot with Huggingface")
st.subheader("Upload file")
pdf_file = st.file_uploader(label="Upload", type="pdf")
|
|
|
|
|
|
|
|
|
|
|
|
# Seed the per-session state on first run only; existing values are kept.
#   chat_history: list of {"user": ..., "bot": ...} exchanges shown on screen
#   context:      extracted PDF text, or None while nothing has been loaded
for state_key, initial_value in (("chat_history", []), ("context", None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
|
|
|
|
|
|
|
|
|
# Extract the document text whenever the uploaded file differs from the one
# already loaded. Checking only "context is None" would ignore a second
# upload and keep answering from the previous document.
if pdf_file is not None:
    # Name plus size is a cheap identity for the upload; it avoids re-parsing
    # the same PDF on every rerun.
    upload_signature = (pdf_file.name, pdf_file.size)
    if st.session_state.get("context_source") != upload_signature:
        st.session_state.context = extract_PDF(pdf_file)
        st.session_state.context_source = upload_signature
        # Earlier answers refer to the previous document, so start afresh.
        st.session_state.chat_history = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _context_window(context, start, end, size=1500):
    """Return at most ``size`` characters of ``context`` around a span.

    Args:
        context: Full document text.
        start: Character offset where the span of interest begins.
        end: Character offset where the span of interest ends.
        size: Maximum number of characters to return.

    Returns:
        ``context`` itself when it already fits in ``size``; otherwise a
        ``size``-character slice centred on the span and clamped to the
        document boundaries.
    """
    if len(context) <= size:
        return context
    centre = (start + end) // 2
    begin = max(0, min(centre - size // 2, len(context) - size))
    return context[begin:begin + size]


# Chat UI: shown only once document text is available; otherwise the user is
# asked to upload a file.
if st.session_state.context:
    st.subheader("Chat with the PDF")

    document = st.session_state.context
    # Strip so that a whitespace-only entry is not treated as a question.
    question = st.text_input("You", key="user_input").strip()

    # Streamlit re-executes the script on every interaction while the input
    # box keeps its value. Remember the last answered (question, document)
    # pair so an unrelated rerun does not append the same exchange again.
    request = (question, document)
    if question and st.session_state.get("last_request") != request:
        # The extractive model locates the most likely answer span in the
        # whole document; its character offsets decide which part of the
        # text the generator sees, instead of always the first 1500 chars.
        span = qa(question=question, context=document)
        context_chunk = _context_window(document, span["start"], span["end"])
        prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"

        generated = text_gen(prompt, max_length=100)[0]["generated_text"]

        st.session_state.chat_history.append(
            {"user": question, "bot": generated}
        )
        st.session_state.last_request = request

    # Render the whole conversation, oldest exchange first.
    for chat in st.session_state.chat_history:
        st.markdown(f"**You:** {chat['user']}")
        st.markdown(f"**Bot:** {chat['bot']}")
else:
    st.info("Please upload PDF to begin")
|
|
|
|