"""Streamlit app: chat with an uploaded PDF using a Hugging Face seq2seq model."""

import streamlit as st
import torch
from transformers import pipeline
import fitz  # PyMuPDF


@st.cache_resource
def load_generator():
    """Load the text2text-generation pipeline once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the model from being re-initialised on
    each rerun.

    NOTE(review): the original script also loaded and ran a
    question-answering pipeline (deepset/roberta-base-squad2) whose result
    was never used; that dead inference has been removed.
    """
    # Fall back to CPU (-1) when no CUDA device is present so the app
    # does not crash on GPU-less machines.
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "text2text-generation", model="google/flan-t5-base", device=device
    )


def extract_PDF(file):
    """Return the concatenated plain text of every page of *file*.

    *file* is a file-like object (e.g. a Streamlit UploadedFile) holding
    PDF bytes; the whole stream is consumed.
    """
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        # "".join over a generator instead of quadratic `text +=` in a loop.
        return "".join(page.get_text() for page in doc)  # type: ignore


# ------------------------------------------------------------------------------
# ----------------------------------- Streamlit UI -----------------------------
st.title("Chatbot with Huggingface")
st.subheader("Upload file")
pdf_file = st.file_uploader("Upload", type="pdf")

# Initialize session state for conversation history and document context.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "context" not in st.session_state:
    st.session_state.context = None

# Extract text once and cache it in the session.
if pdf_file is not None and st.session_state.context is None:
    st.session_state.context = extract_PDF(pdf_file)

# Chat section
if st.session_state.context:
    st.subheader("Chat with the PDF")
    question = st.text_input("You", key="user_input")
    if question:
        history = st.session_state.chat_history
        # text_input keeps its value across reruns, so without this guard
        # the same question/answer pair is re-appended on every rerun.
        if not history or history[-1]["user"] != question:
            # Truncate the context to keep the prompt within the model's
            # input limits (same 1500-char cut as before).
            context_chunk = st.session_state.context[:1500]
            prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"
            generated = load_generator()(prompt, max_length=100)[0][
                "generated_text"
            ]  # type: ignore
            # Save the exchange in the conversation history.
            history.append({"user": question, "bot": generated})
    # Display chat
    for chat in st.session_state.chat_history:
        st.markdown(f"**You:** {chat['user']}")
        st.markdown(f"**Bot:** {chat['bot']}")
else:
    st.info("Please upload PDF to begin")