# Streamlit PDF chatbot: extract text from an uploaded PDF and answer
# questions about it with Hugging Face pipelines.
import fitz  # PyMuPDF
import streamlit as st
import torch
from transformers import pipeline
@st.cache_resource
def _load_pipelines():
    """Build the two Hugging Face pipelines once per server process.

    Streamlit re-executes the whole script on every user interaction, so
    without caching both models would be reloaded on each rerun;
    ``@st.cache_resource`` keeps the expensive loads from repeating.

    Returns:
        tuple: ``(qa, text_gen)`` — an extractive question-answering
        pipeline and a text2text-generation pipeline.
    """
    # BUG FIX: device=0 hard-coded a CUDA GPU and crashed on CPU-only
    # machines; fall back to CPU (-1) when CUDA is unavailable.
    device = 0 if torch.cuda.is_available() else -1
    return (
        pipeline("question-answering", model="deepset/roberta-base-squad2", device=device),
        pipeline("text2text-generation", model="google/flan-t5-base", device=device),
    )


qa, text_gen = _load_pipelines()
# extract text from uploaded document
# extract text from uploaded document
def extract_PDF(file) -> str:
    """Extract the plain text of every page from an uploaded PDF.

    Args:
        file: A file-like object whose ``.read()`` returns the raw PDF
            bytes (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages concatenated in page order ("" for an
        empty document).
    """
    # "".join avoids the quadratic cost of repeated string +=; the context
    # manager guarantees the fitz Document is closed on any exit path.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)  # type: ignore
# ------------------------------------------------------------------------------
# -----------------------------------Streamlit UI--------------------------------
st.title("Chatbot with Huggingface")
st.subheader("Upload file")
pdf_file = st.file_uploader("Upload", type="pdf")

# Initialize Session state for convo history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "context" not in st.session_state:
    st.session_state.context = None
if "last_question" not in st.session_state:
    st.session_state.last_question = None

# extract text and store in the session so later reruns reuse it
# NOTE(review): context is extracted only once — uploading a second PDF in
# the same session will not replace it; confirm whether that is intended.
if pdf_file is not None and st.session_state.context is None:
    st.session_state.context = extract_PDF(pdf_file)

# Chat section
if st.session_state.context:
    st.subheader("Chat with the PDF")
    question = st.text_input("You", key="user_input")
    # BUG FIX: Streamlit reruns the whole script on every interaction and
    # text_input keeps its value, so the original re-generated an answer and
    # appended a duplicate history entry on every rerun.  Only answer when
    # the question has actually changed.
    if question and question != st.session_state.last_question:
        st.session_state.last_question = question
        # BUG FIX: the original also ran the extractive `qa` pipeline here
        # and discarded its result — that dead (and expensive) inference is
        # removed.  Truncate the context so the FLAN-T5 prompt stays within
        # the model's input limit.
        context_chunk = st.session_state.context[:1500]
        prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"
        generated = text_gen(prompt, max_length=100)[0]['generated_text']  # type: ignore
        # save convo
        st.session_state.chat_history.append(
            {"user": question, "bot": generated}
        )
    # Display chat
    for chat in st.session_state.chat_history:
        st.markdown(f"**You:** {chat['user']}")
        st.markdown(f"**Bot:** {chat['bot']}")
else:
    st.info("Please upload PDF to begin")