import os
import tempfile
from typing import Dict

import PyPDF2
import streamlit as st
import torch
from transformers import BertTokenizerFast, BertForQuestionAnswering

# Load the pre-trained model and tokenizer.
# The plain "bert-base-uncased" checkpoint has no trained question-answering
# head: BertForQuestionAnswering would initialise it with random weights and
# the predicted answer spans would be meaningless. Use a BERT checkpoint that
# was fine-tuned on SQuAD so the start/end logits are usable out of the box.
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = BertTokenizerFast.from_pretrained(QA_MODEL_NAME)
model = BertForQuestionAnswering.from_pretrained(QA_MODEL_NAME)

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: Filesystem path of the PDF, or an already-open binary
            file-like object (anything exposing ``read``).

    Returns:
        The text of all pages joined in page order. Pages from which no
        text can be extracted contribute an empty string.
    """

    def _read_pages(stream):
        # PyPDF2 >= 1.28 provides PdfReader / .pages / .extract_text(); the
        # camelCase API was removed in PyPDF2 3.x. Prefer the current names
        # and fall back to the legacy ones only on old installations.
        if hasattr(PyPDF2, "PdfReader"):
            reader = PyPDF2.PdfReader(stream)
            return "".join(page.extract_text() or "" for page in reader.pages)
        reader = PyPDF2.PdfFileReader(stream)
        return "".join(
            reader.getPage(page_num).extractText() or ""
            for page_num in range(reader.getNumPages())
        )

    if hasattr(pdf_path, "read"):
        return _read_pages(pdf_path)

    # Pages are read lazily, so extraction must finish before the file closes.
    with open(pdf_path, "rb") as file:
        return _read_pages(file)

def preprocess_text(question, context, max_length=512):
    """Tokenize a question/context pair into model-ready PyTorch tensors.

    Args:
        question: The question string (first segment).
        context: The passage to search for the answer (second segment).
        max_length: Maximum total number of tokens, special tokens included.
            Defaults to 512, the longest sequence BERT accepts.

    Returns:
        The tokenizer's encoding (``input_ids``, ``attention_mask``, ...)
        as a single-sequence batch of PyTorch tensors.

    Note:
        Only the context is truncated, never the question. Context beyond
        the limit is dropped, so answers located later in a long document
        cannot be found.
    """
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        # Without truncation any realistic PDF exceeds the model's position
        # limit and the forward pass fails.
        truncation="only_second",
        max_length=max_length,
    )
    return inputs

def question_answering_system(question, pdf_path):
    """Answer a question using the text of a PDF as context.

    Args:
        question: The question to answer.
        pdf_path: Location of the PDF, passed through to
            ``extract_text_from_pdf``.

    Returns:
        The answer span predicted by the model as a plain string, or an
        empty string when the model predicts no valid span.
    """
    context = extract_text_from_pdf(pdf_path)
    inputs = preprocess_text(question, context)

    # Inference only: skip gradient tracking to save time and memory.
    with torch.no_grad():
        outputs = model(**inputs)

    # Index the output instead of tuple-unpacking it. Recent transformers
    # versions return a dict-like ModelOutput whose iteration yields key
    # names, not tensors; integer indexing works for both that object and
    # the plain tuple returned by older versions.
    start_scores, end_scores = outputs[0], outputs[1]

    start_index = int(torch.argmax(start_scores))
    end_index = int(torch.argmax(end_scores)) + 1  # exclusive slice bound

    # The two argmaxes are independent, so the end can precede the start.
    if end_index <= start_index:
        return ""

    answer_tokens = inputs["input_ids"][0][start_index:end_index]

    # decode() merges "##" word pieces back into words and drops special
    # tokens, unlike joining the raw tokens with spaces.
    return tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()

# Set up Streamlit app
st.set_page_config(page_title="PDF Question Answering", layout="wide")

st.title("PDF Question Answering System")
st.write("Upload a PDF file and enter a question related to its content.")

pdf_file = st.file_uploader("Upload PDF File", type=["pdf"])
# Client-side name of the upload; it is NOT a path on this machine.
uploaded_file_name = pdf_file.name if pdf_file else ""

question = st.text_input("Enter your question:", key="question")

if pdf_file and question:
    temp_pdf_path = None
    try:
        # The upload lives in memory only, so persist it to a temporary file
        # to obtain a real path for the QA pipeline. delete=False lets the
        # file be reopened by path on every platform; it is removed below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
            temp_pdf.write(pdf_file.getvalue())
            temp_pdf_path = temp_pdf.name

        answer = question_answering_system(question, temp_pdf_path)
        st.success(f"Answer: {answer}")
    except Exception as e:
        # Top-level UI boundary: report any failure to the user.
        st.error(f"Error: {str(e)}")
    finally:
        if temp_pdf_path and os.path.exists(temp_pdf_path):
            os.remove(temp_pdf_path)

st.markdown("Made with ❤️ by [Streamlit](https://streamlit.io/) and [Hugging Face Transformers](https://huggingface.co/transformers/)")