# project-stram / app.py
# Akshayram1 - "Create app.py" (commit 7667205, verified)
import os
import tempfile
from typing import Dict

import PyPDF2
import streamlit as st
import torch
from transformers import BertTokenizerFast, BertForQuestionAnswering
# Load the pre-trained model and tokenizer.
# The plain "bert-base-uncased" checkpoint contains no trained
# question-answering head, so BertForQuestionAnswering would initialise its
# span classifier randomly and predict arbitrary answer spans. Load a
# checkpoint that was fine-tuned for extractive QA (SQuAD 2.0) instead.
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# weights are re-loaded each time; consider caching if the deployed Streamlit
# version supports it.
QA_MODEL_NAME = "deepset/bert-base-cased-squad2"
tokenizer = BertTokenizerFast.from_pretrained(QA_MODEL_NAME)
model = BertForQuestionAnswering.from_pretrained(QA_MODEL_NAME)
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        A single string with the extracted text of all pages. Pages that
        yield no text contribute an empty string.

    Raises:
        OSError: If ``pdf_path`` cannot be opened for reading.
    """
    with open(pdf_path, "rb") as file:
        # PdfReader / .pages / extract_text() are the current PyPDF2 API; the
        # PdfFileReader / getNumPages() / getPage() / extractText() names were
        # removed in PyPDF2 3.0 and raise when called.
        reader = PyPDF2.PdfReader(file)
        # Extraction must happen while the file is still open because pages
        # are read from the underlying stream on access.
        return "".join(page.extract_text() or "" for page in reader.pages)
def preprocess_text(question, context):
    """Tokenize a question/context pair into PyTorch tensors for the model.

    The context is truncated so that the encoded pair never exceeds the
    512-token limit of BERT; without this, any document longer than that
    limit makes the model's forward pass fail. As a consequence only the
    leading part of a long context is visible to the model.

    Args:
        question: The question text (kept intact, never truncated).
        context: The passage in which the answer is searched.

    Returns:
        The tokenizer's encoding (``input_ids``, ``attention_mask``, ...)
        with ``return_tensors="pt"``, holding a single sequence.
    """
    # BERT has 512 learned position embeddings; respect a smaller tokenizer
    # limit if one is configured.
    max_length = min(tokenizer.model_max_length, 512)
    return tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",  # trim the context, never the question
        max_length=max_length,
    )
def question_answering_system(question, pdf_path):
    """Answer a question using the text of a PDF as context.

    The PDF text is extracted, encoded together with the question, and run
    through the question-answering model. The answer is the span between the
    highest-scoring start token and the highest-scoring end token.

    Args:
        question: The question to answer.
        pdf_path: Filesystem path of the PDF that provides the context.

    Returns:
        The decoded answer text. An empty string is returned when the
        predicted end position lies before the predicted start position.
    """
    context = extract_text_from_pdf(pdf_path)
    inputs = preprocess_text(question, context)

    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # The model returns an output object, not a (start, end) tuple; iterating
    # or unpacking it would yield field names rather than tensors.
    start_index = int(torch.argmax(outputs.start_logits))
    end_index = int(torch.argmax(outputs.end_logits)) + 1  # slice end is exclusive

    answer_tokens = inputs["input_ids"][0][start_index:end_index]
    # decode() merges WordPiece fragments ("##ing") back into words and drops
    # special tokens such as [CLS]/[SEP], unlike joining raw tokens by spaces.
    return tokenizer.decode(answer_tokens, skip_special_tokens=True)
# Set up Streamlit app
st.set_page_config(page_title="PDF Question Answering", layout="wide")
st.title("PDF Question Answering System")
st.write("Upload a PDF file and enter a question related to its content.")

pdf_file = st.file_uploader("Upload PDF File", type=["pdf"])
question = st.text_input("Enter your question:", key="question")

if pdf_file is not None and question:
    # The uploader returns an in-memory file object; its .name is only the
    # client-side filename, not a path that exists on this machine. Persist
    # the bytes to a temporary file so the path-based pipeline can open it.
    tmp_path = None
    try:
        # delete=False so the file can be reopened by name after this handle
        # is closed (required on Windows); it is removed in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(pdf_file.getvalue())
            tmp_path = tmp.name
        answer = question_answering_system(question, tmp_path)
        st.success(f"Answer: {answer}")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead of
        # crashing the app.
        st.error(f"Error: {str(e)}")
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)

st.markdown("Made with ❤️ by [Streamlit](https://streamlit.io/) and [Hugging Face Transformers](https://huggingface.co/transformers/)")