Spaces:
Sleeping
Sleeping
import os
import tempfile
from typing import Dict

import PyPDF2
import streamlit as st
import torch
from transformers import BertTokenizerFast, BertForQuestionAnswering
# Load the pre-trained model and tokenizer.
# NOTE: the plain "bert-base-uncased" checkpoint carries no trained
# question-answering head, so BertForQuestionAnswering would start with a
# randomly initialised span classifier and return arbitrary answers. Use a
# checkpoint that was fine-tuned on SQuAD instead. This one is a BERT-large
# model, so it is a noticeably bigger download than the base checkpoint.
MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"

tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)
model = BertForQuestionAnswering.from_pretrained(MODEL_NAME)
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined in page order, with no separator added
        between pages. Empty string if no page yields any text.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_path, "rb") as file:
        # PdfFileReader / getNumPages / getPage / extractText were removed in
        # PyPDF2 3.0; PdfReader, .pages and extract_text() are the supported
        # replacements.
        reader = PyPDF2.PdfReader(file)
        # Extraction must happen while the file is still open: pages are
        # parsed lazily from the underlying stream. "or ''" guards against a
        # page for which the extractor yields nothing.
        return "".join(page.extract_text() or "" for page in reader.pages)
def preprocess_text(question, context):
    """Tokenize a question/context pair into PyTorch tensors for the model.

    Args:
        question: The question string (encoded as the first segment).
        context: The passage to search for an answer (second segment).

    Returns:
        The tokenizer's encoding of the pair, with tensors in PyTorch format,
        ready to be unpacked into the model call.
    """
    return tokenizer(
        question,
        context,
        # A whole PDF easily exceeds the model's maximum sequence length,
        # which makes the forward pass fail. Trim only the context (never the
        # question) down to the tokenizer's model maximum; text past that
        # limit is not searched.
        truncation="only_second",
        return_tensors="pt",
    )
def question_answering_system(question, pdf_path):
    """Answer a question using the text of a PDF as context.

    Args:
        question: The question to answer.
        pdf_path: Filesystem path of the PDF that supplies the context.

    Returns:
        The answer span decoded to plain text, or an empty string when the
        predicted end position precedes the predicted start position.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # E.g. a scanned, image-only PDF: there is nothing to search.
        raise ValueError("No extractable text was found in the PDF.")

    inputs = preprocess_text(question, context)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Index by position instead of tuple-unpacking: a ModelOutput iterates
    # like a dict (yielding field names), whereas integer indexing returns
    # the start/end logits for both ModelOutput and plain-tuple returns.
    start_scores, end_scores = outputs[0], outputs[1]

    start_index = int(torch.argmax(start_scores))
    end_index = int(torch.argmax(end_scores)) + 1  # slice end is exclusive
    if end_index <= start_index:
        # The two argmaxes are independent, so the span can be inverted.
        return ""

    answer_ids = inputs["input_ids"][0][start_index:end_index]
    # decode() merges WordPiece fragments ("##ing") back into words and drops
    # special tokens such as [CLS]/[SEP], unlike joining raw tokens.
    return tokenizer.decode(answer_ids, skip_special_tokens=True)
# Set up Streamlit app
st.set_page_config(page_title="PDF Question Answering", layout="wide")
st.title("PDF Question Answering System")
st.write("Upload a PDF file and enter a question related to its content.")

pdf_file = st.file_uploader("Upload PDF File", type=["pdf"])
uploaded_file_name = pdf_file.name if pdf_file else ""

question = st.text_input("Enter your question:", key="question")

if pdf_file and question:
    # The uploader hands back an in-memory buffer; its .name is only the
    # client-side filename, not a path on this machine. Write the bytes to a
    # temporary file so the path-based QA function can open it.
    temp_path = None
    try:
        # delete=False so the file can be reopened by path after this
        # handle is closed; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            temp_path = tmp.name
            tmp.write(pdf_file.getvalue())
        answer = question_answering_system(question, temp_path)
        st.success(f"Answer: {answer}")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"Error: {e}")
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

st.markdown("Made with ❤️ by [Streamlit](https://streamlit.io/) and [Hugging Face Transformers](https://huggingface.co/transformers/)")