Akshayram1 committed on
Commit
7667205
·
verified ·
1 Parent(s): df34190

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ from typing import Dict
4
+ from transformers import BertTokenizerFast, BertForQuestionAnswering
5
+ import torch
6
+ import streamlit as st
7
+
8
# Load the pre-trained model and tokenizer.
#
# The checkpoint must already be fine-tuned for extractive question answering:
# loading a plain language-model checkpoint such as "bert-base-uncased" into
# BertForQuestionAnswering leaves the span-prediction head randomly
# initialised, so the predicted answer spans are meaningless.
# The QA_MODEL_NAME environment variable can override the default checkpoint.
MODEL_NAME = os.environ.get(
    "QA_MODEL_NAME",
    "bert-large-uncased-whole-word-masking-finetuned-squad",
)
tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)
model = BertForQuestionAnswering.from_pretrained(MODEL_NAME)
11
+
12
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Filesystem path of the PDF, or an already-open binary
            file-like object (anything exposing a ``read`` method).

    Returns:
        The text of all pages joined in page order with no separator. A page
        for which the extractor yields nothing contributes an empty string.
    """

    def _read_pages(stream):
        # PyPDF2 3.0 removed PdfFileReader / getNumPages / getPage /
        # extractText; prefer the current API and only fall back to the
        # legacy names on old installations that lack PdfReader.
        if hasattr(PyPDF2, "PdfReader"):
            reader = PyPDF2.PdfReader(stream)
            return "".join(page.extract_text() or "" for page in reader.pages)
        reader = PyPDF2.PdfFileReader(stream)
        return "".join(
            reader.getPage(page_num).extractText() or ""
            for page_num in range(reader.getNumPages())
        )

    if hasattr(pdf_path, "read"):
        return _read_pages(pdf_path)
    # Extract inside the ``with`` so the file is still open while pages are read.
    with open(pdf_path, "rb") as file:
        return _read_pages(file)
19
+
20
def preprocess_text(question, context):
    """Tokenize a question/context pair into PyTorch tensors for the model.

    Args:
        question: The question string (first sequence of the pair).
        context: The text in which to look for the answer (second sequence).

    Returns:
        The tokenizer's encoding for the pair, with tensors in PyTorch format
        (``return_tensors="pt"``).
    """
    return tokenizer(
        question,
        context,
        # A whole PDF easily exceeds the number of positions the model can
        # embed, which makes the forward pass fail. Cut only the context so
        # the question is always kept intact.
        truncation="only_second",
        max_length=model.config.max_position_embeddings,
        return_tensors="pt",
    )
23
+
24
def question_answering_system(question, pdf_path):
    """Answer a question using the text of a PDF as context.

    Args:
        question: The question to answer.
        pdf_path: Passed straight to ``extract_text_from_pdf``.

    Returns:
        The decoded text of the highest-scoring answer span. The string is
        empty when the predicted end position falls before the predicted
        start position.
    """
    context = extract_text_from_pdf(pdf_path)
    inputs = preprocess_text(question, context)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Index instead of tuple-unpacking: recent transformers versions return a
    # dict-like ModelOutput, and unpacking that yields its keys, not tensors.
    # Integer indexing works for both the ModelOutput and the legacy tuple.
    start_scores, end_scores = outputs[0], outputs[1]

    start_index = int(torch.argmax(start_scores))
    end_index = int(torch.argmax(end_scores)) + 1  # slice end is exclusive
    answer_tokens = inputs["input_ids"][0][start_index:end_index]

    # decode() merges "##" word pieces back into whole words and drops
    # special tokens such as [SEP] if the span happens to cross them.
    return tokenizer.decode(answer_tokens, skip_special_tokens=True)
35
+
36
# Set up Streamlit app
import tempfile

st.set_page_config(page_title="PDF Question Answering", layout="wide")

st.title("PDF Question Answering System")
st.write("Upload a PDF file and enter a question related to its content.")

pdf_file = st.file_uploader("Upload PDF File", type=["pdf"])
question = st.text_input("Enter your question:", key="question")

if pdf_file and question:
    tmp_path = None
    try:
        # The uploader returns an in-memory file object; its ``name`` is only
        # the client-side file name, not a path that exists on this machine.
        # Write the bytes to a temporary file so the PDF can be opened by path.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(pdf_file.getvalue())
            tmp_path = tmp.name
        answer = question_answering_system(question, tmp_path)
        st.success(f"Answer: {answer}")
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        st.error(f"Error: {str(e)}")
    finally:
        # delete=False above means the temporary file must be removed by hand.
        if tmp_path is not None:
            os.remove(tmp_path)

st.markdown("Made with ❤️ by [Streamlit](https://streamlit.io/) and [Hugging Face Transformers](https://huggingface.co/transformers/)")