File size: 2,095 Bytes
7b480a8
2e67cbf
 
 
f7264d1
 
 
 
 
2e67cbf
 
 
f7264d1
2e67cbf
4b3bafa
 
 
f7264d1
 
 
935599b
f7264d1
 
 
 
 
2189cdb
d645a31
2189cdb
 
 
d645a31
2189cdb
d645a31
 
 
2189cdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b480a8
2189cdb
d645a31
2189cdb
7b480a8
2189cdb
 
 
7b480a8
2189cdb
 
 
7b480a8
2189cdb
7b480a8
2189cdb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import streamlit as st
import subprocess
import sys

# Function to install packages if not already installed
def install_package(package_name):
    """Install *package_name* with pip, using the current interpreter.

    Args:
        package_name: Requirement string passed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import importlib

    subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
    # The package was installed while this interpreter was already running;
    # refresh the import system's finder caches so a following ``import``
    # can see the newly created module files.
    importlib.invalidate_caches()

# Install required packages if not already installed.
# Every fallback branch re-imports after installing so the name is bound
# on a fresh environment as well.
try:
    import pdfplumber
except ModuleNotFoundError:
    install_package('pdfplumber')
    # Without this re-import `pdfplumber` would stay undefined after a
    # first-time install and extract_text_from_pdfs would raise NameError.
    import pdfplumber

try:
    from transformers import pipeline
except ModuleNotFoundError:
    install_package('transformers')
    from transformers import pipeline

# Ensure that PyTorch is installed (only the torch backend is handled here)
try:
    import torch
except ModuleNotFoundError:
    install_package('torch')
    import torch

# Function to extract text from PDFs using pdfplumber
def extract_text_from_pdfs(pdf_files):
    """Extract the text of every page of each given PDF.

    Args:
        pdf_files: Iterable of objects that ``pdfplumber.open`` accepts and
            that expose a ``name`` attribute (used as the result key).

    Returns:
        dict mapping each file's ``name`` to the text of its pages joined
        with newlines. Pages that yield no text contribute nothing.
    """
    pdf_texts = {}
    for pdf_file in pdf_files:
        with pdfplumber.open(pdf_file) as pdf:
            # extract_text() may give None (or "") for a page without a text
            # layer; `or ""` keeps such pages from raising TypeError.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Join with a newline so the last word of one page does not fuse
        # with the first word of the next.
        pdf_texts[pdf_file.name] = "\n".join(t for t in page_texts if t)
    return pdf_texts

# Load pre-trained QA model
def _load_qa_pipeline():
    """Build the question-answering pipeline used by answer_question."""
    return pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')


# Streamlit re-executes this script on every widget interaction, which would
# rebuild the model each time. Cache the loader as a shared resource when the
# installed Streamlit provides st.cache_resource; otherwise load uncached as
# before.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_qa_pipeline = _cache_resource(_load_qa_pipeline)

qa_pipeline = _load_qa_pipeline()

# Function to answer questions based on extracted text
def answer_question(pdf_texts, question):
    """Answer *question* using the text of all PDFs as context.

    Args:
        pdf_texts: Mapping whose values are the extracted PDF texts.
        question: The question string passed to the QA pipeline.

    Returns:
        The ``'answer'`` entry of the pipeline's result.
    """
    # All documents are merged into one space-separated context string.
    combined_context = " ".join(pdf_texts.values())
    prediction = qa_pipeline(question=question, context=combined_context)
    return prediction['answer']

# Streamlit application
st.title("PDF Question Answering App")

# Let the user upload one or more PDF files
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

if not uploaded_files:
    # Nothing uploaded yet: prompt the user and render nothing else
    st.write("Please upload PDF files to continue.")
else:
    # Pull the text out of every uploaded PDF
    pdf_texts = extract_text_from_pdfs(uploaded_files)
    st.write("PDFs Uploaded Successfully!")

    # Question input, followed by the submit button (widget order matters)
    question = st.text_input("Enter your question:")
    ask_clicked = st.button("Get Answer")

    if ask_clicked and not question:
        # Button pressed with an empty question box
        st.write("Please enter a question.")
    elif ask_clicked:
        answer = answer_question(pdf_texts, question)
        st.write(f"Answer: {answer}")