import streamlit as st
import subprocess
import sys
# Function to install packages if not already installed
def install_package(package_name):
    """Install *package_name* with pip under the interpreter running this script.

    Args:
        package_name: Name of the distribution to pass to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If the pip process exits with a
            non-zero status.
    """
    # Invoke pip via ``sys.executable -m pip`` so the package lands in the
    # same environment this script is executing in.
    pip_command = [sys.executable, "-m", "pip", "install", package_name]
    subprocess.check_call(pip_command)
# Import the third-party dependencies, installing any that are missing.
# Every fallback branch re-imports after installing so the name is bound
# regardless of which path was taken.
try:
    import pdfplumber
except ModuleNotFoundError:
    install_package('pdfplumber')
    # This re-import was missing: without it ``pdfplumber`` stayed undefined
    # on the first run and the PDF extraction failed with NameError.
    import pdfplumber

try:
    from transformers import pipeline
except ModuleNotFoundError:
    install_package('transformers')
    from transformers import pipeline

# Ensure PyTorch is installed (only the PyTorch backend is checked here).
try:
    import torch
except ModuleNotFoundError:
    install_package('torch')
    import torch
# Function to extract text from PDFs using pdfplumber
def extract_text_from_pdfs(pdf_files):
    """Extract the text of every page of each given PDF using pdfplumber.

    Args:
        pdf_files: Iterable of objects that ``pdfplumber.open`` accepts and
            that expose a ``name`` attribute, which is used as the result key.

    Returns:
        A dict mapping each file's ``name`` to the concatenation of the text
        extracted from all of its pages. Files sharing the same ``name``
        overwrite one another; the last one wins.
    """
    pdf_texts = {}
    for pdf_file in pdf_files:
        with pdfplumber.open(pdf_file) as pdf:
            # extract_text() may yield None for a page without extractable
            # text (e.g. an image-only page); treat that as an empty string
            # instead of failing on ``str += None``.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        pdf_texts[pdf_file.name] = "".join(page_texts)
    return pdf_texts
# Load pre-trained QA model.
# Streamlit re-executes this script on every widget interaction, so the load
# is wrapped in st.cache_resource to build the pipeline once and reuse it
# across reruns instead of re-creating it each time.
@st.cache_resource
def _load_qa_pipeline():
    """Build the question-answering pipeline used by the app."""
    return pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')


qa_pipeline = _load_qa_pipeline()
# Function to answer questions based on extracted text
def answer_question(pdf_texts, question):
    """Answer *question* using the text of all extracted PDFs as context.

    Args:
        pdf_texts: Mapping whose values are the extracted text of each PDF.
        question: The question string to pass to the QA pipeline.

    Returns:
        The ``'answer'`` entry of the result returned by ``qa_pipeline``.
    """
    # All documents are merged into one space-separated context string.
    combined_context = " ".join(pdf_texts.values())
    prediction = qa_pipeline(question=question, context=combined_context)
    return prediction['answer']
# Streamlit application: page title, PDF upload, then question/answer flow.
st.title("PDF Question Answering App")

# Let the user upload one or more PDF files.
uploaded_files = st.file_uploader(
    "Upload PDF files",
    type="pdf",
    accept_multiple_files=True,
)

if not uploaded_files:
    # Nothing uploaded yet: prompt the user and show nothing else.
    st.write("Please upload PDF files to continue.")
else:
    # Extract text from the uploaded PDFs before asking for a question.
    pdf_texts = extract_text_from_pdfs(uploaded_files)
    st.write("PDFs Uploaded Successfully!")

    # Question input and the button that triggers answering.
    question = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        if not question:
            st.write("Please enter a question.")
        else:
            answer = answer_question(pdf_texts, question)
            st.write(f"Answer: {answer}")
|