Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import subprocess | |
| import sys | |
| # Function to install packages if not already installed | |
def install_package(package_name):
    """Install ``package_name`` with pip under the running interpreter.

    Runs ``<sys.executable> -m pip install <package_name>`` in a child
    process so the package lands in the same environment as this script.

    Args:
        package_name: Requirement string handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package_name]
    subprocess.check_call(pip_command)
# Import the third-party dependencies, installing each one on demand.
# Every fallback branch re-imports after installing so the name is bound
# in this module whether or not the package was present at start-up.
try:
    import pdfplumber
except ModuleNotFoundError:
    install_package('pdfplumber')
    # Without this import a fresh environment would install the package
    # but leave `pdfplumber` undefined for extract_text_from_pdfs.
    import pdfplumber

try:
    from transformers import pipeline
except ModuleNotFoundError:
    install_package('transformers')
    from transformers import pipeline

# PyTorch is the backend this script makes sure is available.
try:
    import torch
except ModuleNotFoundError:
    install_package('torch')
    import torch
| # Function to extract text from PDFs using pdfplumber | |
def extract_text_from_pdfs(pdf_files):
    """Extract the text of each PDF, keyed by the file's name.

    Args:
        pdf_files: Iterable of objects that ``pdfplumber.open`` accepts and
            that expose a ``name`` attribute (the script passes the files
            returned by the Streamlit uploader).

    Returns:
        dict mapping each file's ``name`` to the text of all its pages,
        concatenated in page order with no separator between pages.
    """
    pdf_texts = {}
    for pdf_file in pdf_files:
        with pdfplumber.open(pdf_file) as pdf:
            # extract_text() can yield None for a page without extractable
            # text; substitute "" so the concatenation never raises.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        pdf_texts[pdf_file.name] = "".join(page_texts)
    return pdf_texts
# Module-level question-answering pipeline used by answer_question.
# NOTE(review): this statement runs each time the script is executed;
# consider caching the loaded model if start-up cost matters.
qa_pipeline = pipeline(
    task='question-answering',
    model='distilbert-base-uncased-distilled-squad',
)
| # Function to answer questions based on extracted text | |
def answer_question(pdf_texts, question):
    """Answer ``question`` using the text of every extracted PDF.

    Args:
        pdf_texts: Mapping whose values are the extracted document texts.
        question: The question string to pass to the QA pipeline.

    Returns:
        The ``'answer'`` entry of the pipeline's result.
    """
    # All documents are merged into one space-separated context string.
    combined_context = " ".join(pdf_texts.values())
    qa_inputs = {"question": question, "context": combined_context}
    prediction = qa_pipeline(**qa_inputs)
    return prediction['answer']
# Streamlit application: upload PDFs, ask a question, display the answer.
st.title("PDF Question Answering App")

# Uploader restricted to PDF files; several files may be selected at once.
uploaded_files = st.file_uploader(
    "Upload PDF files",
    type="pdf",
    accept_multiple_files=True,
)

if not uploaded_files:
    # Nothing uploaded yet: prompt the user and show no question UI.
    st.write("Please upload PDF files to continue.")
else:
    # Extract the text of every uploaded PDF before asking for a question.
    pdf_texts = extract_text_from_pdfs(uploaded_files)
    st.write("PDFs Uploaded Successfully!")

    question = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        if not question:
            st.write("Please enter a question.")
        else:
            answer = answer_question(pdf_texts, question)
            st.write(f"Answer: {answer}")