Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| import fitz # PyMuPDF | |
| import tempfile | |
| import os | |
| # Load the QA model | |
@st.cache_resource
def _load_qa_model():
    """Build the extractive question-answering pipeline once per process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; without caching, the model would be re-instantiated on
    each rerun. ``st.cache_resource`` (Streamlit >= 1.18) keeps a single
    shared instance alive across reruns and sessions.

    Returns:
        The ``transformers`` question-answering pipeline.
    """
    return pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")


qa_model = _load_qa_model()
| # Function to extract text from a PDF file | |
def extract_text_from_pdf(uploaded_file):
    """Extract the text of every page of an uploaded PDF.

    The PDF is parsed directly from memory, so nothing is written to disk
    and no temporary file can be left behind or read before it has been
    fully flushed.

    Args:
        uploaded_file: Binary file-like object whose ``read()`` returns the
            raw PDF bytes (for example the value returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages concatenated in page order, or ``None`` when
        the PDF could not be read. In the failure case the error is
        reported to the user through ``st.error``.
    """
    try:
        pdf_bytes = uploaded_file.read()
        # The context manager closes the document even if a page fails
        # to render its text.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # UI boundary: surface any parsing failure in the page instead of
        # crashing the app with a traceback.
        st.error(f"Error extracting text from PDF: {str(e)}")
        return None
| # Streamlit app | |
def main():
    """Render the PDF question-answering page.

    Flow: upload a PDF, show its extracted text, accept a question and,
    when "Get Answer" is clicked, display the answer span the QA model
    finds in the extracted text. Each early return corresponds to a step
    the user has not completed yet (or that failed), so later widgets are
    only rendered once the earlier steps succeeded.
    """
    st.title("PDF Question Answering App")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text is None:
        # Extraction failed; the error has already been shown to the user.
        return

    st.subheader("Extracted Text from PDF")
    st.text(pdf_text)

    question = st.text_input("Ask a question about the PDF:")

    if not st.button("Get Answer"):
        return

    # A whitespace-only question carries no query, so treat it as missing.
    if not question.strip():
        st.warning("Please enter a question.")
        return

    # The QA pipeline raises ValueError on an empty context, which happens
    # for scanned/image-only PDFs that contain no extractable text.
    if not pdf_text.strip():
        st.warning("No text could be extracted from this PDF, so there is nothing to answer from.")
        return

    answer = qa_model(question=question, context=pdf_text)
    st.subheader("Answer:")
    st.write(answer["answer"])


if __name__ == "__main__":
    main()