import io

import requests
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
@st.cache_resource
def _load_qa_pipeline():
    """Build the extractive question-answering pipeline once and reuse it.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the pipeline as a resource keeps the model from
    being rebuilt on each rerun.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
    )


# Module-level handle to the cached question-answering pipeline.
qa_pipeline = _load_qa_pipeline()
def extract_text_from_huggingface_pdf(url, timeout=30):
    """Download the PDF at *url* and return the text of all its pages.

    Args:
        url: Direct-download URL of a PDF file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The extracted text of every page, each page followed by a newline.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()

    # Parse from memory rather than via a fixed "document.pdf" on disk, so
    # nothing is left behind in the working directory and concurrent
    # sessions cannot overwrite each other's download.
    pdf_reader = PdfReader(io.BytesIO(response.content))

    # Guard against a page whose extraction yields None (no extractable text)
    # so the concatenation cannot raise TypeError.
    return "".join(
        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
    )
# Hugging Face serves the raw file bytes under /resolve/<revision>/; the
# /blob/<revision>/ path returns the HTML viewer page, which is not a PDF.
PDF_URL = (
    "https://huggingface.co/spaces/SujathaL/AWS_Restart_Program_Chatbot/"
    "resolve/main/AWS%20restart%20program%20information.docx.pdf"
)


@st.cache_data
def _load_pdf_text(url):
    """Return the extracted PDF text for *url*, cached across script reruns.

    Streamlit re-executes the script on every interaction; caching avoids
    downloading and parsing the same document each time.
    """
    return extract_text_from_huggingface_pdf(url)


# Full document text used as the context for question answering.
pdf_text = _load_pdf_text(PDF_URL)
# --- Streamlit UI: ask a question and show the model's extracted answer ---
st.title("Chat with Your PDF")

question = st.text_input("Ask a question about the PDF:")

if st.button("Get Answer"):
    # Strip so a whitespace-only entry is treated the same as no question.
    cleaned_question = question.strip()
    if not cleaned_question:
        # Give feedback instead of silently ignoring the click.
        st.warning("Please enter a question first.")
    elif not pdf_text.strip():
        # Nothing to search in; avoid calling the model with an empty context.
        st.error("No text could be extracted from the PDF.")
    else:
        response = qa_pipeline(question=cleaned_question, context=pdf_text)
        st.write("Answer:", response["answer"])