Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import pipeline
|
| 3 |
import PyPDF2
|
|
|
|
| 4 |
|
| 5 |
# Load Hugging Face Question Answering model
|
| 6 |
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
|
|
@@ -14,17 +15,38 @@ def extract_text_from_pdf(pdf_path):
|
|
| 14 |
text += page.extract_text() + "\n"
|
| 15 |
return text
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Streamlit UI
|
| 18 |
st.title("Chat with AWS Restart PDF")
|
| 19 |
|
| 20 |
# Use the uploaded PDF file
|
| 21 |
pdf_path = "AWS restart program information.docx.pdf" # Update with your file name
|
| 22 |
pdf_text = extract_text_from_pdf(pdf_path)
|
|
|
|
|
|
|
| 23 |
st.write("✅ PDF Loaded Successfully!")
|
| 24 |
|
| 25 |
# User Input
|
| 26 |
question = st.text_input("Ask a question about AWS Restart program:")
|
| 27 |
|
| 28 |
if st.button("Get Answer") and question:
|
| 29 |
-
|
|
|
|
| 30 |
st.write("Answer:", response['answer'])
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import pipeline
|
| 3 |
import PyPDF2
|
| 4 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
|
| 6 |
# Load Hugging Face Question Answering model
|
| 7 |
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
|
|
|
|
| 15 |
text += page.extract_text() + "\n"
|
| 16 |
return text
|
| 17 |
|
| 18 |
+
# Function to split text into smaller chunks
|
| 19 |
+
def split_text(text, chunk_size=500, chunk_overlap=50):
    """Split ``text`` into overlapping chunks for per-chunk question answering.

    Args:
        text: The full document text to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value that used to be
            hard-coded here.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 50, the value that used to be
            hard-coded here.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the chunks that are later scored one by one).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
|
| 23 |
+
|
| 24 |
+
# Function to find the most relevant chunk for a question
|
| 25 |
+
def find_relevant_chunk(question, chunks):
    """Return the chunk whose QA response for ``question`` scores highest.

    ``qa_pipeline`` is run once per non-blank chunk and the ``score`` field
    of each response is compared. On a tie the earliest chunk wins.

    Args:
        question: The question text passed to ``qa_pipeline``.
        chunks: Iterable of candidate context strings.

    Returns:
        The highest-scoring chunk, or ``""`` when ``chunks`` contains no
        non-blank entry.
    """
    best_chunk = ""
    # Start below any possible score so that a chunk scoring exactly 0 can
    # still be selected; starting at 0 with a strict ">" would discard it.
    best_score = float("-inf")
    for chunk in chunks:
        if not chunk.strip():
            # A blank chunk has no text to extract an answer from.
            # NOTE(review): the QA pipeline is expected to reject an empty
            # context - confirm against the transformers documentation.
            continue
        score = qa_pipeline(question=question, context=chunk)["score"]
        if score > best_score:
            best_score, best_chunk = score, chunk
    return best_chunk
|
| 35 |
+
|
| 36 |
# Streamlit UI
st.title("Chat with AWS Restart PDF")

# Use the uploaded PDF file: extract its text and split it into chunks before
# any question is asked, so the chunks are ready when the button is pressed.
pdf_path = "AWS restart program information.docx.pdf"  # Update with your file name
pdf_text = extract_text_from_pdf(pdf_path)
chunks = split_text(pdf_text)  # Split the text into chunks

st.write("✅ PDF Loaded Successfully!")

# User Input
question = st.text_input("Ask a question about AWS Restart program:")

if st.button("Get Answer") and question:
    relevant_chunk = find_relevant_chunk(question, chunks)  # Get the best chunk
    if relevant_chunk:
        # Ask the model again on the best chunk only, to obtain the answer text.
        response = qa_pipeline(question=question, context=relevant_chunk)
        st.write("Answer:", response['answer'])
    else:
        # find_relevant_chunk returns "" when it selected no chunk (for
        # example, when there are no chunks); do not call the model with an
        # empty context in that case.
        st.warning("No relevant text was found in the PDF to answer this question.")
|