Spaces:

SujathaL
/

AWS_Restart_Program_Chatbot

Sleeping

App Files Files Community

SujathaL committed on Mar 3, 2025

Commit

a1e9850

verified ·

1 Parent(s): 3a62d0c

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -1

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 from transformers import pipeline
 import PyPDF2
 # Load Hugging Face Question Answering model
 qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
@@ -14,17 +15,38 @@ def extract_text_from_pdf(pdf_path):
             text += page.extract_text() + "\n"
     return text
 # Streamlit UI
 st.title("Chat with AWS Restart PDF")
 # Use the uploaded PDF file
 pdf_path = "AWS restart program information.docx.pdf"  # Update with your file name
 pdf_text = extract_text_from_pdf(pdf_path)
 st.write("✅ PDF Loaded Successfully!")
 # User Input
 question = st.text_input("Ask a question about AWS Restart program:")
 if st.button("Get Answer") and question:
-    response = qa_pipeline(question=question, context=pdf_text)
     st.write("Answer:", response['answer'])

 import streamlit as st
 from transformers import pipeline
 import PyPDF2
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 # Load Hugging Face Question Answering model
 qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
             text += page.extract_text() + "\n"
     return text
+# Function to split text into smaller chunks
+def split_text(text):
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    chunks = text_splitter.split_text(text)
+    return chunks
+# Function to find the most relevant chunk for a question
+def find_relevant_chunk(question, chunks):
+    best_chunk = ""
+    best_score = 0
+    for chunk in chunks:
+        response = qa_pipeline(question=question, context=chunk)
+        score = response['score']
+        if score > best_score:
+            best_score = score
+            best_chunk = chunk
+    return best_chunk
 # Streamlit UI
 st.title("Chat with AWS Restart PDF")
 # Use the uploaded PDF file
 pdf_path = "AWS restart program information.docx.pdf"  # Update with your file name
 pdf_text = extract_text_from_pdf(pdf_path)
+chunks = split_text(pdf_text)  # Split the text into chunks
 st.write("✅ PDF Loaded Successfully!")
 # User Input
 question = st.text_input("Ask a question about AWS Restart program:")
 if st.button("Get Answer") and question:
+    relevant_chunk = find_relevant_chunk(question, chunks)  # Get the best chunk
+    response = qa_pipeline(question=question, context=relevant_chunk)  # Ask model on best chunk
     st.write("Answer:", response['answer'])