SujathaL committed on
Commit
a1e9850
·
verified ·
1 Parent(s): 3a62d0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  import PyPDF2
 
4
 
5
  # Load Hugging Face Question Answering model
6
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
@@ -14,17 +15,38 @@ def extract_text_from_pdf(pdf_path):
14
  text += page.extract_text() + "\n"
15
  return text
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Streamlit UI
18
  st.title("Chat with AWS Restart PDF")
19
 
20
  # Use the uploaded PDF file
21
  pdf_path = "AWS restart program information.docx.pdf" # Update with your file name
22
  pdf_text = extract_text_from_pdf(pdf_path)
 
 
23
  st.write("✅ PDF Loaded Successfully!")
24
 
25
  # User Input
26
  question = st.text_input("Ask a question about AWS Restart program:")
27
 
28
  if st.button("Get Answer") and question:
29
- response = qa_pipeline(question=question, context=pdf_text)
 
30
  st.write("Answer:", response['answer'])
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  import PyPDF2
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
 
6
  # Load Hugging Face Question Answering model
7
  qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
 
15
  text += page.extract_text() + "\n"
16
  return text
17
 
18
+ # Function to split text into smaller chunks
19
def split_text(text, chunk_size=500, chunk_overlap=50):
    """Split ``text`` into overlapping chunks for question answering.

    Args:
        text: The full document text to split.
        chunk_size: Upper bound on the size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: Amount of overlap between consecutive chunks,
            forwarded to ``RecursiveCharacterTextSplitter``. Defaults to 50,
            the value that was previously hard-coded.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
23
+
24
+ # Function to find the most relevant chunk for a question
25
def find_relevant_chunk(question, chunks):
    """Return the chunk on which the QA model is most confident.

    Runs ``qa_pipeline`` once per chunk and compares the ``'score'`` entry of
    each response. Only a score strictly greater than the best one seen so
    far (starting from 0) replaces the current winner, so on ties the earlier
    chunk is kept.

    Args:
        question: The question text passed to the QA pipeline.
        chunks: Iterable of context strings to evaluate.

    Returns:
        The highest-scoring chunk, or an empty string when ``chunks`` is
        empty or no chunk scores above 0.
    """
    winner, winner_score = "", 0
    for candidate in chunks:
        result = qa_pipeline(question=question, context=candidate)
        candidate_score = result['score']
        if candidate_score > winner_score:
            winner, winner_score = candidate, candidate_score
    return winner
35
+
36
  # Streamlit UI
37
  st.title("Chat with AWS Restart PDF")
38
 
39
  # Use the uploaded PDF file
40
  pdf_path = "AWS restart program information.docx.pdf" # Update with your file name
41
  pdf_text = extract_text_from_pdf(pdf_path)
42
+ chunks = split_text(pdf_text) # Split the text into chunks
43
+
44
  st.write("✅ PDF Loaded Successfully!")
45
 
46
  # User Input
47
  question = st.text_input("Ask a question about AWS Restart program:")
48
 
49
  if st.button("Get Answer") and question:
50
+ relevant_chunk = find_relevant_chunk(question, chunks) # Get the best chunk
51
+ response = qa_pipeline(question=question, context=relevant_chunk) # Ask model on best chunk
52
  st.write("Answer:", response['answer'])