Spaces:

Shankarm08
/

pdfreader

Sleeping

App Files Community

Shankarm08 committed on Oct 5, 2024

Commit

96f0bc8

verified ·

1 Parent(s): a472326

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -11

app.py CHANGED Viewed

@@ -10,6 +10,10 @@ model = BertModel.from_pretrained(model_name)
 # Function to get BERT embeddings
 def get_embeddings(text):
     # Ensure that text length does not exceed BERT's maximum input length
     inputs = tokenizer.encode_plus(
         text,
@@ -22,9 +26,13 @@ def get_embeddings(text):
     with torch.no_grad():  # Disable gradient calculation for inference
         outputs = model(**inputs)
-    # Extract the embeddings from the last hidden state
-    return outputs.last_hidden_state[:, 0, :].detach().cpu().numpy()  # Move to CPU before converting to numpy
 # Extract text from PDF
 def extract_text_from_pdf(pdf_file):
@@ -46,8 +54,11 @@ pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"])
 if pdf_file:
     pdf_text = extract_text_from_pdf(pdf_file)
-    pdf_embeddings = get_embeddings(pdf_text)
-    st.success("PDF loaded successfully!")
 # User input for chatbot
 user_input = st.text_input("Ask a question about the PDF:")
@@ -57,10 +68,12 @@ if st.button("Get Response"):
         st.warning("Please upload a PDF file first.")
     else:
         # Get embeddings for user input
-        user_embeddings = get_embeddings(user_input)
-        # For demonstration, simply return the PDF text.
-        # Implement similarity matching logic here as needed.
-        st.write("### Response:")
-        st.write(pdf_text)  # For simplicity, returning all text

 # Function to get BERT embeddings
 def get_embeddings(text):
+    # Check if input text is empty
+    if not text.strip():
+        raise ValueError("Input text is empty.")
     # Ensure that text length does not exceed BERT's maximum input length
     inputs = tokenizer.encode_plus(
         text,
     with torch.no_grad():  # Disable gradient calculation for inference
         outputs = model(**inputs)
+    # Check if the output contains the last hidden state
+    if hasattr(outputs, 'last_hidden_state'):
+        # Extract the embeddings from the last hidden state
+        return outputs.last_hidden_state[:, 0, :].detach().cpu().numpy()  # Move to CPU before converting to numpy
+    else:
+        raise ValueError("Model output does not contain 'last_hidden_state'. Please check the model configuration.")
 # Extract text from PDF
 def extract_text_from_pdf(pdf_file):
 if pdf_file:
     pdf_text = extract_text_from_pdf(pdf_file)
+    try:
+        pdf_embeddings = get_embeddings(pdf_text)
+        st.success("PDF loaded successfully!")
+    except Exception as e:
+        st.error(f"Error while processing PDF: {e}")
 # User input for chatbot
 user_input = st.text_input("Ask a question about the PDF:")
         st.warning("Please upload a PDF file first.")
     else:
         # Get embeddings for user input
+        try:
+            user_embeddings = get_embeddings(user_input)
+            # For demonstration, simply return the PDF text.
+            # Implement similarity matching logic here as needed.
+            st.write("### Response:")
+            st.write(pdf_text)  # For simplicity, returning all text
+        except Exception as e:
+            st.error(f"Error while processing user input: {e}")