Spaces:

ojas121
/

KrishnConnect

Sleeping

App Files Community

ojas121 committed on Jan 4, 2025

Commit

cd874ef

verified ·

1 Parent(s): 3602c46

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -17

app.py CHANGED Viewed

@@ -1,24 +1,32 @@
-import os
-os.system("pip install huggingface_hub==0.14.1")
-os.system("pip install sentence-transformers==2.2.2")
 import streamlit as st
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
 # Function to extract text from the uploaded PDF
 def extract_text_from_pdf(pdf_file):
     reader = PyPDF2.PdfReader(pdf_file)
     text = ""
     for page in reader.pages:
-        text += page.extract_text()
     return text
 # Function to process text into sentences and embeddings
-def process_text(text):
-    sentences = [sentence.strip() for sentence in text.split("\n") if sentence.strip()]
-    model = SentenceTransformer('all-MiniLM-L6-v2')  # A lightweight transformer model
     embeddings = model.encode(sentences, show_progress_bar=True)
-    return sentences, embeddings, model
 # Streamlit UI
 st.title("GitaGPT: Bhagavad Gita Chatbot")
@@ -31,10 +39,14 @@ if uploaded_file:
     with st.spinner("Extracting text and processing..."):
         # Step 1: Extract text
         raw_text = extract_text_from_pdf(uploaded_file)
-        # Step 2: Process text to generate embeddings
-        sentences, embeddings, model = process_text(raw_text)
     st.success("PDF processed successfully! Ask your questions below.")
     # Step 3: Input for user query
@@ -45,12 +57,13 @@ if uploaded_file:
             # Compute embedding for the user query
             query_embedding = model.encode(user_query)
             # Compute similarity scores
-            scores = util.cos_sim(query_embedding, embeddings)
-            best_match_idx = scores.argmax()
-            # Fetch the best matching sentence
-            response = sentences[best_match_idx]
-        st.write(f"**Answer:** {response}")
 else:
     st.info("Please upload a PDF file to begin.")

 import streamlit as st
 from sentence_transformers import SentenceTransformer, util
 import PyPDF2
+import nltk
+from nltk.tokenize import sent_tokenize
# Ensure the NLTK sentence-tokenizer data is available. Recent NLTK releases
# load the "punkt_tab" resource inside sent_tokenize, while older releases
# load "punkt", so fetch both. quiet=True keeps the downloader from printing
# its progress each time the script is executed.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource, quiet=True)
 # Function to extract text from the uploaded PDF
 def extract_text_from_pdf(pdf_file):
     """Return the extractable text of every page in a PDF.

     Args:
         pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
             (here, the file object produced by the upload widget).

     Returns:
         The text of all pages that yielded any, joined with newlines.
         An empty string when no page produced text.
     """
     reader = PyPDF2.PdfReader(pdf_file)
     # extract_text() can come back empty/None for a page; skip those pages.
     page_texts = (page.extract_text() for page in reader.pages)
     # Join with a newline so the last word of one page is not fused with the
     # first word of the next, which would confuse sentence splitting later.
     return "\n".join(page_text for page_text in page_texts if page_text)
+# Cached function to load the transformer model
@st.cache_resource
def load_model():
    """Create the sentence-embedding model used by the app.

    The function is wrapped in ``st.cache_resource`` so the model object is
    cached rather than rebuilt on each call.

    Returns:
        A ``SentenceTransformer`` for the 'all-MiniLM-L6-v2' checkpoint.
    """
    model_name = 'all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)
 # Function to process text into sentences and embeddings
def process_text(text, model):
    """Split text into sentences and compute an embedding for each.

    Args:
        text: Raw text to split with NLTK's ``sent_tokenize``.
        model: Object exposing ``encode(sentences, show_progress_bar=...)``,
            e.g. the model returned by ``load_model``.

    Returns:
        A ``(sentences, embeddings)`` pair: the list of sentences and
        whatever ``model.encode`` returns for that list.
    """
    sentence_list = sent_tokenize(text)
    vectors = model.encode(sentence_list, show_progress_bar=True)
    return sentence_list, vectors
 # Streamlit UI
 st.title("GitaGPT: Bhagavad Gita Chatbot")
     with st.spinner("Extracting text and processing..."):
         # Step 1: Extract text
         raw_text = extract_text_from_pdf(uploaded_file)
+        if not raw_text.strip():
+            st.error("The uploaded PDF does not contain extractable text.")
+            st.stop()
+        # Step 2: Load model and process text
+        model = load_model()
+        sentences, embeddings = process_text(raw_text, model)
     st.success("PDF processed successfully! Ask your questions below.")
     # Step 3: Input for user query
             # Compute embedding for the user query
             query_embedding = model.encode(user_query)
             # Compute similarity scores
+            scores = util.cos_sim(query_embedding, embeddings).flatten()
+            top_indices = scores.argsort(descending=True)[:5]
+            top_matches = [(sentences[idx], scores[idx].item()) for idx in top_indices]
+        # Display top matches
+        st.write("**Top Responses:**")
+        for idx, (response, score) in enumerate(top_matches):
+            st.write(f"{idx + 1}. {response} (Score: {score:.4f})")
 else:
     st.info("Please upload a PDF file to begin.")