Spaces:

NHZ
/

First_Aid_Kit

Sleeping

App Files Files Community

NHZ committed on Jan 4, 2025

Commit

5492fda

verified ·

1 Parent(s): 75c5d0a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -1,5 +1,3 @@
-import numpy as np  # Import numpy first to avoid compatibility issues
-import faiss
 import os
 import re
 import requests
@@ -9,10 +7,11 @@ import faiss
 from sentence_transformers import SentenceTransformer
 from groq import Groq
-# Built-in Google Drive document link
 DOCUMENT_URL = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"
-# Function to download document from the Google Drive link
 def download_document(file_url):
     file_id = file_url.split("/d/")[1].split("/")[0]
     download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
@@ -30,8 +29,8 @@ def extract_text_from_pdf(file_path):
             text += page.extract_text()
     return text
-# Chunk the text
-def chunk_text(text, chunk_size=500):
     sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
     chunks, current_chunk = [], ""
     for sentence in sentences:
@@ -61,16 +60,16 @@ def query_faiss(query, index, chunks, model, k=5):
 # Streamlit application
 def main():
     st.title("RAG-based Application")
-    st.write("Interacting with a knowledge base derived from the uploaded document.")
-    # Processing the document
     st.write("Processing the pre-configured document...")
     document_path = download_document(DOCUMENT_URL)
     text = extract_text_from_pdf(document_path)
     chunks = chunk_text(text)
-    # Load model for embeddings
-    st.write("Loading model and creating FAISS index...")
     embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
     index, embeddings = create_faiss_index(chunks, embedding_model)
     st.success("Document processed and indexed!")
@@ -78,16 +77,16 @@ def main():
     # Query the database
     query = st.text_input("Enter your query")
     if query:
         results = query_faiss(query, index, chunks, embedding_model)
         st.write("Top relevant chunks:")
         for i, result in enumerate(results):
             st.write(f"{i+1}. {result}")
     # Groq API interaction
-    groq_api_key = os.environ.get("GROQ_API_KEY")  # Securely fetched from Hugging Face Secrets
     if groq_api_key:
         client = Groq(api_key=groq_api_key)
         if query:
             st.write("Fetching response from Groq API...")
             chat_completion = client.chat.completions.create(
@@ -102,3 +101,4 @@ def main():
 if __name__ == "__main__":
     main()

 import os
 import re
 import requests
 from sentence_transformers import SentenceTransformer
 from groq import Groq
+# Constants
 DOCUMENT_URL = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"
+CHUNK_SIZE = 500
+# Function to download document
 def download_document(file_url):
     file_id = file_url.split("/d/")[1].split("/")[0]
     download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
             text += page.extract_text()
     return text
+# Chunk text into smaller parts
+def chunk_text(text, chunk_size=CHUNK_SIZE):
     sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
     chunks, current_chunk = [], ""
     for sentence in sentences:
 # Streamlit application
 def main():
     st.title("RAG-based Application")
+    st.write("This application uses a pre-configured document as the dataset for query responses.")
+    # Download and process the document
     st.write("Processing the pre-configured document...")
     document_path = download_document(DOCUMENT_URL)
     text = extract_text_from_pdf(document_path)
     chunks = chunk_text(text)
+    # Create FAISS index
+    st.write("Creating FAISS index...")
     embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
     index, embeddings = create_faiss_index(chunks, embedding_model)
     st.success("Document processed and indexed!")
     # Query the database
     query = st.text_input("Enter your query")
     if query:
+        st.write("Fetching relevant content from the document...")
         results = query_faiss(query, index, chunks, embedding_model)
         st.write("Top relevant chunks:")
         for i, result in enumerate(results):
             st.write(f"{i+1}. {result}")
     # Groq API interaction
+    groq_api_key = os.environ.get("GROQ_API_KEY")  # Fetch API key from Hugging Face Secrets
     if groq_api_key:
         client = Groq(api_key=groq_api_key)
         if query:
             st.write("Fetching response from Groq API...")
             chat_completion = client.chat.completions.create(
 if __name__ == "__main__":
     main()