NHZ committed on
Commit
5492fda
·
verified ·
1 Parent(s): 75c5d0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -1,5 +1,3 @@
1
- import numpy as np # Import numpy first to avoid compatibility issues
2
- import faiss
3
  import os
4
  import re
5
  import requests
@@ -9,10 +7,11 @@ import faiss
9
  from sentence_transformers import SentenceTransformer
10
  from groq import Groq
11
 
12
- # Built-in Google Drive document link
13
  DOCUMENT_URL = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"
 
14
 
15
- # Function to download document from the Google Drive link
16
  def download_document(file_url):
17
  file_id = file_url.split("/d/")[1].split("/")[0]
18
  download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
@@ -30,8 +29,8 @@ def extract_text_from_pdf(file_path):
30
  text += page.extract_text()
31
  return text
32
 
33
- # Chunk the text
34
- def chunk_text(text, chunk_size=500):
35
  sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
36
  chunks, current_chunk = [], ""
37
  for sentence in sentences:
@@ -61,16 +60,16 @@ def query_faiss(query, index, chunks, model, k=5):
61
  # Streamlit application
62
  def main():
63
  st.title("RAG-based Application")
64
- st.write("Interacting with a knowledge base derived from the uploaded document.")
65
 
66
- # Processing the document
67
  st.write("Processing the pre-configured document...")
68
  document_path = download_document(DOCUMENT_URL)
69
  text = extract_text_from_pdf(document_path)
70
  chunks = chunk_text(text)
71
 
72
- # Load model for embeddings
73
- st.write("Loading model and creating FAISS index...")
74
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
75
  index, embeddings = create_faiss_index(chunks, embedding_model)
76
  st.success("Document processed and indexed!")
@@ -78,16 +77,16 @@ def main():
78
  # Query the database
79
  query = st.text_input("Enter your query")
80
  if query:
 
81
  results = query_faiss(query, index, chunks, embedding_model)
82
  st.write("Top relevant chunks:")
83
  for i, result in enumerate(results):
84
  st.write(f"{i+1}. {result}")
85
 
86
  # Groq API interaction
87
- groq_api_key = os.environ.get("GROQ_API_KEY") # Securely fetched from Hugging Face Secrets
88
  if groq_api_key:
89
  client = Groq(api_key=groq_api_key)
90
-
91
  if query:
92
  st.write("Fetching response from Groq API...")
93
  chat_completion = client.chat.completions.create(
@@ -102,3 +101,4 @@ def main():
102
  if __name__ == "__main__":
103
  main()
104
 
 
 
 
 
1
  import os
2
  import re
3
  import requests
 
7
  from sentence_transformers import SentenceTransformer
8
  from groq import Groq
9
 
10
+ # Constants
11
  DOCUMENT_URL = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"
12
+ CHUNK_SIZE = 500
13
 
14
+ # Function to download document
15
  def download_document(file_url):
16
  file_id = file_url.split("/d/")[1].split("/")[0]
17
  download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
 
29
  text += page.extract_text()
30
  return text
31
 
32
+ # Chunk text into smaller parts
33
+ def chunk_text(text, chunk_size=CHUNK_SIZE):
34
  sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
35
  chunks, current_chunk = [], ""
36
  for sentence in sentences:
 
60
  # Streamlit application
61
  def main():
62
  st.title("RAG-based Application")
63
+ st.write("This application uses a pre-configured document as the dataset for query responses.")
64
 
65
+ # Download and process the document
66
  st.write("Processing the pre-configured document...")
67
  document_path = download_document(DOCUMENT_URL)
68
  text = extract_text_from_pdf(document_path)
69
  chunks = chunk_text(text)
70
 
71
+ # Create FAISS index
72
+ st.write("Creating FAISS index...")
73
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
74
  index, embeddings = create_faiss_index(chunks, embedding_model)
75
  st.success("Document processed and indexed!")
 
77
  # Query the database
78
  query = st.text_input("Enter your query")
79
  if query:
80
+ st.write("Fetching relevant content from the document...")
81
  results = query_faiss(query, index, chunks, embedding_model)
82
  st.write("Top relevant chunks:")
83
  for i, result in enumerate(results):
84
  st.write(f"{i+1}. {result}")
85
 
86
  # Groq API interaction
87
+ groq_api_key = os.environ.get("GROQ_API_KEY") # Fetch API key from Hugging Face Secrets
88
  if groq_api_key:
89
  client = Groq(api_key=groq_api_key)
 
90
  if query:
91
  st.write("Fetching response from Groq API...")
92
  chat_completion = client.chat.completions.create(
 
101
  if __name__ == "__main__":
102
  main()
103
 
104
+