Asrar990 committed on
Commit
aa5fbc4
·
verified ·
1 Parent(s): d1d8b9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -14
app.py CHANGED
@@ -36,13 +36,14 @@ def extract_text_from_docx(file):
36
 
37
  # Function to chunk text into smaller parts
38
  def chunk_text(text, chunk_size=512):
39
- chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
 
40
  return chunks
41
 
42
  # Function to create FAISS index and store embeddings
43
  def create_faiss_index(texts, model):
44
  embeddings = model.encode(texts)
45
- embeddings = normalize(embeddings, axis=1) # Normalize embeddings for better comparison
46
  index = faiss.IndexFlatL2(embeddings.shape[1]) # Create FAISS index
47
  index.add(embeddings) # Add embeddings to FAISS index
48
  return index, embeddings
@@ -50,11 +51,9 @@ def create_faiss_index(texts, model):
50
  # Function to retrieve context from FAISS
51
  def retrieve_context(query, index, texts, model, top_k=5):
52
  query_embedding = model.encode([query])
53
- query_embedding = normalize(query_embedding, axis=1) # Normalize query embedding
54
  distances, indices = index.search(query_embedding, top_k)
55
-
56
- retrieved_texts = [texts[i] for i in indices[0] if distances[0][i] < 0.9] # Filter by distance threshold
57
- return "\n".join(retrieved_texts), len(retrieved_texts)
58
 
59
  # Function to query Groq API
60
  def query_groq_api(context, question):
@@ -100,15 +99,14 @@ def main():
100
 
101
  if user_question:
102
  if st.button("Submit Question"):
 
 
103
  # Retrieve relevant context from the FAISS index
104
- retrieved_context, num_relevant_texts = retrieve_context(user_question, index, chunks, embedder_model)
105
-
106
- if num_relevant_texts == 0:
107
- st.warning("The question is outside the scope of the uploaded document.")
108
- else:
109
- # Query Groq API with the relevant context and question
110
- answer = query_groq_api(retrieved_context, user_question)
111
- st.success(answer)
112
 
113
  if __name__ == "__main__":
114
  main()
 
36
 
37
  # Function to chunk text into smaller parts
38
  def chunk_text(text, chunk_size=512):
39
+ # Split text into chunks of specified size
40
+ chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
41
  return chunks
42
 
43
  # Function to create FAISS index and store embeddings
44
  def create_faiss_index(texts, model):
45
  embeddings = model.encode(texts)
46
+ embeddings = normalize(embeddings) # Normalize embeddings for better comparison
47
  index = faiss.IndexFlatL2(embeddings.shape[1]) # Create FAISS index
48
  index.add(embeddings) # Add embeddings to FAISS index
49
  return index, embeddings
 
51
  # Function to retrieve context from FAISS
52
  def retrieve_context(query, index, texts, model, top_k=5):
53
  query_embedding = model.encode([query])
 
54
  distances, indices = index.search(query_embedding, top_k)
55
+ retrieved_texts = [texts[i] for i in indices[0]]
56
+ return "\n".join(retrieved_texts)
 
57
 
58
  # Function to query Groq API
59
  def query_groq_api(context, question):
 
99
 
100
  if user_question:
101
  if st.button("Submit Question"):
102
+ st.write("Answer:")
103
+
104
  # Retrieve relevant context from the FAISS index
105
+ retrieved_context = retrieve_context(user_question, index, chunks, embedder_model)
106
+
107
+ # Query Groq API with the context and question
108
+ answer = query_groq_api(retrieved_context, user_question)
109
+ st.success(answer)
 
 
 
110
 
111
  if __name__ == "__main__":
112
  main()