Spaces:

Asrar990
/

Doc_Insight_Info

Build error

App Files Community

Asrar990 committed on Jan 1, 2025

Commit

aa5fbc4

verified ·

1 Parent(s): d1d8b9a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -14

app.py CHANGED Viewed

@@ -36,13 +36,14 @@ def extract_text_from_docx(file):
 # Function to chunk text into smaller parts
 def chunk_text(text, chunk_size=512):
-    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
     return chunks
 # Function to create FAISS index and store embeddings
 def create_faiss_index(texts, model):
     embeddings = model.encode(texts)
-    embeddings = normalize(embeddings, axis=1)  # Normalize embeddings for better comparison
     index = faiss.IndexFlatL2(embeddings.shape[1])  # Create FAISS index
     index.add(embeddings)  # Add embeddings to FAISS index
     return index, embeddings
@@ -50,11 +51,9 @@ def create_faiss_index(texts, model):
 # Function to retrieve context from FAISS
 def retrieve_context(query, index, texts, model, top_k=5):
     query_embedding = model.encode([query])
-    query_embedding = normalize(query_embedding, axis=1)  # Normalize query embedding
     distances, indices = index.search(query_embedding, top_k)
-    retrieved_texts = [texts[i] for i in indices[0] if distances[0][i] < 0.9]  # Filter by distance threshold
-    return "\n".join(retrieved_texts), len(retrieved_texts)
 # Function to query Groq API
 def query_groq_api(context, question):
@@ -100,15 +99,14 @@ def main():
         if user_question:
             if st.button("Submit Question"):
                 # Retrieve relevant context from the FAISS index
-                retrieved_context, num_relevant_texts = retrieve_context(user_question, index, chunks, embedder_model)
-                if num_relevant_texts == 0:
-                    st.warning("The question is outside the scope of the uploaded document.")
-                else:
-                    # Query Groq API with the relevant context and question
-                    answer = query_groq_api(retrieved_context, user_question)
-                    st.success(answer)
 if __name__ == "__main__":
     main()

 # Function to chunk text into smaller parts
 def chunk_text(text, chunk_size=512):
+    # Split text into chunks of specified size
+    chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
     return chunks
 # Function to create FAISS index and store embeddings
 def create_faiss_index(texts, model):
     embeddings = model.encode(texts)
+    embeddings = normalize(embeddings)  # Normalize embeddings for better comparison
     index = faiss.IndexFlatL2(embeddings.shape[1])  # Create FAISS index
     index.add(embeddings)  # Add embeddings to FAISS index
     return index, embeddings
 # Function to retrieve context from FAISS
 def retrieve_context(query, index, texts, model, top_k=5):
     query_embedding = model.encode([query])
     distances, indices = index.search(query_embedding, top_k)
+    retrieved_texts = [texts[i] for i in indices[0]]
+    return "\n".join(retrieved_texts)
 # Function to query Groq API
 def query_groq_api(context, question):
         if user_question:
             if st.button("Submit Question"):
+                st.write("Answer:")
                 # Retrieve relevant context from the FAISS index
+                retrieved_context = retrieve_context(user_question, index, chunks, embedder_model)
+                # Query Groq API with the context and question
+                answer = query_groq_api(retrieved_context, user_question)
+                st.success(answer)
 if __name__ == "__main__":
     main()