Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,13 +36,14 @@ def extract_text_from_docx(file):
|
|
| 36 |
|
| 37 |
# Function to chunk text into smaller parts
|
| 38 |
def chunk_text(text, chunk_size=512):
|
| 39 |
-
|
|
|
|
| 40 |
return chunks
|
| 41 |
|
| 42 |
# Function to create FAISS index and store embeddings
|
| 43 |
def create_faiss_index(texts, model):
|
| 44 |
embeddings = model.encode(texts)
|
| 45 |
-
embeddings = normalize(embeddings
|
| 46 |
index = faiss.IndexFlatL2(embeddings.shape[1]) # Create FAISS index
|
| 47 |
index.add(embeddings) # Add embeddings to FAISS index
|
| 48 |
return index, embeddings
|
|
@@ -50,11 +51,9 @@ def create_faiss_index(texts, model):
|
|
| 50 |
# Function to retrieve context from FAISS
|
| 51 |
def retrieve_context(query, index, texts, model, top_k=5):
|
| 52 |
query_embedding = model.encode([query])
|
| 53 |
-
query_embedding = normalize(query_embedding, axis=1) # Normalize query embedding
|
| 54 |
distances, indices = index.search(query_embedding, top_k)
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
return "\n".join(retrieved_texts), len(retrieved_texts)
|
| 58 |
|
| 59 |
# Function to query Groq API
|
| 60 |
def query_groq_api(context, question):
|
|
@@ -100,15 +99,14 @@ def main():
|
|
| 100 |
|
| 101 |
if user_question:
|
| 102 |
if st.button("Submit Question"):
|
|
|
|
|
|
|
| 103 |
# Retrieve relevant context from the FAISS index
|
| 104 |
-
retrieved_context
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
# Query Groq API with the relevant context and question
|
| 110 |
-
answer = query_groq_api(retrieved_context, user_question)
|
| 111 |
-
st.success(answer)
|
| 112 |
|
| 113 |
if __name__ == "__main__":
|
| 114 |
main()
|
|
|
|
| 36 |
|
| 37 |
# Function to chunk text into smaller parts
def chunk_text(text, chunk_size=512):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    The final piece may be shorter than chunk_size; an empty string yields
    an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    return pieces
|
| 42 |
|
| 43 |
# Function to create FAISS index and store embeddings
def create_faiss_index(texts, model):
    """Encode *texts*, L2-normalize the embeddings, and index them in FAISS.

    Args:
        texts: Sequence of strings to embed.
        model: Encoder exposing ``.encode(texts)`` -> 2-D embedding array.

    Returns:
        (index, embeddings): the populated ``faiss.IndexFlatL2`` and the
        normalized float32 embedding matrix, in the same row order as texts.
    """
    embeddings = model.encode(texts)
    # Normalize so L2 distance ranks the same as cosine similarity.
    # FAISS only accepts float32 matrices, so cast explicitly rather than
    # relying on the encoder/normalizer to preserve dtype.
    embeddings = normalize(embeddings).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])  # Create FAISS index
    index.add(embeddings)  # Add embeddings to FAISS index
    return index, embeddings
|
|
|
|
| 51 |
# Function to retrieve context from FAISS
def retrieve_context(query, index, texts, model, top_k=5):
    """Return the *top_k* chunks most similar to *query*, joined by newlines.

    Args:
        query: User question to embed and search with.
        index: FAISS index built over normalized chunk embeddings.
        texts: Chunk list the index was built from, in the same row order.
        model: Encoder exposing ``.encode(list[str])`` -> 2-D array.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunk texts concatenated with newlines.
    """
    query_embedding = model.encode([query])
    # The indexed vectors are normalized in create_faiss_index; normalize the
    # query the same way so L2 distances are comparable (cosine-style ranking).
    query_embedding = normalize(query_embedding, axis=1)
    distances, indices = index.search(query_embedding, top_k)
    # FAISS pads results with -1 when fewer than top_k vectors exist —
    # skip any index that does not map to a real chunk.
    retrieved_texts = [texts[i] for i in indices[0] if 0 <= i < len(texts)]
    return "\n".join(retrieved_texts)
|
|
|
|
| 57 |
|
| 58 |
# Function to query Groq API
|
| 59 |
def query_groq_api(context, question):
|
|
|
|
| 99 |
|
| 100 |
if user_question:
|
| 101 |
if st.button("Submit Question"):
|
| 102 |
+
st.write("Answer:")
|
| 103 |
+
|
| 104 |
# Retrieve relevant context from the FAISS index
|
| 105 |
+
retrieved_context = retrieve_context(user_question, index, chunks, embedder_model)
|
| 106 |
+
|
| 107 |
+
# Query Groq API with the context and question
|
| 108 |
+
answer = query_groq_api(retrieved_context, user_question)
|
| 109 |
+
st.success(answer)
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
if __name__ == "__main__":
|
| 112 |
main()
|