Spaces:

NHZ
/

First_Aid_Kit

Sleeping

App Files Files Community

NHZ committed on Jan 4, 2025

Commit

421a989

verified ·

1 Parent(s): 2a662d5

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -7

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import numpy as np
 import faiss
 from PyPDF2 import PdfReader
 from sentence_transformers import SentenceTransformer
 from langchain.vectorstores import FAISS
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
@@ -62,8 +63,20 @@ def extract_pdf_content(drive_url):
 # Function to create a FAISS vector store from the document content
 def create_vector_store(text):
     sentences = text.split(". ")
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    vector_store = FAISS.from_texts(sentences, embedding=embeddings)
     return vector_store, sentences
 # Streamlit app
@@ -93,9 +106,9 @@ if text:
         prompt_template = PromptTemplate(
             template="""
             Use the following context to answer the question:
             {context}
             Question: {question}
             Answer:""",
             input_variables=["context", "question"]
@@ -109,8 +122,17 @@ if text:
             return_source_documents=True  # Optional
         )
-        # Run the query through the QA chain
-        result = qa_chain.run(query)
-        st.write("Answer:", result)
 else:
     st.error("Failed to extract content from the document.")

 import faiss
 from PyPDF2 import PdfReader
 from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer, AutoModel
 from langchain.vectorstores import FAISS
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 # Function to create a FAISS vector store from the document content
 def create_vector_store(text):
+    """Build a FAISS vector store over the sentences of ``text``.
+
+    The text is split on ". " and every piece is embedded with the
+    ``sentence-transformers/all-MiniLM-L6-v2`` checkpoint, using the mean
+    of the last hidden state as the sentence vector.
+
+    Args:
+        text: Full document text.
+
+    Returns:
+        A ``(vector_store, sentences)`` tuple: the FAISS store and the list
+        of sentence strings that were indexed.
+    """
+    # Local imports: torch is needed by embed() and the Embeddings base
+    # class by the adapter below; neither is in the import lines shown here.
+    import torch
+    from langchain.embeddings.base import Embeddings
+
     sentences = text.split(". ")
+    # Use Hugging Face transformer model for embeddings
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+
+    def embed(sentence):
+        """Return the mean-pooled embedding of one sentence as a list of floats."""
+        tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            pooled = model(**tokens).last_hidden_state.mean(dim=1)
+        return pooled[0].tolist()
+
+    class _MeanPooledEmbeddings(Embeddings):
+        """Adapter so queries are embedded exactly like the indexed sentences."""
+
+        def embed_documents(self, texts):
+            return [embed(item) for item in texts]
+
+        def embed_query(self, text):
+            return embed(text)
+
+    # FAISS.from_embeddings takes (text, vector) pairs as its first argument
+    # and an Embeddings object (used to embed queries later) as its second;
+    # passing the sentence list and the vector list separately does not work.
+    text_embeddings = [(sentence, embed(sentence)) for sentence in sentences]
+    vector_store = FAISS.from_embeddings(text_embeddings, embedding=_MeanPooledEmbeddings())
     return vector_store, sentences
 # Streamlit app
         prompt_template = PromptTemplate(
             template="""
             Use the following context to answer the question:
             {context}
             Question: {question}
             Answer:""",
             input_variables=["context", "question"]
             return_source_documents=True  # Optional
         )
+        # Run the query through the QA chain and get the outputs
+        response = qa_chain({"query": query})
+        answer = response["result"]
+        # Display the result
+        st.write("Answer:", answer)
+        # Optionally display the source documents
+        if "source_documents" in response:
+            st.write("Source Documents:")
+            for doc in response["source_documents"]:
+                st.write(doc.page_content)
 else:
     st.error("Failed to extract content from the document.")