NHZ committed on
Commit
421a989
·
verified ·
1 Parent(s): 2a662d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -7
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
4
  import faiss
5
  from PyPDF2 import PdfReader
6
  from sentence_transformers import SentenceTransformer
 
7
  from langchain.vectorstores import FAISS
8
  from langchain.embeddings import HuggingFaceEmbeddings
9
  from langchain.chains import RetrievalQA
@@ -62,8 +63,20 @@ def extract_pdf_content(drive_url):
62
  # Function to create a FAISS vector store from the document content
63
  def create_vector_store(text):
64
  sentences = text.split(". ")
65
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
66
- vector_store = FAISS.from_texts(sentences, embedding=embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
67
  return vector_store, sentences
68
 
69
  # Streamlit app
@@ -93,9 +106,9 @@ if text:
93
  prompt_template = PromptTemplate(
94
  template="""
95
  Use the following context to answer the question:
96
-
97
  {context}
98
-
99
  Question: {question}
100
  Answer:""",
101
  input_variables=["context", "question"]
@@ -109,8 +122,17 @@ if text:
109
  return_source_documents=True # Optional
110
  )
111
 
112
- # Run the query through the QA chain
113
- result = qa_chain.run(query)
114
- st.write("Answer:", result)
 
 
 
 
 
 
 
 
 
115
  else:
116
  st.error("Failed to extract content from the document.")
 
4
  import faiss
5
  from PyPDF2 import PdfReader
6
  from sentence_transformers import SentenceTransformer
7
+ from transformers import AutoTokenizer, AutoModel
8
  from langchain.vectorstores import FAISS
9
  from langchain.embeddings import HuggingFaceEmbeddings
10
  from langchain.chains import RetrievalQA
 
63
  # Function to create a FAISS vector store from the document content
64
def create_vector_store(text):
    """Build a FAISS vector store over the document's sentences.

    The text is split on ". " into sentence-sized chunks, each chunk is
    embedded with a Hugging Face transformer (mean-pooled last hidden
    state), and the resulting vectors are loaded into a LangChain FAISS
    store.

    Args:
        text: Full document text extracted from the PDF.

    Returns:
        A ``(vector_store, sentences)`` tuple: the FAISS store and the
        list of sentence chunks it indexes.
    """
    # Local import: torch is only needed here and was missing at module
    # level in the original commit (torch.no_grad would raise NameError).
    import torch

    sentences = text.split(". ")

    # Use a Hugging Face transformer model for document embeddings.
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    def embed(sentence):
        # Mean-pool the last hidden state into one fixed-size vector.
        tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            return model(**tokens).last_hidden_state.mean(dim=1).numpy()

    vectors = [embed(sentence)[0].tolist() for sentence in sentences]

    # FAISS.from_embeddings expects (text, vector) pairs via
    # ``text_embeddings`` plus an Embeddings object that will be used to
    # embed future *queries* — pass one backed by the same model so query
    # and document vectors live in the same space.
    query_embedder = HuggingFaceEmbeddings(model_name=model_name)
    vector_store = FAISS.from_embeddings(
        text_embeddings=list(zip(sentences, vectors)),
        embedding=query_embedder,
    )
    return vector_store, sentences
81
 
82
  # Streamlit app
 
106
  prompt_template = PromptTemplate(
107
  template="""
108
  Use the following context to answer the question:
109
+
110
  {context}
111
+
112
  Question: {question}
113
  Answer:""",
114
  input_variables=["context", "question"]
 
122
  return_source_documents=True # Optional
123
  )
124
 
125
+ # Run the query through the QA chain and get the outputs
126
+ response = qa_chain({"query": query})
127
+ answer = response["result"]
128
+
129
+ # Display the result
130
+ st.write("Answer:", answer)
131
+
132
+ # Optionally display the source documents
133
+ if "source_documents" in response:
134
+ st.write("Source Documents:")
135
+ for doc in response["source_documents"]:
136
+ st.write(doc.page_content)
137
  else:
138
  st.error("Failed to extract content from the document.")