NHZ committed on
Commit
3e73409
·
verified ·
1 Parent(s): 4f32679

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -5,7 +5,7 @@ import faiss
5
  from PyPDF2 import PdfReader
6
  from sentence_transformers import SentenceTransformer
7
  from transformers import AutoTokenizer, AutoModel
8
- import torch # Import torch for tensor operations
9
  from langchain.vectorstores import FAISS
10
  from langchain.embeddings import HuggingFaceEmbeddings
11
  from langchain.chains import RetrievalQA
@@ -63,7 +63,8 @@ def extract_pdf_content(drive_url):
63
 
64
  # Function to create a FAISS vector store from the document content
65
  def create_vector_store(text):
66
- sentences = text.split(". ")
 
67
 
68
  # Use Hugging Face transformer model for embeddings
69
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
@@ -72,11 +73,14 @@ def create_vector_store(text):
72
 
73
  def embed(sentence):
74
  tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
75
- with torch.no_grad(): # Use torch for no_grad context
76
  embeddings = model(**tokens).last_hidden_state.mean(dim=1).numpy()
77
  return embeddings
78
 
 
79
  embeddings = [embed(sentence)[0] for sentence in sentences]
 
 
80
  vector_store = FAISS.from_embeddings(sentences, embeddings)
81
  return vector_store, sentences
82
 
 
5
  from PyPDF2 import PdfReader
6
  from sentence_transformers import SentenceTransformer
7
  from transformers import AutoTokenizer, AutoModel
8
+ import torch
9
  from langchain.vectorstores import FAISS
10
  from langchain.embeddings import HuggingFaceEmbeddings
11
  from langchain.chains import RetrievalQA
 
63
 
64
  # Function to create a FAISS vector store from the document content
65
  def create_vector_store(text):
66
+ # Split the text into sentences and clean it
67
+ sentences = [sentence.strip() for sentence in text.split(". ") if sentence.strip()]
68
 
69
  # Use Hugging Face transformer model for embeddings
70
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
 
73
 
74
  def embed(sentence):
75
  tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
76
+ with torch.no_grad():
77
  embeddings = model(**tokens).last_hidden_state.mean(dim=1).numpy()
78
  return embeddings
79
 
80
+ # Generate embeddings for cleaned sentences
81
  embeddings = [embed(sentence)[0] for sentence in sentences]
82
+
83
+ # Create a FAISS vector store with valid embeddings
84
  vector_store = FAISS.from_embeddings(sentences, embeddings)
85
  return vector_store, sentences
86