from typing import List, Optional

from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
| |
|
| |
|
class Retrieval:
    """Embed document chunks and run similarity search over a FAISS index.

    Typical use: construct with an embedding model name, call
    ``create_vector_store`` once with the chunks to index, then call
    ``search`` for each query.
    """

    def __init__(self, model_name, max_model_tokens=384):
        """Set up the HuggingFace embedding model.

        No vector store is built here; call ``create_vector_store`` first.

        Parameters:
            model_name (str): Name of the HuggingFace model used for embeddings.
            max_model_tokens (int, optional): Value forwarded as ``max_length``
                in the embedding ``encode_kwargs`` (together with
                ``truncation=True``). Defaults to 384.
        """
        self.model_name = model_name
        # NOTE(review): these kwargs are forwarded to the underlying encode
        # call; whether "max_length"/"truncation" are honoured there depends on
        # the installed embedding backend -- confirm against its docs.
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            encode_kwargs={"max_length": max_model_tokens, "truncation": True},
        )
        # Start in a well-defined "no index yet" state so that search() can
        # report a clear error instead of an AttributeError.
        self.chunks: List[Document] = []
        self.vectorstore: Optional[FAISS] = None

    def create_vector_store(self, chunks: List[Document]):
        """Build a new FAISS vector store from ``chunks``.

        Any previously built store is replaced.

        Parameters:
            chunks (List[Document]): Documents to embed and index.

        Raises:
            ValueError: If ``chunks`` is empty.
        """
        if not chunks:
            # Fail early with an explicit message rather than letting the
            # index construction fail on an empty embedding list.
            raise ValueError("Cannot create a vector store from an empty list of chunks.")

        self.chunks = chunks
        self.vectorstore = FAISS.from_documents(self.chunks, self.embeddings)

    def search(self, query, k=10) -> List[Document]:
        """Return the ``k`` documents most similar to ``query``.

        Parameters:
            query (str): Text to search for.
            k (int, optional): Number of documents to return. Defaults to 10.

        Returns:
            List[Document]: Result of the store's ``similarity_search``.

        Raises:
            RuntimeError: If ``create_vector_store`` has not been called yet.
        """
        # getattr keeps this safe for instances created without __init__.
        store = getattr(self, "vectorstore", None)
        if store is None:
            raise RuntimeError(
                "Vector store has not been created; call create_vector_store() first."
            )

        return store.similarity_search(query, k=k)
| |
|