Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import os | |
| from langchain_core.documents import Document | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_chroma import Chroma | |
def create_Doc(data):
    """Turn every row of *data* into a ``Document``.

    Rows are walked with ``data.iterrows()``. For each row the ``lyric``
    field becomes the document's ``page_content``; the ``title`` field is
    stored in the metadata under ``"name"`` and the row's index label
    under ``"id"``.

    Returns:
        A list with one ``Document`` per row, in iteration order.
    """
    return [
        Document(
            page_content=row.lyric,
            metadata={"name": row.title, "id": idx},
        )
        for idx, row in data.iterrows()
    ]
def load_embedding(model_name: str = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'):
    """Build a ``HuggingFaceEmbeddings`` object for *model_name*.

    Args:
        model_name: Identifier of the sentence-embedding model to use.
            Defaults to the multilingual MiniLM paraphrase model.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    # Forward the caller's choice; previously the argument was ignored and
    # the default model string was always used.
    return HuggingFaceEmbeddings(model_name=model_name)
def load_vectorstore(documents, embeddings):
    """Create a Chroma vector store from *documents*.

    Args:
        documents: The documents to index.
        embeddings: Embedding object handed to Chroma as ``embedding``.

    Returns:
        The store returned by ``Chroma.from_documents``. No collection name
        or persist directory is passed here, so Chroma's defaults apply.
    """
    return Chroma.from_documents(documents=documents, embedding=embeddings)
def process(list_text, vectorstore, search_type='mmr'):
    """Run every query in *list_text* through a retriever built on *vectorstore*.

    Args:
        list_text: Iterable of query strings.
        vectorstore: Object exposing ``as_retriever(search_type=...)``,
            e.g. the store returned by ``load_vectorstore``.
        search_type: Retrieval strategy forwarded to ``as_retriever``
            (default ``'mmr'``).

    Returns:
        A list holding the retriever's result for each query, in the same
        order as *list_text*. Empty input yields an empty list.
    """
    # as_retriever returns a new retriever object; it must be kept and
    # invoked. The vector store itself is not the thing to call ``invoke`` on.
    retriever = vectorstore.as_retriever(search_type=search_type)
    return [retriever.invoke(query) for query in list_text]