RAG / prepare_embedding.py
MohamedBouhamed's picture
fixing issues-2
516b795
raw
history blame contribute delete
640 Bytes
# prepare_embedding.py - embed the document chunks and store them in a Chroma database.
from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
import pickle
# Load all the chunks here (paste in the PDF-processing code).
# ... [your PDF-processing code] ...
# NOTE(review): nothing in this file defines `chunks`; the code pasted above
# must provide it as an iterable of items that each expose a 'content' entry.

# Build the embedding model that turns each chunk text into a vector.
embedding_model = SentenceTransformerEmbeddings(
    model_name="mixedbread-ai/mxbai-embed-large-v1",
)

# Keep only the text payload of every chunk.
all_chunks = [
    entry['content']
    for entry in chunks
]

# Embed the texts and write them to a Chroma store under ./chroma_db.
db = Chroma.from_texts(
    all_chunks,
    embedding=embedding_model,
    persist_directory="./chroma_db",
)
print("✅ Embeddings saved to ./chroma_db")