RAG_APP / src /title_embd /preprocessing.py
ELHACHYMI's picture
title embed code
f2aadac verified
raw
history blame contribute delete
295 Bytes
import pandas as pd
from src.configs.config import METADATA_FILE
def preprocess_titles():
    """Load the metadata CSV and pair each document title with its link.

    Reads the file at ``METADATA_FILE`` with pandas and takes the
    "Nom du document" and "Lien" columns.

    Returns:
        list[dict]: One ``{"title": ..., "link": ...}`` dict per CSV row,
        in the order the rows appear in the file.
    """
    frame = pd.read_csv(METADATA_FILE)
    title_column = frame["Nom du document"].tolist()
    link_column = frame["Lien"].tolist()

    records = []
    for title, link in zip(title_column, link_column):
        records.append({"title": title, "link": link})
    return records