RAG_APP / src /scripts /2_ytb_index.py
sxid003's picture
Upload 83 files
3107242 verified
raw
history blame contribute delete
684 Bytes
from src.scraping.youtube_transcript import fetch_youtube_transcripts
from src.youtube_embd.preprocess import run_preprocessing
from src.youtube_embd.embed import run_embedding
from src.youtube_embd.index import run_indexing
from src.configs.config import MAX_VIDEOS
def main():
    """Run the YouTube indexing pipeline from start to finish.

    The stages run strictly in order: transcript fetching (called with
    ``max_videos=MAX_VIDEOS``), preprocessing, embedding, then indexing.
    A progress banner is printed before each stage, and a final banner is
    printed once the last stage has returned. Nothing is caught here, so
    an exception raised by any stage aborts the remaining ones.
    """
    # Each entry pairs the banner printed before a stage with the
    # zero-argument callable that performs that stage.
    stages = (
        (
            "\n-> Lancement de l'extraction des transcriptions YouTube...",
            lambda: fetch_youtube_transcripts(max_videos=MAX_VIDEOS),
        ),
        ("\n-> Prétraitement des sous-titres...", run_preprocessing),
        ("\n-> Génération des embeddings...", run_embedding),
        ("\n-> Indexation FAISS...", run_indexing),
    )
    for banner, run_stage in stages:
        print(banner)
        run_stage()

    print("\n-> Tous les traitements sont terminés.")


if __name__ == "__main__":
    main()