ELHACHYMI committed on
Commit
508def0
·
verified ·
1 Parent(s): a316fee

title embd script

Browse files
Files changed (1) hide show
  1. src/scripts/4_title_embeddings.py +22 -0
src/scripts/4_title_embeddings.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.title_embd.preprocessing import preprocess_titles
2
+ from src.title_embd.embed import generate_title_embeddings
3
+ from src.title_embd.index import create_title_faiss_index
4
+ import logging
5
+
6
# Configure the root logger for this script: records at INFO and above,
# each rendered as "<timestamp> <level name> <message>".
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
7
+
8
def run():
    """Run the title pipeline as three sequential, logged steps.

    The steps are, in order:

    1. ``preprocess_titles()`` -- its return value is kept.
    2. ``generate_title_embeddings(...)`` -- called with the step 1 result.
    3. ``create_title_faiss_index()`` -- called with no arguments.

    An INFO message is logged through the ``logging`` module's root-level
    ``info`` function before and after each call. Nothing is returned, and
    exceptions raised by any step are not caught here.
    """
    log = logging.info

    # Step 1: the preprocessing result is the only value this function
    # hands from one step to the next.
    log("Step 1: Preprocessing document titles")
    prepared_titles = preprocess_titles()
    log("Step 1: Title preprocessing complete")

    # Step 2: the return value of the embedding call is not used here.
    log("Step 2: Generating title embeddings")
    generate_title_embeddings(prepared_titles)
    log("Step 2: Title embeddings generated")

    # Step 3: the index builder receives nothing from this function.
    log("Step 3: Creating FAISS index for titles")
    create_title_faiss_index()
    log("Step 3: FAISS index for titles created")
20
+
21
# Start the pipeline only when this file is executed directly as a script;
# importing the module must not trigger a run.
if __name__ == "__main__":
    run()