Spaces:
Sleeping
Sleeping
title embd script
Browse files
src/scripts/4_title_embeddings.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.title_embd.preprocessing import preprocess_titles
|
| 2 |
+
from src.title_embd.embed import generate_title_embeddings
|
| 3 |
+
from src.title_embd.index import create_title_faiss_index
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
# Configure logging when this module is loaded: records at INFO and above
# are formatted as "<timestamp> <level> <message>".
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
|
| 7 |
+
|
| 8 |
+
def run():
    """Run the title-embedding pipeline from start to finish.

    The three stages execute strictly in order, each bracketed by a pair of
    INFO log messages:

    1. ``preprocess_titles()`` produces the title data.
    2. ``generate_title_embeddings()`` consumes that title data.
    3. ``create_title_faiss_index()`` is called with no arguments
       (NOTE(review): presumably it picks up whatever step 2 persisted --
       confirm against ``src.title_embd.index``).

    Returns nothing; any exception raised by a stage propagates to the
    caller and stops the remaining stages.
    """
    # Stage 1: preprocessing.
    logging.info("Step 1: Preprocessing document titles")
    prepared_titles = preprocess_titles()
    logging.info("Step 1: Title preprocessing complete")

    # Stage 2: embedding generation from the preprocessed titles.
    logging.info("Step 2: Generating title embeddings")
    generate_title_embeddings(prepared_titles)
    logging.info("Step 2: Title embeddings generated")

    # Stage 3: FAISS index construction.
    logging.info("Step 3: Creating FAISS index for titles")
    create_title_faiss_index()
    logging.info("Step 3: FAISS index for titles created")
|
| 20 |
+
|
| 21 |
+
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run()
|