# File size: 839 Bytes
# 389c5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import numpy as np
import pandas as pd
from llama_index.embeddings.openai import OpenAIEmbedding
import os
from dotenv import load_dotenv
# Script: embed every guideline summary with OpenAI and store the vectors
# next to a TSV copy of the source table (row i of the matrix <-> row i of
# the TSV).

# Pull OPENAI_API_KEY (and any other settings) from the local env file.
load_dotenv("config.env")

# The original looked the key up but discarded the result. Check it for
# real so a missing key is reported here, with a clear message, instead of
# surfacing later from inside the embedding client.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in config.env or the environment."
    )

SUMMARIES_CSV = "data/raw/guidelines_summaries.csv"
OUTPUT_DIR = "data/processed/lp/summary_embeddings"

# Load vectorstore summaries
df = pd.read_csv(SUMMARIES_CSV)

# An empty table would otherwise fail inside np.vstack with an unrelated
# "need at least one array" message.
if df.empty:
    raise ValueError(f"No rows found in {SUMMARIES_CSV}; nothing to embed.")

# pandas reads empty cells as NaN (a float), which the embedding client
# cannot process. Report the offending rows rather than silently dropping
# them, because dropping would break the row alignment with index.tsv.
missing_rows = df.index[df["summary"].isna()].tolist()
if missing_rows:
    raise ValueError(
        f"'summary' is empty for row(s) {missing_rows} in {SUMMARIES_CSV}."
    )

# Embed summaries. The batch call groups texts into as few API requests as
# the model's batch size allows and returns vectors in input order.
embedding_model = OpenAIEmbedding()
summary_embeddings = np.vstack(
    embedding_model.get_text_embedding_batch(df["summary"].tolist())
)

# Save embeddings and metadata
os.makedirs(OUTPUT_DIR, exist_ok=True)

np.save(f"{OUTPUT_DIR}/embeddings.npy", summary_embeddings)
df.to_csv(f"{OUTPUT_DIR}/index.tsv", sep="\t", index=False)

print("✅ Saved embeddings and index.")