# Extracted from a hosted file view (revision 389c5f0, file size 839 bytes);
# the page status lines and the line-number gutter were scrape residue, not code.
import numpy as np
import pandas as pd
from llama_index.embeddings.openai import OpenAIEmbedding
import os
from dotenv import load_dotenv
# Build an embedding matrix for the guideline summaries and store it next to
# a TSV copy of the source table, so row i of the matrix matches row i of the
# index file.

# Pull settings from config.env into the process environment.
load_dotenv("config.env")

# The original bare `os.environ.get(...)` discarded its result, so a missing
# key was never detected here. Stop early with an actionable message instead.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in config.env or the environment."
    )

# Load the summaries table; the "summary" column holds the text to embed.
df = pd.read_csv("data/raw/guidelines_summaries.csv")

# Validate before spending any API calls. An empty table would otherwise
# fail inside np.vstack with an opaque message, and empty CSV cells are read
# back as NaN floats rather than text.
summaries = df["summary"]
if summaries.empty:
    raise ValueError("data/raw/guidelines_summaries.csv contains no summaries.")
if summaries.isna().any():
    missing_rows = summaries[summaries.isna()].index.tolist()
    raise ValueError(f"Missing summary text in row(s): {missing_rows}")

# Embed summaries in batches (one request per batch instead of one per row);
# the returned list is in input order, so rows stay aligned with df.
embedding_model = OpenAIEmbedding()
summary_embeddings = np.vstack(
    embedding_model.get_text_embedding_batch(summaries.tolist())
)

# Save embeddings and metadata
os.makedirs("data/processed/lp/summary_embeddings", exist_ok=True)
np.save("data/processed/lp/summary_embeddings/embeddings.npy", summary_embeddings)
df.to_csv("data/processed/lp/summary_embeddings/index.tsv", sep="\t", index=False)
print("✅ Saved embeddings and index.")