# Final_Assignment_Template / load_vectorstore.py
# Author: pavan-d
# Commit message: Create load_vectorstore.py
# Commit: 68ac8d1 (verified)
import os
import json
from supabase import create_client
from sentence_transformers import SentenceTransformer
# Load environment variables. A missing variable raises KeyError right here,
# before the client is created or the embedding model is loaded.
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_SERVICE_KEY = os.environ["SUPABASE_SERVICE_KEY"]

# Connect to Supabase
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)

# Load the sentence-transformer model
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Load the .jsonl file (one JSON object per line). The encoding is pinned to
# UTF-8 so decoding does not depend on the platform's default locale.
with open("data/metadata.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file);
        # json.loads would raise on them and abort the whole load.
        if not line.strip():
            continue

        item = json.loads(line)

        # Read task_id once with .get() so a record without it is still
        # reported after the insert instead of raising KeyError.
        task_id = item.get("task_id")
        question = item.get("Question")
        answer = item.get("Final answer", "")

        # Format content like LangChain expects
        content = f"Question: {question}\nAnswer: {answer}"
        embedding = embedder.encode(content).tolist()

        # Optional metadata, remove large fields like step-by-step details
        # if not needed
        metadata = {
            "task_id": task_id,
            "level": item.get("Level"),
            "file_name": item.get("file_name"),
            "annotator_metadata": item.get("Annotator Metadata", {}),
        }

        # Insert into Supabase: one row per JSONL record
        supabase.table("documents").insert({
            "content": content,
            "embedding": embedding,
            "metadata": metadata,
        }).execute()
        print(f"✅ Inserted: {task_id}")