Spaces:
Sleeping
Sleeping
File size: 1,379 Bytes
68ac8d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import os
import json
from supabase import create_client
from sentence_transformers import SentenceTransformer
# Bulk-load script: embeds each question/answer record found in
# data/metadata.jsonl and inserts it into the Supabase "documents" table.

# Load environment variables (a missing variable raises KeyError at startup)
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_SERVICE_KEY = os.environ["SUPABASE_SERVICE_KEY"]

# Connect to Supabase
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)

# Load the sentence-transformer model
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Load the .jsonl file. The encoding is pinned to UTF-8 so that decoding does
# not depend on the platform's locale default.
with open("data/metadata.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file);
        # json.loads would otherwise raise and abort the whole upload.
        if not line.strip():
            continue

        item = json.loads(line)
        question = item.get("Question")
        answer = item.get("Final answer", "")

        # Format content like LangChain expects
        content = f"Question: {question}\nAnswer: {answer}"
        embedding = embedder.encode(content).tolist()

        # Optional metadata, remove large fields like step-by-step details if not needed
        metadata = {
            "task_id": item.get("task_id"),
            "level": item.get("Level"),
            "file_name": item.get("file_name"),
            "annotator_metadata": item.get("Annotator Metadata", {}),
        }

        # Insert into Supabase
        supabase.table("documents").insert({
            "content": content,
            "embedding": embedding,
            "metadata": metadata,
        }).execute()

        # Report the same .get()-derived id that was stored, so a record
        # without "task_id" cannot raise KeyError after it was inserted.
        print(f"✅ Inserted: {metadata['task_id']}")
|