File size: 1,379 Bytes
68ac8d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import json
from supabase import create_client
from sentence_transformers import SentenceTransformer

# Required configuration, read from the process environment.
# Indexing os.environ directly (rather than .get) makes a missing variable
# fail immediately with KeyError instead of surfacing later as a bad client.
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_SERVICE_KEY = os.environ["SUPABASE_SERVICE_KEY"]

# Supabase client shared by every insert performed further down the script.
# NOTE(review): the variable name suggests this is a service-role key —
# confirm it is never exposed to client-side code.
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)

# Sentence-transformer model used to embed each "Question/Answer" document.
# NOTE(review): the `embedding` column in the target table presumably has to
# match this model's output dimension — verify against the table schema.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Read data/metadata.jsonl (one JSON object per line), embed each record's
# question/answer text, and insert one row per record into the Supabase
# "documents" table.
#
# The file is opened explicitly as UTF-8: without `encoding=`, Python falls
# back to the platform's locale encoding, which can corrupt or reject
# non-ASCII text on systems whose default is not UTF-8.
with open("data/metadata.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (typically a trailing empty line at end of file);
        # json.loads would raise JSONDecodeError on them and abort the run
        # after some rows had already been inserted.
        if not line.strip():
            continue

        item = json.loads(line)

        # Fetch every field with .get() so a record missing a key does not
        # raise part-way through processing.
        task_id = item.get("task_id")
        question = item.get("Question")
        answer = item.get("Final answer", "")

        # Format content like LangChain expects
        content = f"Question: {question}\nAnswer: {answer}"
        # .tolist() turns the encoder output into a plain Python list so the
        # insert payload below is JSON-serialisable.
        embedding = embedder.encode(content).tolist()

        # Optional metadata; only a small subset of fields is kept. Drop
        # "annotator_metadata" too if its step-by-step details are not needed.
        metadata = {
            "task_id": task_id,
            "level": item.get("Level"),
            "file_name": item.get("file_name"),
            "annotator_metadata": item.get("Annotator Metadata", {}),
        }

        # Insert into Supabase
        supabase.table("documents").insert({
            "content": content,
            "embedding": embedding,
            "metadata": metadata,
        }).execute()

        # Report using the value already fetched with .get(): indexing
        # item['task_id'] here would raise KeyError *after* the row was
        # inserted, killing the script with a partial, unreported write.
        print(f"✅ Inserted: {task_id}")