Spaces:
Sleeping
Sleeping
"""Embed the records in ``data/metadata.jsonl`` and upload them to Supabase.

Each non-blank line of the file is parsed as one JSON object. Its
``Question`` and ``Final answer`` fields are rendered into a single text
block, encoded with the ``all-mpnet-base-v2`` sentence-transformer, and
inserted as one row into the ``documents`` table together with a small
metadata dict.

The ``SUPABASE_URL`` and ``SUPABASE_SERVICE_KEY`` environment variables must
be set; a missing one raises ``KeyError`` before any work is done.
"""

import os
import json

from supabase import create_client
from sentence_transformers import SentenceTransformer

# Load environment variables (indexing, not .get, so a missing one fails fast).
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_SERVICE_KEY = os.environ["SUPABASE_SERVICE_KEY"]

# Connect to Supabase.
supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)

# Load the sentence-transformer model.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Read the .jsonl file as UTF-8 explicitly: without an encoding, open() falls
# back to the platform locale and can mis-decode non-ASCII text.
with open("data/metadata.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads("") would raise and abort the whole upload.
            continue

        item = json.loads(line)
        question = item.get("Question")
        answer = item.get("Final answer", "")

        # Format content like LangChain expects.
        content = f"Question: {question}\nAnswer: {answer}"
        # .tolist() turns the encoder output into plain Python floats so the
        # row can be serialized for the insert request.
        embedding = embedder.encode(content).tolist()

        # Optional metadata; large fields such as step-by-step details can be
        # dropped from here if they are not needed.
        metadata = {
            "task_id": item.get("task_id"),
            "level": item.get("Level"),
            "file_name": item.get("file_name"),
            "annotator_metadata": item.get("Annotator Metadata", {}),
        }

        # Insert into Supabase (one row per record).
        supabase.table("documents").insert(
            {
                "content": content,
                "embedding": embedding,
                "metadata": metadata,
            }
        ).execute()

        # Report the same (possibly None) id that was stored, instead of
        # item['task_id'], which raised KeyError *after* the row was already
        # inserted whenever a record had no task_id.
        print(f"✅ Inserted: {metadata['task_id']}")