Spaces:
Runtime error
Runtime error
# Build a retriever on Supabase:
#   - create the project, table, indexes, and functions
#   - create a client with the project URL and key
#   - insert the data together with its embeddings
#
# Load metadata.jsonl
| import json | |
| import os | |
| from dotenv import load_dotenv | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import SupabaseVectorStore | |
| from supabase.client import Client, create_client | |
| from langchain.schema import Document | |
# Load the metadata.jsonl file: one JSON object per line.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = list(jsonl_file)

# Parse every non-blank line; blank lines (e.g. a trailing empty line at the
# end of the file) are skipped instead of crashing json.loads.
json_QA = [json.loads(json_str) for json_str in json_list if json_str.strip()]
### Build a vector database based on metadata.jsonl
# https://python.langchain.com/docs/integrations/vectorstores/supabase/

# Load environment variables via python-dotenv before they are read below.
load_dotenv()

# Embedding model used for every document (dim=768).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# The Supabase endpoint and service key come from the environment.
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)
# Wrap the questions and answers from metadata.jsonl into a list of rows,
# each holding the text, its metadata, and its embedding.
def _as_row(qa):
    """Return the row dict (content, metadata, embedding) for one QA sample."""
    text = f"Question : {qa['Question']}\n\nFinal answer : {qa['Final answer']}"
    return {
        "content": text,
        # Per the original author's note: the metadata must contain a
        # "source" key, otherwise an error is reported.
        "metadata": {"source": qa['task_id']},
        "embedding": embeddings.embed_query(text),
    }


docs = [_as_row(qa) for qa in json_QA]
# Target table for the upload. TABLE_NAME overrides it; when the variable is
# unset or empty, fall back to "documents" (the previously hard-coded name).
table_name = os.environ.get('TABLE_NAME') or "documents"

# Upload the documents to the vector database in a single insert request.
try:
    response = (
        supabase.table(table_name)
        .insert(docs)
        .execute()
    )
except Exception as exception:
    # Best-effort upload: report the failure and let the script continue.
    print("Error inserting data into Supabase:", exception)