{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "09226255",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Load the metadata records: the file holds one JSON document per line.\n",
    "documents = []\n",
    "# Explicit UTF-8 so parsing does not depend on the platform's default encoding.\n",
    "with open(\"./metadata.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        # Skip blank lines (e.g. a trailing newline), which json.loads rejects.\n",
    "        if line.strip():\n",
    "            documents.append(json.loads(line))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5f5389a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'task_id': 'c61d22de-5f6c-4958-a7f6-5e9707bd3466',\n",
       " 'Question': 'A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?',\n",
       " 'Level': 2,\n",
       " 'Final answer': 'egalitarian',\n",
       " 'file_name': '',\n",
       " 'Annotator Metadata': {'Steps': '1. Go to arxiv.org and navigate to the Advanced Search page.\\n2. Enter \"AI regulation\" in the search box and select \"All fields\" from the dropdown.\\n3. Enter 2022-06-01 and 2022-07-01 into the date inputs, select \"Submission date (original)\", and submit the search.\\n4. Go through the search results to find the article that has a figure with three axes and labels on each end of the axes, titled \"Fairness in Agreement With European Values: An Interdisciplinary Perspective on AI Regulation\".\\n5. Note the six words used as labels: deontological, egalitarian, localized, standardized, utilitarian, and consequential.\\n6. Go back to arxiv.org\\n7. Find \"Physics and Society\" and go to the page for the \"Physics and Society\" category.\\n8. Note that the tag for this category is \"physics.soc-ph\".\\n9. Go to the Advanced Search page.\\n10. Enter \"physics.soc-ph\" in the search box and select \"All fields\" from the dropdown.\\n11. Enter 2016-08-11 and 2016-08-12 into the date inputs, select \"Submission date (original)\", and submit the search.\\n12. Search for instances of the six words in the results to find the paper titled \"Phase transition from egalitarian to hierarchical societies driven by competition between cognitive and social constraints\", indicating that \"egalitarian\" is the correct answer.',\n",
       " 'Number of steps': '12',\n",
       " 'How long did this take?': '8 minutes',\n",
       " 'Tools': '1. Web browser\\n2. Image recognition tools (to identify and parse a figure with three axes)',\n",
       " 'Number of tools': '2'}}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "documents[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff72589b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def filt_level1(docs):\n",
    "    \"\"\"Return the documents whose \"Level\" field equals 1.\n",
    "\n",
    "    Args:\n",
    "        docs: Iterable of dicts; each one is indexed with the \"Level\" key.\n",
    "\n",
    "    Returns:\n",
    "        A new list with the matching documents, in their original order.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If a document has no \"Level\" key.\n",
    "    \"\"\"\n",
    "    return [doc for doc in docs if doc[\"Level\"] == 1]\n",
    "\n",
    "\n",
    "# The indexing cell below iterates over `filtered_docs`; define it here so the\n",
    "# notebook runs on a fresh kernel (Restart & Run All).\n",
    "# NOTE(review): assumes the level-1 subset is the one meant to be indexed - confirm.\n",
    "filtered_docs = filt_level1(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f75e308",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from langchain_huggingface import HuggingFaceEmbeddings\n",
    "from langchain_community.vectorstores import SupabaseVectorStore\n",
    "from supabase.client import create_client\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "# Read credentials from environment variables\n",
    "SUPABASE_URL = os.getenv(\"SUPABASE_URL\")\n",
    "SUPABASE_KEY = os.getenv(\"SUPABASE_KEY\")\n",
    "# os.getenv returns None for unset variables; fail here with a clear message\n",
    "# instead of passing None into create_client.\n",
    "if not SUPABASE_URL or not SUPABASE_KEY:\n",
    "    raise RuntimeError(\n",
    "        \"SUPABASE_URL and SUPABASE_KEY must be set in the environment or in a .env file.\"\n",
    "    )\n",
    "\n",
    "# Initialize the Supabase client\n",
    "supabase = create_client(SUPABASE_URL, SUPABASE_KEY)\n",
    "\n",
    "# Initialize embeddings\n",
    "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
    "\n",
    "# Build the records to insert: one text per document plus its embedding\n",
    "contents = [\n",
    "    f\"Question: {doc['Question']}\\nFinal answer: {doc['Final answer']}\"\n",
    "    for doc in filtered_docs\n",
    "]\n",
    "# One batched call instead of one embedding call per document.\n",
    "vectors = embeddings.embed_documents(contents)\n",
    "table_records = [\n",
    "    {\"content\": content, \"embedding\": vector}\n",
    "    for content, vector in zip(contents, vectors)\n",
    "]\n",
    "\n",
    "# Insert the records into Supabase.\n",
    "# NOTE: this cell issues the insert every time it runs, so re-running it sends\n",
    "# the same records again.\n",
    "response = supabase.table(\"documents\").insert(table_records).execute()\n",
    "\n",
    "# Initialize the vector store\n",
    "vector_store = SupabaseVectorStore(\n",
    "    embedding=embeddings,\n",
    "    client=supabase,\n",
    "    table_name=\"documents\",\n",
    "    query_name=\"match_documents\"\n",
    ")\n",
    "\n",
    "# Search for similar documents\n",
    "query_text = (\"If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, \"\n",
    "              \"how many thousand hours would it take him to run the distance between the Earth \"\n",
    "              \"and the Moon at its closest approach?\")\n",
    "results = vector_store.similarity_search(query=query_text, k=1)\n",
    "\n",
    "# Extract the final answer: the text after the last \"Final answer:\" marker\n",
    "if not results:\n",
    "    raise LookupError(\"similarity_search returned no documents for the query.\")\n",
    "final_answer = results[0].page_content.split(\"Final answer:\")[-1].strip()\n",
    "print(final_answer)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv_agent",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}