{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "09226255",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Load every JSON Lines record from the metadata file into a list of dicts.\n",
"with open(\"./metadata.jsonl\", 'r') as f:\n",
"    documents = [json.loads(line) for line in f]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5f5389a4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'task_id': 'c61d22de-5f6c-4958-a7f6-5e9707bd3466',\n",
" 'Question': 'A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?',\n",
" 'Level': 2,\n",
" 'Final answer': 'egalitarian',\n",
" 'file_name': '',\n",
" 'Annotator Metadata': {'Steps': '1. Go to arxiv.org and navigate to the Advanced Search page.\\n2. Enter \"AI regulation\" in the search box and select \"All fields\" from the dropdown.\\n3. Enter 2022-06-01 and 2022-07-01 into the date inputs, select \"Submission date (original)\", and submit the search.\\n4. Go through the search results to find the article that has a figure with three axes and labels on each end of the axes, titled \"Fairness in Agreement With European Values: An Interdisciplinary Perspective on AI Regulation\".\\n5. Note the six words used as labels: deontological, egalitarian, localized, standardized, utilitarian, and consequential.\\n6. Go back to arxiv.org\\n7. Find \"Physics and Society\" and go to the page for the \"Physics and Society\" category.\\n8. Note that the tag for this category is \"physics.soc-ph\".\\n9. Go to the Advanced Search page.\\n10. Enter \"physics.soc-ph\" in the search box and select \"All fields\" from the dropdown.\\n11. Enter 2016-08-11 and 2016-08-12 into the date inputs, select \"Submission date (original)\", and submit the search.\\n12. Search for instances of the six words in the results to find the paper titled \"Phase transition from egalitarian to hierarchical societies driven by competition between cognitive and social constraints\", indicating that \"egalitarian\" is the correct answer.',\n",
" 'Number of steps': '12',\n",
" 'How long did this take?': '8 minutes',\n",
" 'Tools': '1. Web browser\\n2. Image recognition tools (to identify and parse a figure with three axes)',\n",
" 'Number of tools': '2'}}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the first record to see the dataset schema (task_id, Question, Level, ...)\n",
"documents[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff72589b",
"metadata": {},
"outputs": [],
"source": [
"def filt_level1(docs):\n",
"    \"\"\"Return only the documents whose difficulty Level equals 1.\n",
"\n",
"    Args:\n",
"        docs: iterable of dicts, each with a \"Level\" key.\n",
"\n",
"    Returns:\n",
"        list of the Level-1 document dicts, in original order.\n",
"    \"\"\"\n",
"    return [doc for doc in docs if doc[\"Level\"] == 1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f75e308",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import SupabaseVectorStore\n",
"from supabase.client import create_client\n",
"\n",
"load_dotenv()\n",
"\n",
"# Read credentials from environment variables (never hardcode them).\n",
"SUPABASE_URL = os.getenv(\"SUPABASE_URL\")\n",
"SUPABASE_KEY = os.getenv(\"SUPABASE_KEY\")\n",
"# Fail fast with a clear message instead of an opaque client error downstream.\n",
"if not SUPABASE_URL or not SUPABASE_KEY:\n",
"    raise RuntimeError(\"SUPABASE_URL and SUPABASE_KEY must be set in the environment\")\n",
"\n",
"# Initialize the Supabase client\n",
"supabase = create_client(SUPABASE_URL, SUPABASE_KEY)\n",
"\n",
"# Initialize the sentence-embedding model\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
"\n",
"# BUG FIX: `filtered_docs` was used below but never defined anywhere in the\n",
"# notebook (`filt_level1` was defined but never called), so this cell raised\n",
"# a NameError on a fresh Restart-&-Run-All. Build it here explicitly.\n",
"filtered_docs = filt_level1(documents)\n",
"\n",
"# Prepare one record per document: the question/answer text plus its embedding\n",
"table_records = []\n",
"for doc in filtered_docs:\n",
"    content = f\"Question: {doc['Question']}\\nFinal answer: {doc['Final answer']}\"\n",
"    record = {\n",
"        \"content\": content,\n",
"        \"embedding\": embeddings.embed_query(content)\n",
"    }\n",
"    table_records.append(record)\n",
"\n",
"# Insert the records into the Supabase \"documents\" table\n",
"response = supabase.table(\"documents\").insert(table_records).execute()\n",
"\n",
"# Initialize the vector store over the populated table\n",
"vector_store = SupabaseVectorStore(\n",
"    embedding=embeddings,\n",
"    client=supabase,\n",
"    table_name=\"documents\",\n",
"    query_name=\"match_documents\"\n",
")\n",
"\n",
"# Retrieve the single most similar stored document for a sample query\n",
"query_text = (\"If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, \"\n",
"              \"how many thousand hours would it take him to run the distance between the Earth \"\n",
"              \"and the Moon at its closest approach?\")\n",
"results = vector_store.similarity_search(query=query_text, k=1)\n",
"\n",
"# The stored content ends with \"Final answer: <answer>\"; extract just the answer\n",
"final_answer = results[0].page_content.split(\"Final answer:\")[-1].strip()\n",
"print(final_answer)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_agent",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}