{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "70227cfd", "metadata": {}, "outputs": [], "source": [ "from sqlalchemy import create_engine\n", "from dotenv import load_dotenv\n", "from sqlalchemy import text\n", "import pandas as pd\n", "import os\n", "\n", "load_dotenv()\n", "\n", "DB_USER = os.getenv('DB_USER')\n", "DB_PASS = os.getenv('DB_PASS')\n", "DB_HOST = os.getenv('DB_HOST')\n", "DB_PORT = os.getenv('DB_PORT')\n", "DB_NAME = os.getenv('DB_NAME')\n", "PATH_TO_CERT = os.getenv('PATH_TO_CERT')\n", "\n", "connection_str = f\"postgresql+psycopg2://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}\"\n", "\n", "engine = create_engine(\n", " connection_str,\n", " connect_args={\n", " \"sslmode\": \"verify-full\",\n", " \"sslrootcert\": PATH_TO_CERT,\n", " \"target_session_attrs\": \"read-write\"\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "fd49e25a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "167" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"/Users/incllude/dev/rag_tg_2025/generated_qa.csv\")\n", "df.to_sql('test_cases', engine, if_exists='replace', index=False)" ] }, { "cell_type": "code", "execution_count": 11, "id": "0bb4f789", "metadata": {}, "outputs": [], "source": [ "create_table_query = \"\"\"\n", "drop table if exists posts;\n", "\"\"\"\n", "\n", "try:\n", " with engine.begin() as conn:\n", " conn.execute(text(create_table_query))\n", "except Exception as e:\n", " print(\"Ошибка:\", e)\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "e687fcdb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
message_idchannel_idmessage_dtviewscontent
0137228rbc_news2025-12-0340045Суд признал писателя Бориса Акунина (настоящее...
1137226rbc_news2025-12-0353463На характере переговоров с США, прошедших нака...
2137224rbc_news2025-12-0356667Еврокомиссия намерена запретить исполнение вну...
3137223rbc_news2025-12-0355604Прямо сейчас в эфире Радио РБК обсуждаем планы...
4137222rbc_news2025-12-0361798Коллегия Еврокомиссии одобрила «потенциальный ...
..................
4795116045rbc_news2025-04-15108803Суд на Сахалине вынес первое решение по делу о...
4796116044rbc_news2025-04-15108074Оператор национальных лотерей Франции стал отв...
4797116043rbc_news2025-04-15143013Слоны во время землетрясения в зоопарке Сан-Ди...
4798116041rbc_news2025-04-15125020Аэропорт южнокорейского города Муан регулярно ...
4799116039rbc_news2025-04-15156002Первоклассники не должны заниматься уроками бо...
\n", "

4800 rows × 5 columns

\n", "
" ], "text/plain": [ " message_id channel_id message_dt views \\\n", "0 137228 rbc_news 2025-12-03 40045 \n", "1 137226 rbc_news 2025-12-03 53463 \n", "2 137224 rbc_news 2025-12-03 56667 \n", "3 137223 rbc_news 2025-12-03 55604 \n", "4 137222 rbc_news 2025-12-03 61798 \n", "... ... ... ... ... \n", "4795 116045 rbc_news 2025-04-15 108803 \n", "4796 116044 rbc_news 2025-04-15 108074 \n", "4797 116043 rbc_news 2025-04-15 143013 \n", "4798 116041 rbc_news 2025-04-15 125020 \n", "4799 116039 rbc_news 2025-04-15 156002 \n", "\n", " content \n", "0 Суд признал писателя Бориса Акунина (настоящее... \n", "1 На характере переговоров с США, прошедших нака... \n", "2 Еврокомиссия намерена запретить исполнение вну... \n", "3 Прямо сейчас в эфире Радио РБК обсуждаем планы... \n", "4 Коллегия Еврокомиссии одобрила «потенциальный ... \n", "... ... \n", "4795 Суд на Сахалине вынес первое решение по делу о... \n", "4796 Оператор национальных лотерей Франции стал отв... \n", "4797 Слоны во время землетрясения в зоопарке Сан-Ди... \n", "4798 Аэропорт южнокорейского города Муан регулярно ... \n", "4799 Первоклассники не должны заниматься уроками бо... \n", "\n", "[4800 rows x 5 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with engine.connect() as conn:\n", " df = pd.read_sql('''\n", "select * from posts\n", " ''', conn)\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 73, "id": "8abd0803", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
message_idchannel_idmessage_dtviewscontent
4498130471rbc_news2025-09-12139946Адвокат бывшего мэра Стамбула Экрема Имамоглу ...
1054134025rbc_news2025-10-22112469Госдума приняла в первом чтении проект бюджета...
283136520rbc_news2025-11-24118238Силы ПВО в период с 14:00 до 20:00 мск уничтож...
4406124142rbc_news2025-07-15122075В Госдуме из-за мощных ливней затопило курилку...
1378135060rbc_news2025-11-05114842Американская актриса и посол доброй воли ЮНИСЕ...
..................
1467130799rbc_news2025-09-17135139Кинокомпании Walt Disney, Universal и Warner B...
4581123624rbc_news2025-07-10121628Главные новости к утру — на телеканале РБК
4281130775rbc_news2025-09-16120652Путин примерил тепловизионные очки «Стрекоза» ...
241136585rbc_news2025-11-25101214Российского посла вызвали в МИД Молдавии из-за...
1947128465rbc_news2025-08-23163463Ограничения полетов ввели в аэропорту Ульяновс...
\n", "

4800 rows × 5 columns

\n", "
" ], "text/plain": [ " message_id channel_id message_dt views \\\n", "4498 130471 rbc_news 2025-09-12 139946 \n", "1054 134025 rbc_news 2025-10-22 112469 \n", "283 136520 rbc_news 2025-11-24 118238 \n", "4406 124142 rbc_news 2025-07-15 122075 \n", "1378 135060 rbc_news 2025-11-05 114842 \n", "... ... ... ... ... \n", "1467 130799 rbc_news 2025-09-17 135139 \n", "4581 123624 rbc_news 2025-07-10 121628 \n", "4281 130775 rbc_news 2025-09-16 120652 \n", "241 136585 rbc_news 2025-11-25 101214 \n", "1947 128465 rbc_news 2025-08-23 163463 \n", "\n", " content \n", "4498 Адвокат бывшего мэра Стамбула Экрема Имамоглу ... \n", "1054 Госдума приняла в первом чтении проект бюджета... \n", "283 Силы ПВО в период с 14:00 до 20:00 мск уничтож... \n", "4406 В Госдуме из-за мощных ливней затопило курилку... \n", "1378 Американская актриса и посол доброй воли ЮНИСЕ... \n", "... ... \n", "1467 Кинокомпании Walt Disney, Universal и Warner B... \n", "4581 Главные новости к утру — на телеканале РБК \n", "4281 Путин примерил тепловизионные очки «Стрекоза» ... \n", "241 Российского посла вызвали в МИД Молдавии из-за... \n", "1947 Ограничения полетов ввели в аэропорту Ульяновс... \n", "\n", "[4800 rows x 5 columns]" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def strip_edges_allow_punct(s: str):\n", " allowed_punct = set(\".,!?;:-–—\") # можно расширять\n", "\n", " # Левый указатель — пока не буква/цифра\n", " left = 0\n", " while left < len(s) and not s[left].isalnum():\n", " left += 1\n", "\n", " # Правый указатель — пока не буква/цифра/пунктуация\n", " right = len(s) - 1\n", " while right >= 0 and not (s[right].isalnum() or s[right] in allowed_punct):\n", " right -= 1\n", "\n", " # Если всё мусор\n", " if right < left:\n", " return \"\"\n", "\n", " return s[left:right+1]\n", "\n", "\n", "\n", "def process_str(s):\n", " s = \"\\n\".join(strip_edges_allow_punct(p) for p in s.split(\"\\n\") if p)\n", " \n", " for suf in [\n", " \"Слушать прямой эфир\",\n", " \"Читать РБК Стиль в Telegram\",\n", " \"РБК Events, 18\",\n", " \"Подписаться | Онлайн-сомелье\",\n", " \"Читать РБК в Telegram\",\n", " \"Следить за новостями РБК в Telegram\",\n", " \"Следить за новостями РБК в МАХ\",\n", " \"Другие видео этого дня — в телеграм-канале РБК\",\n", " \"РБК в Telegram и MAX\",\n", " \"РБК в Telegram | MAX\",\n", " \"Подписаться на «РБК Спорт\",\n", " \"Картина дня — в телеграм-канале РБК\",\n", " \"Самые важные новости — в канале РБК в МАХ\",\n", " \"Больше инфографики — в телеграм-канале РБК\",\n", " \"Подписаться на «Сам ты инвестор!\",\n", " \"Читать РБК Недвижимость в Telegram\"\n", " ]:\n", " s = s.removesuffix(suf).strip()\n", "\n", " parts = [p for p in s.split(\"\\n\") if p]\n", " \n", " prev_parts = [0] * 1000\n", " while len(prev_parts) != len(parts) and len(parts) != 0:\n", " prev_parts = parts\n", " if \"Фото:\" in parts[-1] or \"Данные:\" in parts[-1]:\n", " parts = parts[:-1]\n", " \n", " return \"\\n\".join(parts)\n", "\n", "import pandas as pd\n", "\n", "rbc = pd.read_csv(\"src/dataset/rbc/channel_rbc_news_posts.csv\")\n", "rbc[\"message_dt\"] = pd.to_datetime(rbc[\"message_dt\"]).dt.date\n", "rbc = rbc[[\"message_id\", \"channel_id\", \"message_dt\", \"views\", \"content\"]].astype({\"views\": int}).sample(len(rbc))\n", "rbc[\"content\"] = rbc[\"content\"].apply(lambda x: process_str(x))\n", "rbc = rbc[rbc[\"content\"].apply(\n", " lambda x: not any(v in [p for p in x.split(\"\\n\") if p][-1] for v in [\"Реклама.\", \"Реклама,\"]) if x else False\n", " )]\n", "rbc" ] }, { "cell_type": "code", "execution_count": 74, "id": "85bf4cbf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "800" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "create_table_query = \"\"\"\n", "drop table if exists posts;\n", "\"\"\"\n", "\n", "try:\n", " with engine.begin() as conn:\n", " conn.execute(text(create_table_query))\n", "except Exception as e:\n", " print(\"Ошибка:\", e)\n", "\n", "rbc.to_sql('posts', engine, if_exists='replace', index=False)" ] }, { "cell_type": "code", "execution_count": 22, "id": "cc99786f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
message_idchannel_idmessage_dtviewscontent
0137228rbc_news2025-12-0340045Суд признал писателя Бориса Акунина (настоящее...
1137226rbc_news2025-12-0353463На характере переговоров с США, прошедших нака...
2137224rbc_news2025-12-0356667Еврокомиссия намерена запретить исполнение вну...
3137223rbc_news2025-12-0355604🎙 Прямо сейчас в эфире Радио РБК обсуждаем пла...
4137222rbc_news2025-12-0361798Коллегия Еврокомиссии одобрила «потенциальный ...
..................
4820116046rbc_news2025-04-15106022Репортаж телеканала РБК из Курска, который ноч...
4821116045rbc_news2025-04-15108803Суд на Сахалине вынес первое решение по делу о...
4822116044rbc_news2025-04-15108074Оператор национальных лотерей Франции стал отв...
4823116041rbc_news2025-04-15125020Аэропорт южнокорейского города Муан регулярно ...
4824116039rbc_news2025-04-15156002Первоклассники не должны заниматься уроками бо...
\n", "

4825 rows × 5 columns

\n", "
" ], "text/plain": [ " message_id channel_id message_dt views \\\n", "0 137228 rbc_news 2025-12-03 40045 \n", "1 137226 rbc_news 2025-12-03 53463 \n", "2 137224 rbc_news 2025-12-03 56667 \n", "3 137223 rbc_news 2025-12-03 55604 \n", "4 137222 rbc_news 2025-12-03 61798 \n", "... ... ... ... ... \n", "4820 116046 rbc_news 2025-04-15 106022 \n", "4821 116045 rbc_news 2025-04-15 108803 \n", "4822 116044 rbc_news 2025-04-15 108074 \n", "4823 116041 rbc_news 2025-04-15 125020 \n", "4824 116039 rbc_news 2025-04-15 156002 \n", "\n", " content \n", "0 Суд признал писателя Бориса Акунина (настоящее... \n", "1 На характере переговоров с США, прошедших нака... \n", "2 Еврокомиссия намерена запретить исполнение вну... \n", "3 🎙 Прямо сейчас в эфире Радио РБК обсуждаем пла... \n", "4 Коллегия Еврокомиссии одобрила «потенциальный ... \n", "... ... \n", "4820 Репортаж телеканала РБК из Курска, который ноч... \n", "4821 Суд на Сахалине вынес первое решение по делу о... \n", "4822 Оператор национальных лотерей Франции стал отв... \n", "4823 Аэропорт южнокорейского города Муан регулярно ... \n", "4824 Первоклассники не должны заниматься уроками бо... \n", "\n", "[4825 rows x 5 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with engine.connect() as conn:\n", " df = pd.read_sql('''\n", "select * from posts\n", " ''', conn)\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 7, "id": "18c9a8be", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/gv/dw4pvdvn4kqgq0tgn3m5qp940000gn/T/ipykernel_42516/890927509.py:10: UserWarning: Qdrant client version 1.16.2 is incompatible with server version 1.14.1. Major versions should match and minor version difference must not exceed 1. Set check_compatibility=False to skip version check.\n", " client = QdrantClient(\n" ] } ], "source": [ "from qdrant_client import QdrantClient, models\n", "from dotenv import load_dotenv\n", "import numpy as np\n", "import os\n", "\n", "load_dotenv()\n", "\n", "QDRANT_URL = os.getenv('QDRANT_URL')\n", "\n", "client = QdrantClient(\n", " url=QDRANT_URL,\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "id": "a0c3386a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client.delete_collection(collection_name=\"recursive_USER-bge-m3\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "7431dcf9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client.create_collection(\n", " collection_name=\"my_collection\",\n", " vectors_config=models.VectorParams(\n", " size=384, \n", " distance=models.Distance.COSINE\n", " )\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "7e9e6318", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "UpdateResult(operation_id=3, status=)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "points = [\n", " models.PointStruct(\n", " id=1,\n", " vector=np.ones(384).tolist(),\n", " payload={\"text\": \"Пример документа 1\"}\n", " ),\n", " models.PointStruct(\n", " id=2,\n", " vector=(-np.ones(384)).tolist(),\n", " payload={\"text\": \"Пример документа 2\"}\n", " )\n", "]\n", "\n", "client.upsert(\n", " collection_name=\"my_collection\",\n", " points=points,\n", " wait=True\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "2186428b", "metadata": {}, "outputs": [ { "ename": "UnexpectedResponse", "evalue": "Unexpected Response: 404 (Not Found)\nRaw response content:\nb'{\"status\":{\"error\":\"Not found: Collection `my_collection` doesn\\'t exist!\"},\"time\":0.004926893}'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mUnexpectedResponse\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m results = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery_points\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmy_collection\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[43m-\u001b[49m\u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mones\u001b[49m\u001b[43m(\u001b[49m\u001b[32;43m384\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtolist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mlimit\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\n\u001b[32m 5\u001b[39m \u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/dev/rag_tg_2025/venv/lib/python3.13/site-packages/qdrant_client/qdrant_client.py:423\u001b[39m, in \u001b[36mQdrantClient.query_points\u001b[39m\u001b[34m(self, collection_name, query, using, prefetch, query_filter, search_params, limit, offset, with_payload, with_vectors, score_threshold, lookup_from, consistency, shard_key_selector, timeout, **kwargs)\u001b[39m\n\u001b[32m 408\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 409\u001b[39m prefetch = (\n\u001b[32m 410\u001b[39m \u001b[38;5;28mnext\u001b[39m(\n\u001b[32m 411\u001b[39m \u001b[38;5;28miter\u001b[39m(\n\u001b[32m (...)\u001b[39m\u001b[32m 420\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 421\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m423\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery_points\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 424\u001b[39m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 425\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 426\u001b[39m \u001b[43m \u001b[49m\u001b[43mprefetch\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprefetch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 427\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_filter\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery_filter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 428\u001b[39m \u001b[43m \u001b[49m\u001b[43msearch_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43msearch_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 429\u001b[39m \u001b[43m \u001b[49m\u001b[43mlimit\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlimit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 430\u001b[39m \u001b[43m \u001b[49m\u001b[43moffset\u001b[49m\u001b[43m=\u001b[49m\u001b[43moffset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 431\u001b[39m \u001b[43m \u001b[49m\u001b[43mwith_payload\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwith_payload\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 432\u001b[39m \u001b[43m \u001b[49m\u001b[43mwith_vectors\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwith_vectors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 433\u001b[39m \u001b[43m \u001b[49m\u001b[43mscore_threshold\u001b[49m\u001b[43m=\u001b[49m\u001b[43mscore_threshold\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 434\u001b[39m \u001b[43m \u001b[49m\u001b[43musing\u001b[49m\u001b[43m=\u001b[49m\u001b[43musing\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 435\u001b[39m \u001b[43m \u001b[49m\u001b[43mlookup_from\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlookup_from\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 436\u001b[39m \u001b[43m \u001b[49m\u001b[43mconsistency\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconsistency\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 437\u001b[39m \u001b[43m \u001b[49m\u001b[43mshard_key_selector\u001b[49m\u001b[43m=\u001b[49m\u001b[43mshard_key_selector\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 438\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 439\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 440\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/dev/rag_tg_2025/venv/lib/python3.13/site-packages/qdrant_client/qdrant_remote.py:538\u001b[39m, in \u001b[36mQdrantRemote.query_points\u001b[39m\u001b[34m(self, collection_name, query, using, prefetch, query_filter, search_params, limit, offset, with_payload, with_vectors, score_threshold, lookup_from, consistency, shard_key_selector, timeout, **kwargs)\u001b[39m\n\u001b[32m 521\u001b[39m lookup_from = GrpcToRest.convert_lookup_location(lookup_from)\n\u001b[32m 523\u001b[39m query_request = models.QueryRequest(\n\u001b[32m 524\u001b[39m shard_key=shard_key_selector,\n\u001b[32m 525\u001b[39m prefetch=prefetch,\n\u001b[32m (...)\u001b[39m\u001b[32m 535\u001b[39m lookup_from=lookup_from,\n\u001b[32m 536\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m538\u001b[39m query_result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mhttp\u001b[49m\u001b[43m.\u001b[49m\u001b[43msearch_api\u001b[49m\u001b[43m.\u001b[49m\u001b[43mquery_points\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 539\u001b[39m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 540\u001b[39m \u001b[43m \u001b[49m\u001b[43mconsistency\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconsistency\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 541\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 542\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_request\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 543\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 545\u001b[39m result: models.QueryResponse | \u001b[38;5;28;01mNone\u001b[39;00m = query_result.result\n\u001b[32m 546\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m result \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[33m\"\u001b[39m\u001b[33mSearch returned None\u001b[39m\u001b[33m\"\u001b[39m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/dev/rag_tg_2025/venv/lib/python3.13/site-packages/qdrant_client/http/api/search_api.py:783\u001b[39m, in \u001b[36mSyncSearchApi.query_points\u001b[39m\u001b[34m(self, collection_name, consistency, timeout, query_request)\u001b[39m\n\u001b[32m 773\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mquery_points\u001b[39m(\n\u001b[32m 774\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 775\u001b[39m collection_name: \u001b[38;5;28mstr\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 778\u001b[39m query_request: m.QueryRequest = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 779\u001b[39m ) -> m.InlineResponse20021:\n\u001b[32m 780\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 781\u001b[39m \u001b[33;03m Universally query points. This endpoint covers all capabilities of search, recommend, discover, filters. But also enables hybrid and multi-stage queries.\u001b[39;00m\n\u001b[32m 782\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m783\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_build_for_query_points\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 784\u001b[39m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 785\u001b[39m \u001b[43m \u001b[49m\u001b[43mconsistency\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconsistency\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 786\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 787\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery_request\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 788\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/dev/rag_tg_2025/venv/lib/python3.13/site-packages/qdrant_client/http/api/search_api.py:181\u001b[39m, in \u001b[36m_SearchApi._build_for_query_points\u001b[39m\u001b[34m(self, collection_name, consistency, timeout, query_request)\u001b[39m\n\u001b[32m 179\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mContent-Type\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m headers:\n\u001b[32m 180\u001b[39m headers[\u001b[33m\"\u001b[39m\u001b[33mContent-Type\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[33m\"\u001b[39m\u001b[33mapplication/json\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m181\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mapi_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 182\u001b[39m \u001b[43m \u001b[49m\u001b[43mtype_\u001b[49m\u001b[43m=\u001b[49m\u001b[43mm\u001b[49m\u001b[43m.\u001b[49m\u001b[43mInlineResponse20021\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 183\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mPOST\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 184\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m/collections/\u001b[39;49m\u001b[38;5;132;43;01m{collection_name}\u001b[39;49;00m\u001b[33;43m/points/query\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 185\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 186\u001b[39m \u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 187\u001b[39m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m=\u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 188\u001b[39m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 189\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/dev/rag_tg_2025/venv/lib/python3.13/site-packages/qdrant_client/http/api_client.py:95\u001b[39m, in \u001b[36mApiClient.request\u001b[39m\u001b[34m(self, type_, method, url, path_params, **kwargs)\u001b[39m\n\u001b[32m 93\u001b[39m kwargs[\u001b[33m\"\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mint\u001b[39m(kwargs[\u001b[33m\"\u001b[39m\u001b[33mparams\u001b[39m\u001b[33m\"\u001b[39m][\u001b[33m\"\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m 94\u001b[39m request = \u001b[38;5;28mself\u001b[39m._client.build_request(method, url, **kwargs)\n\u001b[32m---> \u001b[39m\u001b[32m95\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtype_\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/dev/rag_tg_2025/venv/lib/python3.13/site-packages/qdrant_client/http/api_client.py:130\u001b[39m, in \u001b[36mApiClient.send\u001b[39m\u001b[34m(self, request, type_)\u001b[39m\n\u001b[32m 128\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ValidationError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 129\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResponseHandlingException(e)\n\u001b[32m--> \u001b[39m\u001b[32m130\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedResponse.for_response(response)\n", "\u001b[31mUnexpectedResponse\u001b[39m: Unexpected Response: 404 (Not Found)\nRaw response content:\nb'{\"status\":{\"error\":\"Not found: Collection `my_collection` doesn\\'t exist!\"},\"time\":0.004926893}'" ] } ], "source": [ "results = client.query_points(\n", " collection_name=\"my_collection\",\n", " query=(-np.ones(384)).tolist(),\n", " limit=1\n", ")" ] }, { "cell_type": "code", "execution_count": 43, "id": "afd7344b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ScoredPoint(id=2, version=3, score=0.9999998, payload={'text': 'Пример документа 2'}, vector=None, shard_key=None, order_value=None)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results.points[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "8060434c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.3" } }, "nbformat": 4, "nbformat_minor": 5 }