{ "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "executionInfo": { "elapsed": 49, "status": "ok", "timestamp": 1777121871809, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "C89EBjEsx9Gq", "outputId": "e76f8c9c-cdcf-4fc7-94b5-4e8a30009518" }, "outputs": [ { "data": { "text/plain": [ "'e:\\\\gradution project'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "import os\n", "os.getcwd()\n", "os.chdir(\"/gradution project\")\n", "os.getcwd()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 11569, "status": "ok", "timestamp": 1777121884875, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "Eel6SUFWvNMW", "outputId": "10ac3316-9b05-44f4-81b8-4ba3ff26cc99" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "All modules imported successfully\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from tqdm.notebook import tqdm\n", "\n", "from src.similarity_model import preprocess_dataset\n", "from src.similarity_model import train_embedding_engine\n", "from src.similarity_model import search_by_text\n", "from src.similarity_model import find_similar_projects\n", "from Data.database.sql_connector import (\n", " load_preprocessed_projects,\n", " engine\n", ")\n", "\n", "print(\"All modules imported successfully\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Engine created\n" ] } ], "source": [ "from sqlalchemy import create_engine\n", "import urllib\n", "\n", "SERVER = \"innotrack-sql-server.database.windows.net\"\n", "DATABASE = \"InnoTrackDB\"\n", "USERNAME = \"innotrackadmin\"\n", "PASSWORD = \"Innotrack@admin233\"\n", "\n", "params = urllib.parse.quote_plus(\n", " f\"DRIVER={{ODBC Driver 18 for SQL Server}};\"\n", " f\"SERVER={SERVER};\"\n", " f\"DATABASE={DATABASE};\"\n", " f\"UID={USERNAME};\"\n", " f\"PWD={PASSWORD};\"\n", " \"Encrypt=yes;\"\n", " \"TrustServerCertificate=no;\"\n", " \"Connection Timeout=30;\"\n", ")\n", "\n", "engine = create_engine(\n", " f\"mssql+pyodbc:///?odbc_connect={params}\"\n", ")\n", "\n", "print(\"Engine created\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TABLE_NAME
0Teams
1ChatRooms
2JoinRequests
3Projects
4TeamMembers
5ChatMessages
6Feedbacks
7OriginalityReports
8ProjectAttachments
9ProjectTechnologies
10VectorEmbeddings
11ChatMessageAttachments
12SimilarProjects
13AuditLogs
14AcademicYears
15Schema
16Job
17State
18JobParameter
19JobQueue
20database_firewall_rules
21Server
22List
23Set
24Counter
25Hash
26AggregatedCounter
27__EFMigrationsHistory
28Departments
29Skills_Backup
30Projects_Backup
31Domains
32PreProcessed_Projects
33Skills
34Technologies
35Users
36Notifications
37StudentSkills
\n", "
" ], "text/plain": [ " TABLE_NAME\n", "0 Teams\n", "1 ChatRooms\n", "2 JoinRequests\n", "3 Projects\n", "4 TeamMembers\n", "5 ChatMessages\n", "6 Feedbacks\n", "7 OriginalityReports\n", "8 ProjectAttachments\n", "9 ProjectTechnologies\n", "10 VectorEmbeddings\n", "11 ChatMessageAttachments\n", "12 SimilarProjects\n", "13 AuditLogs\n", "14 AcademicYears\n", "15 Schema\n", "16 Job\n", "17 State\n", "18 JobParameter\n", "19 JobQueue\n", "20 database_firewall_rules\n", "21 Server\n", "22 List\n", "23 Set\n", "24 Counter\n", "25 Hash\n", "26 AggregatedCounter\n", "27 __EFMigrationsHistory\n", "28 Departments\n", "29 Skills_Backup\n", "30 Projects_Backup\n", "31 Domains\n", "32 PreProcessed_Projects\n", "33 Skills\n", "34 Technologies\n", "35 Users\n", "36 Notifications\n", "37 StudentSkills" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with engine.connect() as conn:\n", "\n", " tables = pd.read_sql(\n", " \"\"\"\n", " SELECT TABLE_NAME\n", " FROM INFORMATION_SCHEMA.TABLES\n", " \"\"\",\n", " conn\n", " )\n", "\n", "tables" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 712 }, "executionInfo": { "elapsed": 93, "status": "ok", "timestamp": 1777121887495, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "QlD5ukNTDS6S", "outputId": "43dab901-85ac-4386-d77a-2bb24af4b111" }, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: 'E:\\\\gradution project\\\\Data\\\\raw\\\\Graduation Projects - Sheet1.xlsx'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m file_path = \u001b[33mr\"E:\\gradution project\\Data\\raw\\Graduation Projects - Sheet1.xlsx\"\u001b[39m\n\u001b[32m 2\u001b[39m \n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m df_raw = pd.read_excel(file_path)\n\u001b[32m 4\u001b[39m \n\u001b[32m 5\u001b[39m print(\u001b[33m\"Rows:\"\u001b[39m, len(df_raw))\n\u001b[32m 6\u001b[39m df_raw.tail()\n", "\u001b[36mFile \u001b[39m\u001b[32me:\\gradution project\\.venv\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:481\u001b[39m, in \u001b[36mread_excel\u001b[39m\u001b[34m(io, sheet_name, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_format, thousands, decimal, comment, skipfooter, storage_options, dtype_backend, engine_kwargs)\u001b[39m\n\u001b[32m 479\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(io, ExcelFile):\n\u001b[32m 480\u001b[39m should_close = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m481\u001b[39m io = \u001b[30;43mExcelFile\u001b[39;49m\u001b[30;43m(\u001b[39;49m\n\u001b[32m 482\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43mio\u001b[39;49m\u001b[30;43m,\u001b[39;49m\n\u001b[32m 483\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43mstorage_options\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43mstorage_options\u001b[39;49m\u001b[30;43m,\u001b[39;49m\n\u001b[32m 484\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43mengine\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43mengine\u001b[39;49m\u001b[30;43m,\u001b[39;49m\n\u001b[32m 485\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43mengine_kwargs\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43mengine_kwargs\u001b[39;49m\u001b[30;43m,\u001b[39;49m\n\u001b[32m 486\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 487\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;129;01mand\u001b[39;00m engine != io.engine:\n\u001b[32m 488\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 489\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mEngine should not be specified when passing \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 490\u001b[39m \u001b[33m\"\u001b[39m\u001b[33man ExcelFile - ExcelFile already has the engine set\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 491\u001b[39m )\n", "\u001b[36mFile \u001b[39m\u001b[32me:\\gradution project\\.venv\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1604\u001b[39m, in \u001b[36mExcelFile.__init__\u001b[39m\u001b[34m(self, path_or_buffer, engine, storage_options, engine_kwargs)\u001b[39m\n\u001b[32m 1601\u001b[39m ext = \u001b[33m\"\u001b[39m\u001b[33mxls\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1603\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1604\u001b[39m ext = \u001b[30;43minspect_excel_format\u001b[39;49m\u001b[30;43m(\u001b[39;49m\n\u001b[32m 1605\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43mcontent_or_path\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43mpath_or_buffer\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mstorage_options\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43mstorage_options\u001b[39;49m\n\u001b[32m 1606\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 1607\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1608\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 1609\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mExcel file format cannot be determined, you must specify \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1610\u001b[39m \u001b[33m\"\u001b[39m\u001b[33man engine manually.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1611\u001b[39m )\n", "\u001b[36mFile \u001b[39m\u001b[32me:\\gradution project\\.venv\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1452\u001b[39m, in \u001b[36minspect_excel_format\u001b[39m\u001b[34m(content_or_path, storage_options)\u001b[39m\n\u001b[32m 1417\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34minspect_excel_format\u001b[39m(\n\u001b[32m 1418\u001b[39m content_or_path: FilePath | ReadBuffer[\u001b[38;5;28mbytes\u001b[39m],\n\u001b[32m 1419\u001b[39m storage_options: StorageOptions | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 1420\u001b[39m ) -> \u001b[38;5;28mstr\u001b[39m | \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 1421\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1422\u001b[39m \u001b[33;03m Inspect the path or content of an excel file and get its format.\u001b[39;00m\n\u001b[32m 1423\u001b[39m \n\u001b[32m (...)\u001b[39m\u001b[32m 1450\u001b[39m \u001b[33;03m If resulting stream does not have an XLS signature and is not a valid zipfile.\u001b[39;00m\n\u001b[32m 1451\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1452\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[30;43mget_handle\u001b[39;49m\u001b[30;43m(\u001b[39;49m\n\u001b[32m 1453\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43mcontent_or_path\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43m\"\u001b[39;49m\u001b[30;43mrb\u001b[39;49m\u001b[30;43m\"\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mstorage_options\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43mstorage_options\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mis_text\u001b[39;49m\u001b[30;43m=\u001b[39;49m\u001b[30;43;01mFalse\u001b[39;49;00m\n\u001b[32m 1454\u001b[39m \u001b[30;43m \u001b[39;49m\u001b[30;43m)\u001b[39;49m \u001b[38;5;28;01mas\u001b[39;00m handle:\n\u001b[32m 1455\u001b[39m stream = handle.handle\n\u001b[32m 1456\u001b[39m stream.seek(\u001b[32m0\u001b[39m)\n", "\u001b[36mFile \u001b[39m\u001b[32me:\\gradution project\\.venv\\Lib\\site-packages\\pandas\\io\\common.py:935\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 926\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(\n\u001b[32m 927\u001b[39m handle,\n\u001b[32m 928\u001b[39m ioargs.mode,\n\u001b[32m (...)\u001b[39m\u001b[32m 931\u001b[39m newline=\u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 932\u001b[39m )\n\u001b[32m 933\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 934\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m935\u001b[39m handle = \u001b[30;43mopen\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43mhandle\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43mioargs\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mmode\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\u001b[32m 936\u001b[39m handles.append(handle)\n\u001b[32m 938\u001b[39m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n", "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: 'E:\\\\gradution project\\\\Data\\\\raw\\\\Graduation Projects - Sheet1.xlsx'" ] } ], "source": [ "query = \"\"\"\n", "SELECT *\n", "FROM Projects\n", "\"\"\"\n", "\n", "df_raw= pd.read_sql(query, engine)\n", "\n", "\n", "print(\"Rows:\", len(df_raw))\n", "df_raw.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 695 }, "executionInfo": { "elapsed": 8196, "status": "ok", "timestamp": 1777121896550, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "CbhNr97MF3G_", "outputId": "92994227-fc55-42ee-db94-53815d8e0b49" }, "outputs": [], "source": [ "clean_df = preprocess_dataset(df_raw)\n", "\n", "print(\"Rows after cleaning:\", len(clean_df))\n", "clean_df.tail()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1777121896561, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "Cddf8xCADlIr", "outputId": "89f60abf-fcf6-4d05-e53e-5c23404229c9" }, "outputs": [], "source": [ "print(type(clean_df.loc[0, \"features\"]))\n", "print(clean_df.loc[0, \"features\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1777121896571, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "FVpnglAmGbyU", "outputId": "347ad1f0-b83e-44f7-bdf0-7ac682fd3be6" }, "outputs": [], "source": [ "print(clean_df.columns.tolist())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "object_cols = clean_df.select_dtypes(include=\"object\").columns\n", "\n", "for col in object_cols:\n", " clean_df[col] = clean_df[col].astype(str)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 28, "status": "ok", "timestamp": 1777121896600, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "viSKYOBUSy9-", "outputId": "9cd3450e-2ad3-47c9-e64f-29317d682ab3" }, "outputs": [], "source": [ "clean_df.to_parquet(\"Data/processed/projects_clean.parquet\", index=False)\n", "clean_df.to_csv(\"Data/processed/projects_clean.csv\", index=False)\n", "\n", "print(\"Saved cleaned dataset\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "e88a3bd74a0d45a9a6f4c75fa912f14b", "84524d1a1b544ba8b279bcf255414b46", "13a35346277d46e4a0e8ce3c97af9d80", "eea5a279cf4745ea95f436cd5fc5275c", "26153e11473049bd94767d6e2ade78ce", "1ceff15e67754111b9d28aa79545998e", "b3ad8424cabb497fb65961b7a707c48d", "3c076741fb62422a90f2572c455f3495", "7979057fb043438cb98737e36403187b", "cb460715a0974fceb2fdeb31e37b03dc", "c4dffeedf7d349008a7228a24b325d7d" ] }, "executionInfo": { "elapsed": 1317, "status": "ok", "timestamp": 1777121906722, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "5i8JvSFnHdTI", "outputId": "aca927cb-4c37-4ca3-d2c2-a164597cb5b2" }, "outputs": [], "source": [ "engine = train_embedding_engine(\n", " data_path=\"Data/processed/projects_clean.parquet\"\n", ")\n", "\n", "print(\"Embedding trained successfully\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 25, "status": "ok", "timestamp": 1777121906750, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "IsScZiTG74WJ", "outputId": "d696efc1-04db-4a2f-c9fe-b1e97af92c2c" }, "outputs": [], "source": [ "from importlib import reload\n", "import src.similarity_model as fs\n", "\n", "reload(fs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 29, "status": "ok", "timestamp": 1777121906780, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "N31P2msk757o", "outputId": "eb03780d-e519-4ea2-820c-ab701a5cee4b" }, "outputs": [], "source": [ "from importlib import reload\n", "import src.similarity_model as sim\n", "\n", "reload(sim)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 302 }, "executionInfo": { "elapsed": 193, "status": "ok", "timestamp": 1777123794574, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "T77gmDkqHuTI", "outputId": "515a8d56-2978-4efa-f4d4-b1d4c631adc2" }, "outputs": [], "source": [ "final_results = find_similar_projects(\n", " title=\"Smart Library app\",\n", " description=\"\"\"\n", " Library chatbot book recommendation search engine \n", " \"\"\",\n", " top_k=5\n", ")\n", "final_results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 27107, "status": "ok", "timestamp": 1777121936703, "user": { "displayName": "yossef assem", "userId": "00346058032754430335" }, "user_tz": -180 }, "id": "suEasutzr33J", "outputId": "dacf67d9-6725-4437-bb10-6dd8e67d03f3" }, "outputs": [], "source": [ "! python -m src.similarity_model.evaluation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ic6XSADKuofS" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "authorship_tag": "ABX9TyNMA3/B9+b9xx1ogSJIFr/g", "gpuType": "T4", "mount_file_id": "1iTaLZau9bjv5c8jEr21XE8vntXZBV4i2", "provenance": [] }, "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "13a35346277d46e4a0e8ce3c97af9d80": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3c076741fb62422a90f2572c455f3495", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7979057fb043438cb98737e36403187b", "value": 1 } }, "1ceff15e67754111b9d28aa79545998e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "26153e11473049bd94767d6e2ade78ce": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3c076741fb62422a90f2572c455f3495": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7979057fb043438cb98737e36403187b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "84524d1a1b544ba8b279bcf255414b46": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1ceff15e67754111b9d28aa79545998e", "placeholder": "​", "style": "IPY_MODEL_b3ad8424cabb497fb65961b7a707c48d", "value": "Batches: 100%" } }, "b3ad8424cabb497fb65961b7a707c48d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c4dffeedf7d349008a7228a24b325d7d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "cb460715a0974fceb2fdeb31e37b03dc": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e88a3bd74a0d45a9a6f4c75fa912f14b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_84524d1a1b544ba8b279bcf255414b46", "IPY_MODEL_13a35346277d46e4a0e8ce3c97af9d80", "IPY_MODEL_eea5a279cf4745ea95f436cd5fc5275c" ], "layout": "IPY_MODEL_26153e11473049bd94767d6e2ade78ce" } }, "eea5a279cf4745ea95f436cd5fc5275c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cb460715a0974fceb2fdeb31e37b03dc", "placeholder": "​", "style": "IPY_MODEL_c4dffeedf7d349008a7228a24b325d7d", "value": " 1/1 [00:00<00:00,  5.13it/s]" } } } } }, "nbformat": 4, "nbformat_minor": 0 }