{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "49c6b17c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'e:\\\\gradution project'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"import os\n",
"os.getcwd()\n",
"os.chdir(\"/gradution project\")\n",
"os.getcwd()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "509448bd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" CONFIG LOADED:\n",
"ENV: development\n",
"DEBUG_MODE: True\n",
"MODELS: ['gemini-3.1-flash-lite-preview', 'gemini-2.5-flash-lite', 'gemini-2.5-flash', 'gemini-2.5-pro']\n",
"MAX_RETRIES: 3\n",
"IDEA_TEMP: 0.9\n",
"=================================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:29:43,014 | INFO | Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n",
"e:\\gradution project\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:949: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"2026-06-04 00:29:46,381 | INFO | Use pytorch device_name: cpu\n",
"2026-06-04 00:29:46,388 | INFO | Loading faiss with AVX2 support.\n",
"2026-06-04 00:29:46,418 | INFO | Successfully loaded faiss with AVX2 support.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SQL Connected Successfully\n",
"All modules imported successfully\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm.notebook import tqdm\n",
"\n",
"from src.similarity_model import preprocess_dataset\n",
"from src.similarity_model import train_embedding_engine\n",
"from src.similarity_model import search_by_text\n",
"from src.similarity_model import find_similar_projects\n",
"from src.similarity_model import extract_features\n",
"\n",
"from src.similarity_model import normalize_text\n",
"from src.similarity_model import compute_feature_similarity\n",
"from Data.database.sql_connector import (\n",
" load_preprocessed_projects,\n",
" engine\n",
")\n",
"\n",
"print(\"All modules imported successfully\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "0bf93b8e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Engine created\n"
]
}
],
"source": [
"from sqlalchemy import create_engine\n",
"import urllib\n",
"\n",
"SERVER = \"innotrack-sql-server.database.windows.net\"\n",
"DATABASE = \"InnoTrackDB\"\n",
"USERNAME = \"innotrackadmin\"\n",
"PASSWORD = \"Innotrack@admin233\"\n",
"\n",
"params = urllib.parse.quote_plus(\n",
" f\"DRIVER={{ODBC Driver 18 for SQL Server}};\"\n",
" f\"SERVER={SERVER};\"\n",
" f\"DATABASE={DATABASE};\"\n",
" f\"UID={USERNAME};\"\n",
" f\"PWD={PASSWORD};\"\n",
" \"Encrypt=yes;\"\n",
" \"TrustServerCertificate=no;\"\n",
" \"Connection Timeout=30;\"\n",
")\n",
"\n",
"engine = create_engine(\n",
" f\"mssql+pyodbc:///?odbc_connect={params}\"\n",
")\n",
"\n",
"print(\"Engine created\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "11f40d1d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" TABLE_NAME | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Teams | \n",
"
\n",
" \n",
" | 1 | \n",
" ChatRooms | \n",
"
\n",
" \n",
" | 2 | \n",
" ChatMessageHiddens | \n",
"
\n",
" \n",
" | 3 | \n",
" JoinRequests | \n",
"
\n",
" \n",
" | 4 | \n",
" ChatMessageReactions | \n",
"
\n",
" \n",
" | 5 | \n",
" Projects | \n",
"
\n",
" \n",
" | 6 | \n",
" TeamMembers | \n",
"
\n",
" \n",
" | 7 | \n",
" ProjectTechnologies_Backup | \n",
"
\n",
" \n",
" | 8 | \n",
" ChatMessages | \n",
"
\n",
" \n",
" | 9 | \n",
" Feedbacks | \n",
"
\n",
" \n",
" | 10 | \n",
" MissingProjectTechsSplit | \n",
"
\n",
" \n",
" | 11 | \n",
" PreProcessed_Projects | \n",
"
\n",
" \n",
" | 12 | \n",
" OriginalityReports | \n",
"
\n",
" \n",
" | 13 | \n",
" ProjectAttachments | \n",
"
\n",
" \n",
" | 14 | \n",
" ProjectTechnologies | \n",
"
\n",
" \n",
" | 15 | \n",
" VectorEmbeddings | \n",
"
\n",
" \n",
" | 16 | \n",
" ChatMessageAttachments | \n",
"
\n",
" \n",
" | 17 | \n",
" SimilarProjects | \n",
"
\n",
" \n",
" | 18 | \n",
" AuditLogs | \n",
"
\n",
" \n",
" | 19 | \n",
" AcademicYears | \n",
"
\n",
" \n",
" | 20 | \n",
" Schema | \n",
"
\n",
" \n",
" | 21 | \n",
" Job | \n",
"
\n",
" \n",
" | 22 | \n",
" State | \n",
"
\n",
" \n",
" | 23 | \n",
" JobParameter | \n",
"
\n",
" \n",
" | 24 | \n",
" JobQueue | \n",
"
\n",
" \n",
" | 25 | \n",
" database_firewall_rules | \n",
"
\n",
" \n",
" | 26 | \n",
" Server | \n",
"
\n",
" \n",
" | 27 | \n",
" List | \n",
"
\n",
" \n",
" | 28 | \n",
" Set | \n",
"
\n",
" \n",
" | 29 | \n",
" Counter | \n",
"
\n",
" \n",
" | 30 | \n",
" Hash | \n",
"
\n",
" \n",
" | 31 | \n",
" AggregatedCounter | \n",
"
\n",
" \n",
" | 32 | \n",
" __EFMigrationsHistory | \n",
"
\n",
" \n",
" | 33 | \n",
" Departments | \n",
"
\n",
" \n",
" | 34 | \n",
" Skills_Backup | \n",
"
\n",
" \n",
" | 35 | \n",
" Projects_Backup | \n",
"
\n",
" \n",
" | 36 | \n",
" Domains | \n",
"
\n",
" \n",
" | 37 | \n",
" Skills | \n",
"
\n",
" \n",
" | 38 | \n",
" Technologies | \n",
"
\n",
" \n",
" | 39 | \n",
" Users | \n",
"
\n",
" \n",
" | 40 | \n",
" ProjectDrafts | \n",
"
\n",
" \n",
" | 41 | \n",
" Notifications | \n",
"
\n",
" \n",
" | 42 | \n",
" ProjectDraftTechnologies | \n",
"
\n",
" \n",
" | 43 | \n",
" StudentSkills | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" TABLE_NAME\n",
"0 Teams\n",
"1 ChatRooms\n",
"2 ChatMessageHiddens\n",
"3 JoinRequests\n",
"4 ChatMessageReactions\n",
"5 Projects\n",
"6 TeamMembers\n",
"7 ProjectTechnologies_Backup\n",
"8 ChatMessages\n",
"9 Feedbacks\n",
"10 MissingProjectTechsSplit\n",
"11 PreProcessed_Projects\n",
"12 OriginalityReports\n",
"13 ProjectAttachments\n",
"14 ProjectTechnologies\n",
"15 VectorEmbeddings\n",
"16 ChatMessageAttachments\n",
"17 SimilarProjects\n",
"18 AuditLogs\n",
"19 AcademicYears\n",
"20 Schema\n",
"21 Job\n",
"22 State\n",
"23 JobParameter\n",
"24 JobQueue\n",
"25 database_firewall_rules\n",
"26 Server\n",
"27 List\n",
"28 Set\n",
"29 Counter\n",
"30 Hash\n",
"31 AggregatedCounter\n",
"32 __EFMigrationsHistory\n",
"33 Departments\n",
"34 Skills_Backup\n",
"35 Projects_Backup\n",
"36 Domains\n",
"37 Skills\n",
"38 Technologies\n",
"39 Users\n",
"40 ProjectDrafts\n",
"41 Notifications\n",
"42 ProjectDraftTechnologies\n",
"43 StudentSkills"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with engine.connect() as conn:\n",
"\n",
" tables = pd.read_sql(\n",
" \"\"\"\n",
" SELECT TABLE_NAME\n",
" FROM INFORMATION_SCHEMA.TABLES\n",
" \"\"\",\n",
" conn\n",
" )\n",
"\n",
"tables"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5d1125cb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" submitted_at | \n",
" project_title | \n",
" student_names | \n",
" year | \n",
" abstract | \n",
" description | \n",
" problem_statement | \n",
" proposed_solution | \n",
" objectives | \n",
" full_content | \n",
" clean_text | \n",
" word_count | \n",
" features | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" NaT | \n",
" 3D hand game for neuromuscular patients | \n",
" Ahmed Mansour Mohamed Saber, Ahmed Mohamed Moh... | \n",
" 2017 | \n",
" In this project we have designed and implement... | \n",
" A virtual rehabilitation system that uses a Le... | \n",
" Neuromuscular patients suffer from nerve atrop... | \n",
" The development of a 3D interactive game integ... | \n",
" 1. Develop a scalable and maintainable solutio... | \n",
" 3D hand game for neuromuscular patients. 3D ha... | \n",
" 3d hand game for neuromuscular patients. 3d ha... | \n",
" 172 | \n",
" \"\\\"[\\\\\\\"Leap Motion controller sensor\\\\\\\", \\\\\\... | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" NaT | \n",
" 3D Laser Scanning | \n",
" Aya Essam Hegazi, Asmaa Abd EL-Aziz, Ebtehal E... | \n",
" 2024 | \n",
" 3D scanning is used in many applications such ... | \n",
" This project implements a low-cost 3D laser sc... | \n",
" Existing 3D scanning devices are often extreme... | \n",
" A low-cost 3D laser scanning system that utili... | \n",
" 1. Improve overall productivity and workflow o... | \n",
" 3D Laser Scanning. 3D Laser Scanning. 3D scann... | \n",
" 3d laser scanning. 3d laser scanning. 3d scann... | \n",
" 185 | \n",
" \"\\\"[\\\\\\\"3d laser scanning\\\\\\\", \\\\\\\"Hand-held l... | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" NaT | \n",
" A Smart Automatic System for Criminal Identifi... | \n",
" Yousef Yacoub Mohammed, Ahmed Mohamed Hassan,\\... | \n",
" 2020 | \n",
" The increasing use of biometric technologies i... | \n",
" This project develops an automated criminal id... | \n",
" Traditional identification methods, such as ph... | \n",
" A real-time facial recognition system develope... | \n",
" 1. Support future scalability and feature expa... | \n",
" A Smart Automatic System for Criminal Identifi... | \n",
" a smart automatic system for criminal identifi... | \n",
" 138 | \n",
" \"\\\"[\\\\\\\"real-time face recognition system\\\\\\\",... | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" NaT | \n",
" Advanced Educational Platform “ABSTHALK” | \n",
" Mohamed Nasser Maher, Karim Ashraf Salah Eldie... | \n",
" 2025 | \n",
" The Educational Platform for Students and Teac... | \n",
" ABSTHALK is a comprehensive, role-based e-lear... | \n",
" Traditional learning methods often lack access... | \n",
" The project proposes a structured, role-based,... | \n",
" 1. Provide interactive educational tools and r... | \n",
" Advanced Educational Platform “ABSTHALK”. Adva... | \n",
" advanced educational platform absthalk . advan... | \n",
" 192 | \n",
" \"\\\"[\\\\\\\"Role-based management system\\\\\\\", \\\\\\\"... | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" NaT | \n",
" Agricultural Information and Management System | \n",
" Ahmed Mohamed, Omar Hassan, Mahmoud Ali ,Mazen... | \n",
" 2020 | \n",
" It is a permanent link between the decision-ma... | \n",
" This project is an integrated information syst... | \n",
" The competent authorities of the Ministry of A... | \n",
" The development of an integrated information s... | \n",
" 1. Reduce operational complexity and improve e... | \n",
" Agricultural Information and Management System... | \n",
" agricultural information and management system... | \n",
" 109 | \n",
" \"\\\"[\\\\\\\"centralized database\\\\\\\", \\\\\\\"track la... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id submitted_at project_title \\\n",
"0 1 NaT 3D hand game for neuromuscular patients \n",
"1 2 NaT 3D Laser Scanning \n",
"2 3 NaT A Smart Automatic System for Criminal Identifi... \n",
"3 4 NaT Advanced Educational Platform “ABSTHALK” \n",
"4 5 NaT Agricultural Information and Management System \n",
"\n",
" student_names year \\\n",
"0 Ahmed Mansour Mohamed Saber, Ahmed Mohamed Moh... 2017 \n",
"1 Aya Essam Hegazi, Asmaa Abd EL-Aziz, Ebtehal E... 2024 \n",
"2 Yousef Yacoub Mohammed, Ahmed Mohamed Hassan,\\... 2020 \n",
"3 Mohamed Nasser Maher, Karim Ashraf Salah Eldie... 2025 \n",
"4 Ahmed Mohamed, Omar Hassan, Mahmoud Ali ,Mazen... 2020 \n",
"\n",
" abstract \\\n",
"0 In this project we have designed and implement... \n",
"1 3D scanning is used in many applications such ... \n",
"2 The increasing use of biometric technologies i... \n",
"3 The Educational Platform for Students and Teac... \n",
"4 It is a permanent link between the decision-ma... \n",
"\n",
" description \\\n",
"0 A virtual rehabilitation system that uses a Le... \n",
"1 This project implements a low-cost 3D laser sc... \n",
"2 This project develops an automated criminal id... \n",
"3 ABSTHALK is a comprehensive, role-based e-lear... \n",
"4 This project is an integrated information syst... \n",
"\n",
" problem_statement \\\n",
"0 Neuromuscular patients suffer from nerve atrop... \n",
"1 Existing 3D scanning devices are often extreme... \n",
"2 Traditional identification methods, such as ph... \n",
"3 Traditional learning methods often lack access... \n",
"4 The competent authorities of the Ministry of A... \n",
"\n",
" proposed_solution \\\n",
"0 The development of a 3D interactive game integ... \n",
"1 A low-cost 3D laser scanning system that utili... \n",
"2 A real-time facial recognition system develope... \n",
"3 The project proposes a structured, role-based,... \n",
"4 The development of an integrated information s... \n",
"\n",
" objectives \\\n",
"0 1. Develop a scalable and maintainable solutio... \n",
"1 1. Improve overall productivity and workflow o... \n",
"2 1. Support future scalability and feature expa... \n",
"3 1. Provide interactive educational tools and r... \n",
"4 1. Reduce operational complexity and improve e... \n",
"\n",
" full_content \\\n",
"0 3D hand game for neuromuscular patients. 3D ha... \n",
"1 3D Laser Scanning. 3D Laser Scanning. 3D scann... \n",
"2 A Smart Automatic System for Criminal Identifi... \n",
"3 Advanced Educational Platform “ABSTHALK”. Adva... \n",
"4 Agricultural Information and Management System... \n",
"\n",
" clean_text word_count \\\n",
"0 3d hand game for neuromuscular patients. 3d ha... 172 \n",
"1 3d laser scanning. 3d laser scanning. 3d scann... 185 \n",
"2 a smart automatic system for criminal identifi... 138 \n",
"3 advanced educational platform absthalk . advan... 192 \n",
"4 agricultural information and management system... 109 \n",
"\n",
" features \n",
"0 \"\\\"[\\\\\\\"Leap Motion controller sensor\\\\\\\", \\\\\\... \n",
"1 \"\\\"[\\\\\\\"3d laser scanning\\\\\\\", \\\\\\\"Hand-held l... \n",
"2 \"\\\"[\\\\\\\"real-time face recognition system\\\\\\\",... \n",
"3 \"\\\"[\\\\\\\"Role-based management system\\\\\\\", \\\\\\\"... \n",
"4 \"\\\"[\\\\\\\"centralized database\\\\\\\", \\\\\\\"track la... "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"\"\"\n",
"SELECT *\n",
"FROM PreProcessed_Projects\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4429717d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['id', 'submitted_at', 'project_title', 'student_names', 'year', 'abstract', 'description', 'problem_statement', 'proposed_solution', 'objectives', 'full_content', 'clean_text', 'word_count', 'features']\n"
]
}
],
"source": [
"print(df.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9925da4c",
"metadata": {},
"outputs": [],
"source": [
"df = df.rename(columns={\n",
" \"Title\": \"project_title\",\n",
" \"Description\": \"description\",\n",
" \"Abstract\": \"abstract\"\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "fc62d4f3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" submitted_at | \n",
" project_title | \n",
" student_names | \n",
" year | \n",
" abstract | \n",
" description | \n",
" problem_statement | \n",
" proposed_solution | \n",
" objectives | \n",
" full_content | \n",
" clean_text | \n",
" word_count | \n",
" features | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" NaT | \n",
" 3D hand game for neuromuscular patients | \n",
" Ahmed Mansour Mohamed Saber, Ahmed Mohamed Moh... | \n",
" 2017 | \n",
" In this project we have designed and implement... | \n",
" A virtual rehabilitation system that uses a Le... | \n",
" Neuromuscular patients suffer from nerve atrop... | \n",
" The development of a 3D interactive game integ... | \n",
" 1. Develop a scalable and maintainable solutio... | \n",
" 3D hand game for neuromuscular patients. 3D ha... | \n",
" 3d hand game for neuromuscular patients. 3d ha... | \n",
" 172 | \n",
" \"\\\"[\\\\\\\"Leap Motion controller sensor\\\\\\\", \\\\\\... | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" NaT | \n",
" 3D Laser Scanning | \n",
" Aya Essam Hegazi, Asmaa Abd EL-Aziz, Ebtehal E... | \n",
" 2024 | \n",
" 3D scanning is used in many applications such ... | \n",
" This project implements a low-cost 3D laser sc... | \n",
" Existing 3D scanning devices are often extreme... | \n",
" A low-cost 3D laser scanning system that utili... | \n",
" 1. Improve overall productivity and workflow o... | \n",
" 3D Laser Scanning. 3D Laser Scanning. 3D scann... | \n",
" 3d laser scanning. 3d laser scanning. 3d scann... | \n",
" 185 | \n",
" \"\\\"[\\\\\\\"3d laser scanning\\\\\\\", \\\\\\\"Hand-held l... | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" NaT | \n",
" A Smart Automatic System for Criminal Identifi... | \n",
" Yousef Yacoub Mohammed, Ahmed Mohamed Hassan,\\... | \n",
" 2020 | \n",
" The increasing use of biometric technologies i... | \n",
" This project develops an automated criminal id... | \n",
" Traditional identification methods, such as ph... | \n",
" A real-time facial recognition system develope... | \n",
" 1. Support future scalability and feature expa... | \n",
" A Smart Automatic System for Criminal Identifi... | \n",
" a smart automatic system for criminal identifi... | \n",
" 138 | \n",
" \"\\\"[\\\\\\\"real-time face recognition system\\\\\\\",... | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" NaT | \n",
" Advanced Educational Platform “ABSTHALK” | \n",
" Mohamed Nasser Maher, Karim Ashraf Salah Eldie... | \n",
" 2025 | \n",
" The Educational Platform for Students and Teac... | \n",
" ABSTHALK is a comprehensive, role-based e-lear... | \n",
" Traditional learning methods often lack access... | \n",
" The project proposes a structured, role-based,... | \n",
" 1. Provide interactive educational tools and r... | \n",
" Advanced Educational Platform “ABSTHALK”. Adva... | \n",
" advanced educational platform absthalk . advan... | \n",
" 192 | \n",
" \"\\\"[\\\\\\\"Role-based management system\\\\\\\", \\\\\\\"... | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" NaT | \n",
" Agricultural Information and Management System | \n",
" Ahmed Mohamed, Omar Hassan, Mahmoud Ali ,Mazen... | \n",
" 2020 | \n",
" It is a permanent link between the decision-ma... | \n",
" This project is an integrated information syst... | \n",
" The competent authorities of the Ministry of A... | \n",
" The development of an integrated information s... | \n",
" 1. Reduce operational complexity and improve e... | \n",
" Agricultural Information and Management System... | \n",
" agricultural information and management system... | \n",
" 109 | \n",
" \"\\\"[\\\\\\\"centralized database\\\\\\\", \\\\\\\"track la... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id submitted_at project_title \\\n",
"0 1 NaT 3D hand game for neuromuscular patients \n",
"1 2 NaT 3D Laser Scanning \n",
"2 3 NaT A Smart Automatic System for Criminal Identifi... \n",
"3 4 NaT Advanced Educational Platform “ABSTHALK” \n",
"4 5 NaT Agricultural Information and Management System \n",
"\n",
" student_names year \\\n",
"0 Ahmed Mansour Mohamed Saber, Ahmed Mohamed Moh... 2017 \n",
"1 Aya Essam Hegazi, Asmaa Abd EL-Aziz, Ebtehal E... 2024 \n",
"2 Yousef Yacoub Mohammed, Ahmed Mohamed Hassan,\\... 2020 \n",
"3 Mohamed Nasser Maher, Karim Ashraf Salah Eldie... 2025 \n",
"4 Ahmed Mohamed, Omar Hassan, Mahmoud Ali ,Mazen... 2020 \n",
"\n",
" abstract \\\n",
"0 In this project we have designed and implement... \n",
"1 3D scanning is used in many applications such ... \n",
"2 The increasing use of biometric technologies i... \n",
"3 The Educational Platform for Students and Teac... \n",
"4 It is a permanent link between the decision-ma... \n",
"\n",
" description \\\n",
"0 A virtual rehabilitation system that uses a Le... \n",
"1 This project implements a low-cost 3D laser sc... \n",
"2 This project develops an automated criminal id... \n",
"3 ABSTHALK is a comprehensive, role-based e-lear... \n",
"4 This project is an integrated information syst... \n",
"\n",
" problem_statement \\\n",
"0 Neuromuscular patients suffer from nerve atrop... \n",
"1 Existing 3D scanning devices are often extreme... \n",
"2 Traditional identification methods, such as ph... \n",
"3 Traditional learning methods often lack access... \n",
"4 The competent authorities of the Ministry of A... \n",
"\n",
" proposed_solution \\\n",
"0 The development of a 3D interactive game integ... \n",
"1 A low-cost 3D laser scanning system that utili... \n",
"2 A real-time facial recognition system develope... \n",
"3 The project proposes a structured, role-based,... \n",
"4 The development of an integrated information s... \n",
"\n",
" objectives \\\n",
"0 1. Develop a scalable and maintainable solutio... \n",
"1 1. Improve overall productivity and workflow o... \n",
"2 1. Support future scalability and feature expa... \n",
"3 1. Provide interactive educational tools and r... \n",
"4 1. Reduce operational complexity and improve e... \n",
"\n",
" full_content \\\n",
"0 3D hand game for neuromuscular patients. 3D ha... \n",
"1 3D Laser Scanning. 3D Laser Scanning. 3D scann... \n",
"2 A Smart Automatic System for Criminal Identifi... \n",
"3 Advanced Educational Platform “ABSTHALK”. Adva... \n",
"4 Agricultural Information and Management System... \n",
"\n",
" clean_text word_count \\\n",
"0 3d hand game for neuromuscular patients. 3d ha... 172 \n",
"1 3d laser scanning. 3d laser scanning. 3d scann... 185 \n",
"2 a smart automatic system for criminal identifi... 138 \n",
"3 advanced educational platform absthalk . advan... 192 \n",
"4 agricultural information and management system... 109 \n",
"\n",
" features \n",
"0 \"\\\"[\\\\\\\"Leap Motion controller sensor\\\\\\\", \\\\\\... \n",
"1 \"\\\"[\\\\\\\"3d laser scanning\\\\\\\", \\\\\\\"Hand-held l... \n",
"2 \"\\\"[\\\\\\\"real-time face recognition system\\\\\\\",... \n",
"3 \"\\\"[\\\\\\\"Role-based management system\\\\\\\", \\\\\\\"... \n",
"4 \"\\\"[\\\\\\\"centralized database\\\\\\\", \\\\\\\"track la... "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"\"\"\n",
"SELECT *\n",
"FROM PreProcessed_Projects\n",
"\"\"\"\n",
"\n",
"clean_df = pd.read_sql(query, engine)\n",
"\n",
"clean_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e5af88d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(255, 14)\n"
]
}
],
"source": [
"print(clean_df.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "bb80639a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 255.000000\n",
"mean 236.031373\n",
"std 87.747619\n",
"min 24.000000\n",
"25% 173.500000\n",
"50% 225.000000\n",
"75% 287.000000\n",
"max 719.000000\n",
"Name: features, dtype: float64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clean_df[\"features\"].apply(len).describe()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "633cfec4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saved cleaned dataset\n"
]
}
],
"source": [
"clean_df.to_parquet(\"Data_gemini/projects_clean_gemini.parquet\", index=False)\n",
"clean_df.to_csv(\"Data_gemini/projects_clean_gemini.csv\", index=False)\n",
"\n",
"print(\"Saved cleaned dataset\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "36f84432",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(255, 14)\n"
]
}
],
"source": [
"test_df = pd.read_parquet(\n",
" \"Data_gemini/projects_clean_gemini.parquet\"\n",
")\n",
"\n",
"print(test_df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0dd86aec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['id', 'submitted_at', 'project_title', 'student_names', 'year', 'abstract', 'description', 'problem_statement', 'proposed_solution', 'objectives', 'full_content', 'clean_text', 'word_count', 'features']\n"
]
}
],
"source": [
"print(clean_df.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e3e96549",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" submitted_at | \n",
" project_title | \n",
" student_names | \n",
" year | \n",
" abstract | \n",
" description | \n",
" problem_statement | \n",
" proposed_solution | \n",
" objectives | \n",
" full_content | \n",
" clean_text | \n",
" word_count | \n",
" features | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" None | \n",
" 3D hand game for neuromuscular patients | \n",
" Ahmed Mansour Mohamed Saber, Ahmed Mohamed Moh... | \n",
" 2017 | \n",
" In this project we have designed and implement... | \n",
" A virtual rehabilitation system that uses a Le... | \n",
" Neuromuscular patients suffer from nerve atrop... | \n",
" The development of a 3D interactive game integ... | \n",
" 1. Develop a scalable and maintainable solutio... | \n",
" 3D hand game for neuromuscular patients. 3D ha... | \n",
" 3d hand game for neuromuscular patients. 3d ha... | \n",
" 172 | \n",
" \"\\\"[\\\\\\\"Leap Motion controller sensor\\\\\\\", \\\\\\... | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" None | \n",
" 3D Laser Scanning | \n",
" Aya Essam Hegazi, Asmaa Abd EL-Aziz, Ebtehal E... | \n",
" 2024 | \n",
" 3D scanning is used in many applications such ... | \n",
" This project implements a low-cost 3D laser sc... | \n",
" Existing 3D scanning devices are often extreme... | \n",
" A low-cost 3D laser scanning system that utili... | \n",
" 1. Improve overall productivity and workflow o... | \n",
" 3D Laser Scanning. 3D Laser Scanning. 3D scann... | \n",
" 3d laser scanning. 3d laser scanning. 3d scann... | \n",
" 185 | \n",
" \"\\\"[\\\\\\\"3d laser scanning\\\\\\\", \\\\\\\"Hand-held l... | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" None | \n",
" A Smart Automatic System for Criminal Identifi... | \n",
" Yousef Yacoub Mohammed, Ahmed Mohamed Hassan,\\... | \n",
" 2020 | \n",
" The increasing use of biometric technologies i... | \n",
" This project develops an automated criminal id... | \n",
" Traditional identification methods, such as ph... | \n",
" A real-time facial recognition system develope... | \n",
" 1. Support future scalability and feature expa... | \n",
" A Smart Automatic System for Criminal Identifi... | \n",
" a smart automatic system for criminal identifi... | \n",
" 138 | \n",
" \"\\\"[\\\\\\\"real-time face recognition system\\\\\\\",... | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" None | \n",
" Advanced Educational Platform “ABSTHALK” | \n",
" Mohamed Nasser Maher, Karim Ashraf Salah Eldie... | \n",
" 2025 | \n",
" The Educational Platform for Students and Teac... | \n",
" ABSTHALK is a comprehensive, role-based e-lear... | \n",
" Traditional learning methods often lack access... | \n",
" The project proposes a structured, role-based,... | \n",
" 1. Provide interactive educational tools and r... | \n",
" Advanced Educational Platform “ABSTHALK”. Adva... | \n",
" advanced educational platform absthalk . advan... | \n",
" 192 | \n",
" \"\\\"[\\\\\\\"Role-based management system\\\\\\\", \\\\\\\"... | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" None | \n",
" Agricultural Information and Management System | \n",
" Ahmed Mohamed, Omar Hassan, Mahmoud Ali ,Mazen... | \n",
" 2020 | \n",
" It is a permanent link between the decision-ma... | \n",
" This project is an integrated information syst... | \n",
" The competent authorities of the Ministry of A... | \n",
" The development of an integrated information s... | \n",
" 1. Reduce operational complexity and improve e... | \n",
" Agricultural Information and Management System... | \n",
" agricultural information and management system... | \n",
" 109 | \n",
" \"\\\"[\\\\\\\"centralized database\\\\\\\", \\\\\\\"track la... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id submitted_at project_title \\\n",
"0 1 None 3D hand game for neuromuscular patients \n",
"1 2 None 3D Laser Scanning \n",
"2 3 None A Smart Automatic System for Criminal Identifi... \n",
"3 4 None Advanced Educational Platform “ABSTHALK” \n",
"4 5 None Agricultural Information and Management System \n",
"\n",
" student_names year \\\n",
"0 Ahmed Mansour Mohamed Saber, Ahmed Mohamed Moh... 2017 \n",
"1 Aya Essam Hegazi, Asmaa Abd EL-Aziz, Ebtehal E... 2024 \n",
"2 Yousef Yacoub Mohammed, Ahmed Mohamed Hassan,\\... 2020 \n",
"3 Mohamed Nasser Maher, Karim Ashraf Salah Eldie... 2025 \n",
"4 Ahmed Mohamed, Omar Hassan, Mahmoud Ali ,Mazen... 2020 \n",
"\n",
" abstract \\\n",
"0 In this project we have designed and implement... \n",
"1 3D scanning is used in many applications such ... \n",
"2 The increasing use of biometric technologies i... \n",
"3 The Educational Platform for Students and Teac... \n",
"4 It is a permanent link between the decision-ma... \n",
"\n",
" description \\\n",
"0 A virtual rehabilitation system that uses a Le... \n",
"1 This project implements a low-cost 3D laser sc... \n",
"2 This project develops an automated criminal id... \n",
"3 ABSTHALK is a comprehensive, role-based e-lear... \n",
"4 This project is an integrated information syst... \n",
"\n",
" problem_statement \\\n",
"0 Neuromuscular patients suffer from nerve atrop... \n",
"1 Existing 3D scanning devices are often extreme... \n",
"2 Traditional identification methods, such as ph... \n",
"3 Traditional learning methods often lack access... \n",
"4 The competent authorities of the Ministry of A... \n",
"\n",
" proposed_solution \\\n",
"0 The development of a 3D interactive game integ... \n",
"1 A low-cost 3D laser scanning system that utili... \n",
"2 A real-time facial recognition system develope... \n",
"3 The project proposes a structured, role-based,... \n",
"4 The development of an integrated information s... \n",
"\n",
" objectives \\\n",
"0 1. Develop a scalable and maintainable solutio... \n",
"1 1. Improve overall productivity and workflow o... \n",
"2 1. Support future scalability and feature expa... \n",
"3 1. Provide interactive educational tools and r... \n",
"4 1. Reduce operational complexity and improve e... \n",
"\n",
" full_content \\\n",
"0 3D hand game for neuromuscular patients. 3D ha... \n",
"1 3D Laser Scanning. 3D Laser Scanning. 3D scann... \n",
"2 A Smart Automatic System for Criminal Identifi... \n",
"3 Advanced Educational Platform “ABSTHALK”. Adva... \n",
"4 Agricultural Information and Management System... \n",
"\n",
" clean_text word_count \\\n",
"0 3d hand game for neuromuscular patients. 3d ha... 172 \n",
"1 3d laser scanning. 3d laser scanning. 3d scann... 185 \n",
"2 a smart automatic system for criminal identifi... 138 \n",
"3 advanced educational platform absthalk . advan... 192 \n",
"4 agricultural information and management system... 109 \n",
"\n",
" features \n",
"0 \"\\\"[\\\\\\\"Leap Motion controller sensor\\\\\\\", \\\\\\... \n",
"1 \"\\\"[\\\\\\\"3d laser scanning\\\\\\\", \\\\\\\"Hand-held l... \n",
"2 \"\\\"[\\\\\\\"real-time face recognition system\\\\\\\",... \n",
"3 \"\\\"[\\\\\\\"Role-based management system\\\\\\\", \\\\\\\"... \n",
"4 \"\\\"[\\\\\\\"centralized database\\\\\\\", \\\\\\\"track la... "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_df = pd.read_sql(\n",
" \"SELECT TOP 5 * FROM PreProcessed_Projects\",\n",
" engine\n",
")\n",
"\n",
"test_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "078d4b8c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================================================================\n",
"Hospital Test\n",
"================================================================================\n",
"USING GEMINI FEATURE EXTRACTOR\n",
"CALLING GEMINI\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:08,804 | INFO | [LLM] model=gemini-3.1-flash-lite-preview | task=feature | attempt=1\n",
"2026-06-04 00:30:08,805 | INFO | AFC is enabled with max remote calls: 10.\n",
"2026-06-04 00:30:09,875 | INFO | HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent \"HTTP/1.1 200 OK\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PARSED FEATURES:\n",
"['appointment booking', 'patient records management', 'medical records storage', 'doctor dashboard', 'physician dashboard', 'ai chatbot']\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a32846683c0e41e48b4b5cac27cbb769",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature Count: 5\n",
"\n",
"- appointment booking\n",
"- patient records management\n",
"- medical records storage\n",
"- doctor dashboard\n",
"- ai chatbot\n",
"\n",
"Duplicate Check:\n",
"patient records management <-> medical records storage (shared=1)\n",
"\n",
"\n",
"================================================================================\n",
"Machine Learning Test\n",
"================================================================================\n",
"USING GEMINI FEATURE EXTRACTOR\n",
"CALLING GEMINI\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:16,521 | INFO | [LLM] model=gemini-3.1-flash-lite-preview | task=feature | attempt=1\n",
"2026-06-04 00:30:16,522 | INFO | AFC is enabled with max remote calls: 10.\n",
"2026-06-04 00:30:17,431 | INFO | HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent \"HTTP/1.1 200 OK\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PARSED FEATURES:\n",
"['prediction', 'analysis']\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "560f448ba2794e0e9e1940be1b66697d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature Count: 2\n",
"\n",
"- prediction\n",
"- analysis\n",
"\n",
"Duplicate Check:\n",
"No duplicate overlaps found\n",
"\n",
"\n",
"================================================================================\n",
"Face Recognition Test\n",
"================================================================================\n",
"USING GEMINI FEATURE EXTRACTOR\n",
"CALLING GEMINI\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:21,508 | INFO | [LLM] model=gemini-3.1-flash-lite-preview | task=feature | attempt=1\n",
"2026-06-04 00:30:21,509 | INFO | AFC is enabled with max remote calls: 10.\n",
"2026-06-04 00:30:22,145 | INFO | HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent \"HTTP/1.1 200 OK\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PARSED FEATURES:\n",
"['face recognition', 'real-time face detection', 'student attendance management', 'mobile application']\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4ce3cd5b56544cb4864d5f0779063227",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature Count: 4\n",
"\n",
"- face recognition\n",
"- real-time face detection\n",
"- student attendance management\n",
"- mobile application\n",
"\n",
"Duplicate Check:\n",
"face recognition <-> real-time face detection (shared=1)\n",
"\n",
"\n"
]
}
],
"source": [
"from src.similarity_model.preprocessing import (\n",
" extract_features,\n",
" normalize_text\n",
")\n",
"\n",
"def check_duplicates(features):\n",
"\n",
" found = False\n",
"\n",
" for i in range(len(features)):\n",
" for j in range(i + 1, len(features)):\n",
"\n",
" a = set(features[i].split())\n",
" b = set(features[j].split())\n",
"\n",
" overlap = len(a & b)\n",
"\n",
" if overlap > 0:\n",
" found = True\n",
" print(\n",
" f\"{features[i]} <-> {features[j]} \"\n",
" f\"(shared={overlap})\"\n",
" )\n",
"\n",
" if not found:\n",
" print(\"No duplicate overlaps found\")\n",
"\n",
"\n",
"tests = {\n",
" \"Hospital Test\": \"\"\"\n",
" Hospital management system with\n",
" appointment booking,\n",
" online appointment booking,\n",
" patient records,\n",
" medical records,\n",
" doctor dashboard,\n",
" physician dashboard,\n",
" AI chatbot,\n",
" intelligent chatbot\n",
" \"\"\",\n",
"\n",
" \"Machine Learning Test\": \"\"\"\n",
" Machine learning system using machine learning\n",
" for machine learning prediction and machine learning analysis.\n",
" \"\"\",\n",
"\n",
" \"Face Recognition Test\": \"\"\"\n",
" Face recognition attendance system using deep learning,\n",
" computer vision,\n",
" real-time face detection,\n",
" student attendance management and mobile application.\n",
" \"\"\"\n",
"}\n",
"\n",
"for name, query in tests.items():\n",
"\n",
" print(\"=\" * 80)\n",
" print(name)\n",
" print(\"=\" * 80)\n",
"\n",
" features = extract_features(\n",
" normalize_text(query)\n",
" )\n",
"\n",
" print(f\"Feature Count: {len(features)}\")\n",
" print()\n",
"\n",
" for f in features:\n",
" print(\"-\", f)\n",
"\n",
" print(\"\\nDuplicate Check:\")\n",
" check_duplicates(features)\n",
"\n",
" print(\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "edc0890d",
"metadata": {},
"outputs": [],
"source": [
"from Data.database.sql_connector import engine\n",
"\n",
"engine.dispose()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "0a231154",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:22,479 | INFO | Loading models and artifacts...\n",
"2026-06-04 00:30:22,481 | INFO | Loading model: all-MiniLM-L6-v2\n",
"2026-06-04 00:30:22,481 | INFO | Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n",
"e:\\gradution project\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:949: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"2026-06-04 00:30:24,618 | INFO | Use pytorch device_name: cpu\n",
"2026-06-04 00:30:24,624 | INFO | Loading FAISS index...\n",
"2026-06-04 00:30:24,627 | INFO | Loading feature model: all-MiniLM-L6-v2\n",
"2026-06-04 00:30:24,628 | INFO | Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n",
"2026-06-04 00:30:26,763 | INFO | Use pytorch device_name: cpu\n",
"2026-06-04 00:30:26,767 | INFO | Loading metadata from Azure SQL...\n",
"2026-06-04 00:30:32,815 | INFO | Preparing query...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"USING GEMINI FEATURE EXTRACTOR\n",
"CALLING GEMINI\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:36,816 | INFO | [LLM] model=gemini-3.1-flash-lite-preview | task=feature | attempt=1\n",
"2026-06-04 00:30:36,817 | INFO | AFC is enabled with max remote calls: 10.\n",
"2026-06-04 00:30:37,822 | INFO | HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent \"HTTP/1.1 200 OK\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"PARSED FEATURES:\n",
"['appointment booking', 'patient records', 'doctor dashboard', 'ai chatbot']\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eff76001187242a6a509b00507dae4ee",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:37,890 | INFO | Running semantic retrieval...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a03cd362fbff43c2b60ee37fa346b9b3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:37,995 | INFO | Running hybrid ranking...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b9258e51c54f445a87adba34482d1627",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c9bba1f165354e5486b8c88ccaeef00e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fc4577ed377747f3b87e810d02179ce9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "507fec89dc7643bb87467da4e0a3d874",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "235c224eec464cf796972ffbb4764179",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d64ac601101a43e59bfdcba31ca440de",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dbc0825ced57497a96d822eb5f69d133",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8e1a98329d0948b39732408daa3d3d0f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cacbca885c544c5dbd5ee851924c5e35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c1624ce6d6a143eea18a16bcf2b6d598",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "48789bbc44a84be9b2574aae502457f6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b12e1bc1c5f54918b7220c4d548c272c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d25ff7d6bcf04b88ad3f278fc1074ec0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3669c573bc0740d099cdea8534da1929",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a9b77ba859c247afb63cd9e13f6ec58f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "be0e6c66976c4ef88b04cddf583f5b75",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7beccc50a11349d4aa7eb3b83b33f9b7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "952a0e024ec347b7ace0f2e33ec63fab",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dcefd6ab863d484cb4edbf99dbf9bfce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b33e2e9264a6485aa8f7558ceb1b72e3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "da264b88b6304434af2e12621422ef53",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "549650cf75964ccfbe521e28eca314a9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06a80f5a77f645e783a6601570b9bd38",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "db35a10c1302487ab1122c3a6a0d37c9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a10c675d23a245138b950e0203c37f05",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d3c9324e3f984f17acec7f24d552ec10",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c711d80cc018412a9437dacda5e046c4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "382264332377473682355df0537c205f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6c68bb5e2f914bc181f621974e099338",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "121a21c56f9f48849601032b46927682",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c942dadc11284abb8caa847d765222d5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ac1b3107c77b4e6c9515dd8925d388ce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "533faf5886374abc9f127b15ae388739",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1bc595ace2a14c02909f3f0f8b09148a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4e61ceaef6384da5a0b761bd5ee69165",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e27cc1a6a514cedb1b295198ee2c3af",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "433c7f69da8c4bc199455170aa52abf8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "314054bcb00d4cd7bc4c4ca86409d751",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5838180e1b4849e997ef24b8ca304a6c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ba66fd00bbe442bd99b47a4eaba434b4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" project_title | \n",
" semantic_score | \n",
" feature_score | \n",
" coverage | \n",
" hybrid_score | \n",
" duplicate_risk | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Detecting Diseases Using Chatbot and Booking C... | \n",
" 0.7480 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.05 | \n",
" Very Low | \n",
"
\n",
" \n",
" | 1 | \n",
" Clinical Information System | \n",
" 0.6479 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.05 | \n",
" Very Low | \n",
"
\n",
" \n",
" | 2 | \n",
" Doctor 4 U | \n",
" 0.6437 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.05 | \n",
" Very Low | \n",
"
\n",
" \n",
" | 3 | \n",
" Health Care Management System | \n",
" 0.6402 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.05 | \n",
" Very Low | \n",
"
\n",
" \n",
" | 4 | \n",
" Hospital Management System | \n",
" 0.6397 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.05 | \n",
" Very Low | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" project_title semantic_score \\\n",
"0 Detecting Diseases Using Chatbot and Booking C... 0.7480 \n",
"1 Clinical Information System 0.6479 \n",
"2 Doctor 4 U 0.6437 \n",
"3 Health Care Management System 0.6402 \n",
"4 Hospital Management System 0.6397 \n",
"\n",
" feature_score coverage hybrid_score duplicate_risk \n",
"0 0.0 0.0 0.05 Very Low \n",
"1 0.0 0.0 0.05 Very Low \n",
"2 0.0 0.0 0.05 Very Low \n",
"3 0.0 0.0 0.05 Very Low \n",
"4 0.0 0.0 0.05 Very Low "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results = find_similar_projects(\n",
" title=\"AI Clinic Management System\",\n",
" description=\"\"\"\n",
" Smart clinic management platform with\n",
" appointment booking,\n",
" patient records,\n",
" doctor dashboard,\n",
" AI chatbot.\n",
" \"\"\",\n",
" top_k=5\n",
")\n",
"\n",
"results[[\n",
" \"project_title\",\n",
" \"semantic_score\",\n",
" \"feature_score\",\n",
" \"coverage\",\n",
" \"hybrid_score\",\n",
" \"duplicate_risk\"\n",
"]]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "5ab1315b",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e3c94f184d4f485c871ada26ed9f5abc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "13e2f339bdd544949ec9a26f472a95ef",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'score': 0.8726, 'coverage': 0.8, 'shared_count': 4, 'matches': [{'feature_a': 'appointment booking', 'feature_b': 'booking doctor appointments', 'score': 0.821}, {'feature_a': 'patient records', 'feature_b': 'medical records', 'score': 0.895}, {'feature_a': 'doctor dashboard', 'feature_b': 'doctor dashboard', 'score': 1.0}, {'feature_a': 'ai chatbot', 'feature_b': 'intelligent chatbot', 'score': 0.899}], 'unique_a': ['clinic management'], 'unique_b': ['hospital management']}\n"
]
}
],
"source": [
"project_a = [\n",
" \"appointment booking\",\n",
" \"patient records\",\n",
" \"doctor dashboard\",\n",
" \"ai chatbot\",\n",
" \"clinic management\"\n",
"]\n",
"\n",
"project_b = [\n",
" \"booking doctor appointments\",\n",
" \"medical records\",\n",
" \"doctor dashboard\",\n",
" \"intelligent chatbot\",\n",
" \"hospital management\"\n",
"]\n",
"\n",
"result = compute_feature_similarity(\n",
" project_a,\n",
" project_b\n",
")\n",
"\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "9f571cb2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"82.25\n"
]
}
],
"source": [
"from src.similarity_model import compute_originality\n",
"\n",
"print(\n",
" compute_originality(\n",
" hybrid_score=0.30,\n",
" unique_query_features=7,\n",
" total_query_features=8\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "53eeed12",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:30:41,636 | INFO | Loading processed dataset from Azure SQL...\n",
"2026-06-04 00:30:46,601 | INFO | Loading embedding model: all-MiniLM-L6-v2\n",
"2026-06-04 00:30:46,602 | INFO | Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n",
"e:\\gradution project\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:949: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"2026-06-04 00:30:49,233 | INFO | Use pytorch device_name: cpu\n",
"2026-06-04 00:30:49,243 | INFO | Generating embeddings for 255 projects...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f40167a736a840a6bd04e2b85b18c92d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/4 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:31:05,278 | INFO | FAISS index built successfully with 255 vectors.\n",
"2026-06-04 00:31:05,299 | INFO | Artifacts saved to models\n",
"2026-06-04 00:31:05,301 | INFO | Embedding engine completed successfully.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training Completed\n"
]
}
],
"source": [
"from src.similarity_model.embedding_engine import (\n",
" train_embedding_engine\n",
")\n",
"\n",
"engine = train_embedding_engine()\n",
"\n",
"print(\"Training Completed\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "94ebeacc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2026-06-04 00:31:05,325 | INFO | Loading embedding model: all-MiniLM-L6-v2\n",
"2026-06-04 00:31:05,327 | INFO | Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n",
"e:\\gradution project\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:949: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"2026-06-04 00:31:07,549 | INFO | Use pytorch device_name: cpu\n",
"2026-06-04 00:31:07,583 | INFO | Artifacts loaded successfully.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4c7332342b3d4027b4960c9256eea984",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" project_id title technologies \\\n",
"0 105 Hospital Management System \n",
"1 47 Clinical Information System \n",
"2 110 Health Care Management System \n",
"3 62 Doctor 4 U \n",
"4 112 health services & medical outcomes monitoring \n",
"\n",
" similarity_score \n",
"0 0.8216 \n",
"1 0.6907 \n",
"2 0.6779 \n",
"3 0.5829 \n",
"4 0.5801 \n"
]
}
],
"source": [
"from src.similarity_model.embedding_engine import ProjectEmbedder\n",
"\n",
"engine = ProjectEmbedder()\n",
"engine.load_artifacts()\n",
"\n",
"results = engine.search(\n",
" \"hospital management system with appointment booking and patient records\",\n",
" k=5\n",
")\n",
"\n",
"print(results)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "8e5b3729",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e73c8cda22e6469cb5aa1b9620abe390",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "26ab4a1c4e28402487c2bd7ce8558359",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Batches: 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'score': 0.8866, 'coverage': 1.0, 'shared_count': 1, 'matches': [{'feature_a': 'machine learning system', 'feature_b': 'machine learning platform', 'score': 0.838}], 'unique_a': [], 'unique_b': ['ml analytics']}\n"
]
}
],
"source": [
"result = compute_feature_similarity(\n",
" [\n",
" \"machine learning system\",\n",
" \"machine learning prediction\",\n",
" \"machine learning analysis\"\n",
" ],\n",
" [\n",
" \"machine learning platform\",\n",
" \"predictive machine learning\",\n",
" \"ml analytics\"\n",
" ]\n",
")\n",
"\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "3f0b789e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.05\n"
]
}
],
"source": [
"from src.similarity_model.hybrid_ranker import (\n",
" compute_hybrid_score\n",
")\n",
"\n",
"print(\n",
" compute_hybrid_score(\n",
" semantic_score=0.95,\n",
" feature_score=0.0,\n",
" coverage=0.0,\n",
" feature_count=5,\n",
" unique_query_count=5\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "5c2e1ed5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"id:\n",
"207\n",
"\n",
"submitted_at:\n",
"NaT\n",
"\n",
"project_title:\n",
"Smart Library\n",
"\n",
"student_names:\n",
"Abdel Hamid Abdel Nasser, Mahmoud Tamer Mahmoud, Amer Saed Mohamed Ali Amer, Tahany Adel Faragallah, Hala Ahmed Saad Salem, Mohamed Khaled Mohamed\n",
"\n",
"year:\n",
"2022\n",
"\n",
"abstract:\n",
"Egypt is striving and our efforts are focused these days towards digital transformation and the nationalization of all its government facilities, including the higher education sector. With more than 4 million university students and up to 644, 000 graduates annually, we need smart digital systems that support the educational process and scientific research. Therefore, we have developed a smart library application that takes care of books, recommendations, and user opinions, and provides the appropriate electronic environment for university students to find and nominate appropriate books through an electronic application based on artificial intelligence. Where, using artificial intelligence algorithms, the application will analyze book data and student data together to choose the most appropriate scientific content, in addition to the chatbot is designed to intelligently simulate human conversations. Finally, the smart library provides books to students faster and easier, and encourages them to read and benefit from their information, and the presence of suggestions for similar books will make them not stop reading and expand their horizons, and also the presence of a chatbot will increase the ease of access to books.\n",
"\n",
"description:\n",
"The Smart Library project is a digital platform designed to modernize university library systems in Egypt. It integrates AI-driven book recommendations, an interactive chatbot for user assistance, social groups for collaborative reading, and a QR-code-based borrowing system to streamline library operations and improve student access to academic resources.\n",
"\n",
"problem_statement:\n",
"University libraries in Egypt face delays in digital transformation, relying on traditional, non-interactive systems. This leads to inefficient resource usage, difficulty for students in finding relevant academic materials, and a lack of engagement, ultimately hindering the educational process.\n",
"\n",
"proposed_solution:\n",
"The project proposes an AI-powered smart library application that features a machine learning recommendation engine, an intelligent chatbot for conversational support, social networking features for students, and a QR-code system for automated book borrowing and management.\n",
"\n",
"objectives:\n",
"1. Provide accurate and reliable functionality.\n",
"2. Provide interactive educational tools and resources.\n",
"3. Improve decision-making using artificial intelligence techniques.\n",
"4. Implement intelligent AI-based functionalities.\n",
"5. Improve system performance and reliability.\n",
"6. Improve learning experience and educational accessibility.\n",
"\n",
"full_content:\n",
"Smart Library. Smart Library. Egypt is striving and our efforts are focused these days towards digital transformation and the nationalization of all its government facilities, including the higher education sector. With more than 4 million university students and up to 644, 000 graduates annually, we need smart digital systems that support the educational process and scientific research. Therefore, we have developed a smart library application that takes care of books, recommendations, and user opinions, and provides the appropriate electronic environment for university students to find and nominate appropriate books through an electronic application based on artificial intelligence. Where, using artificial intelligence algorithms, the application will analyze book data and student data together to choose the most appropriate scientific content, in addition to the chatbot is designed to intelligently simulate human conversations. Finally, the smart library provides books to students faster and easier, and encourages them to read and benefit from their information, and the presence of suggestions for similar books will make them not stop reading and expand their horizons, and also the presence of a chatbot will increase the ease of access to books.. The Smart Library project is a digital platform designed to modernize university library systems in Egypt. It integrates AI-driven book recommendations, an interactive chatbot for user assistance, social groups for collaborative reading, and a QR-code-based borrowing system to streamline library operations and improve student access to academic resources.\n",
"\n",
"clean_text:\n",
"smart library. smart library. egypt is striving and our efforts are focused these days towards digital transformation and the nationalization of all its government facilities including the higher education sector. with more than 4 million university students and up to 644 000 graduates annually we need smart digital systems that support the educational process and scientific research. therefore we have developed a smart library application that takes care of books recommendations and user opinions and provides the appropriate electronic environment for university students to find and nominate appropriate books through an electronic application based on artificial intelligence. where using artificial intelligence algorithms the application will analyze book data and student data together to choose the most appropriate scientific content in addition to the chatbot is designed to intelligently simulate human conversations. finally the smart library provides books to students faster and easier and encourages them to read and benefit from their information and the presence of suggestions for similar books will make them not stop reading and expand their horizons and also the presence of a chatbot will increase the ease of access to books.. the smart library project is a digital platform designed to modernize university library systems in egypt. it integrates ai-driven book recommendations an interactive chatbot for user assistance social groups for collaborative reading and a qr-code-based borrowing system to streamline library operations and improve student access to academic resources.\n",
"\n",
"word_count:\n",
"233\n",
"\n",
"features:\n",
"\"\\\"[\\\\\\\"Artificial intelligence algorithms\\\\\\\", \\\\\\\"AI-driven book recommendations\\\\\\\", \\\\\\\"Interactive chatbot\\\\\\\", \\\\\\\"Social groups for collaborative reading\\\\\\\", \\\\\\\"QR-code-based borrowing system\\\\\\\"]\\\"\"\n"
]
}
],
"source": [
"# Inspect a single project record: print every column of the first row whose\n",
"# project_title equals TARGET_TITLE, one field per labelled section.\n",
"TARGET_TITLE = \"Smart Library\"\n",
"\n",
"matches = clean_df[clean_df[\"project_title\"] == TARGET_TITLE]\n",
"\n",
"# .iloc[0] on an empty selection raises an opaque positional-indexer\n",
"# IndexError; raise the same exception type with a message naming the title.\n",
"if matches.empty:\n",
"    raise IndexError(f\"No row in clean_df has project_title == {TARGET_TITLE!r}\")\n",
"\n",
"# If several rows share the title, only the first one is shown.\n",
"row = matches.iloc[0]\n",
"\n",
"for column in clean_df.columns:\n",
"    print(f\"\\n{column}:\")\n",
"    print(row[column])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f64358c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}