{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "9f05400954c94f54b911382c9b745db5": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [], "layout": "IPY_MODEL_ffc6204586fa4f48ae3ec3c0ebb56df6" } }, "b693c845caa9419a81ec7411d9db0a22": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f8d3f77fa01a44d58df2da6ed17bc380", "placeholder": "", "style": "IPY_MODEL_c62e79a5022d403fbabb51b4d6166a61", "value": "
| Step | \n", "Training Loss | \n", "
|---|---|
| 10 | \n", "3.208000 | \n", "
| 20 | \n", "2.711800 | \n", "
| 30 | \n", "2.548400 | \n", "
| 40 | \n", "2.516100 | \n", "
| 50 | \n", "2.423000 | \n", "
| 60 | \n", "2.324900 | \n", "
| 70 | \n", "2.197400 | \n", "
| 80 | \n", "2.192600 | \n", "
| 90 | \n", "2.206500 | \n", "
| 100 | \n", "2.069800 | \n", "
| 110 | \n", "2.011300 | \n", "
| 120 | \n", "2.018900 | \n", "
| 130 | \n", "1.972000 | \n", "
| 140 | \n", "2.023400 | \n", "
| 150 | \n", "2.004300 | \n", "
| 160 | \n", "1.834200 | \n", "
| 170 | \n", "2.014500 | \n", "
| 180 | \n", "2.012000 | \n", "
" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "🎉🎉🎉 اكتمل التدريب بنجاح! 🎉🎉🎉\n", "==================================================\n", "\n", "🔄 8. جاري رفع المودل النهائي وملف (المُقطّع) إلى 'ara-bert-okaz-style' في حسابك...\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Processing Files (0 / 0) : | | 0.00B / 0.00B " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "59a66af0d9aa4bfd971078796c467736" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "New Data Upload : | | 0.00B / 0.00B " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "6f9b410204e34c4495788ba1e0c92087" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...z-style/training_args.bin: 100%|##########| 5.84kB / 5.84kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "79f5cbf8c40b43f398c627e3d5537ad6" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...84319.da3d9d6af63c.1620.0: 100%|##########| 4.98kB / 4.98kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "d27e016708a74a8ba9be93d7d094a5f6" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...z-style/model.safetensors: 6%|6 | 33.5MB / 541MB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "9dd4d29789b84a81aef763fbec918c7a" } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "❌ حدث خطأ أثناء محاولة الرفع. 
لا تقلق، المودل تم تدريبه وحفظه محلياً في مجلد 'ara-bert-okaz-style'.\n", "الخطأ: 'NoneType' object has no attribute 'whoami'\n" ] } ] }, { "cell_type": "code", "source": [ "# ==============================================================================\n", "# Step 4.1: Manual upload of the trained model to the Hugging Face Hub (fix for the 404 error)\n", "# ==============================================================================\n", "from huggingface_hub import HfApi, create_repo\n", "import os\n", "\n", "# --- 4.1: Settings ---\n", "MODEL_BASENAME = \"ara-bert-okaz-style\" # base repo name, without the user namespace\n", "LOCAL_MODEL_DIR = \"./\" + MODEL_BASENAME\n", "# Skip the intermediate checkpoint-<step>/ folders (each holds optimizer.pt, scheduler.pt\n", "# and another model.safetensors). The final model already sits at the top level of\n", "# LOCAL_MODEL_DIR, so pushing every checkpoint only multiplies the upload size.\n", "UPLOAD_IGNORE_PATTERNS = [\"checkpoint-*/*\"]\n", "\n", "print(f\"🔄 جاري محاولة رفع المودل من المجلد المحلي: {LOCAL_MODEL_DIR}\")\n", "\n", "try:\n", "    api = HfApi()\n", "\n", "    # 1. Resolve the logged-in username (needed to build the full repo ID; this fixes the 404)\n", "    user_info = api.whoami()\n", "    username = user_info['name']\n", "    print(f\"✅ تم التعرف عليك باسم المستخدم: {username}\")\n", "\n", "    # 2. Build the full repo ID: <username>/<basename>\n", "    FULL_REPO_ID = f\"{username}/{MODEL_BASENAME}\"\n", "    print(f\"🔧 سيتم الرفع إلى المستودع: {FULL_REPO_ID}\")\n", "\n", "    # --- 4.2: Create the repo (exist_ok=True makes this safe to re-run) ---\n", "    print(f\"🔄 جاري إنشاء/تأكيد المستودع: {FULL_REPO_ID}...\")\n", "    repo_url = create_repo(repo_id=FULL_REPO_ID, exist_ok=True, private=False) # private=False makes the repo public\n", "    print(f\"✅ المستودع جاهز: {repo_url}\")\n", "\n", "    # --- 4.3: Upload the files ---\n", "    print(f\"🚀 جاري رفع جميع الملفات من {LOCAL_MODEL_DIR} إلى المستودع...\")\n", "\n", "    # The fix for the 404: address the repo by its full ID (FULL_REPO_ID)\n", "    api.upload_folder(\n", "        folder_path=LOCAL_MODEL_DIR,\n", "        repo_id=FULL_REPO_ID,\n", "        repo_type=\"model\",\n", "        ignore_patterns=UPLOAD_IGNORE_PATTERNS,\n", "        commit_message=\"رفع المودل المدرب (إصدار 1) - 143 مقالاً من عكاظ\"\n", "    )\n", "\n", "    print(\"\\n\" + \"=\"*50)\n", "    print(f\"🎉🎉🎉 تم رفع المودل بنجاح! 
🎉🎉🎉\")\n", "    print(f\"يمكنك الآن رؤية مودلك الخاص على الرابط:\")\n", "    print(f\"https://huggingface.co/{FULL_REPO_ID}\") # print the full link\n", "    print(\"=\"*50)\n", "\n", "except Exception as e:\n", "    print(f\"❌ حدث خطأ كبير أثناء الرفع اليدوي. الخطأ: {e}\")\n", "    print(\"لا تقلق، المودل لا يزال محفوظاً محلياً. تحقق من اتصالك بالإنترنت ومن الرمز (Token).\")\n", "    # Re-raise so that Run All stops here: the next cell deletes the local model folder,\n", "    # which must not happen after a failed upload.\n", "    raise" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 609, "referenced_widgets": [ "0b04bdcc94fb4844a59bc9044bc70459", "eb5a7ca27b1f4d408aa9ec282c9b7ed4", "4e8def5807e24d6ab7b4bd3618b151dc", "5a401ed8fc934b028eaf52d0409646b1", "80df277832944307886a8f5b87a8cd2d", "91235bc4c9fd4809b075dddb4c0e7066", "017f30cf6d4049d9b1916e360340ac9b", "3d969e88632549ddb159f2b9bfe04c72", "97566af068724ad49571986cc37dc2ba", "4e8683d2a95143679d2844aa618c43cb", "07de27b62c8647a28967e92e1a50d359", "7ace37e5399f4ca59feef9864ceef0d8", "456b2626cdeb44db93a8e4979b16a5d5", "ffab928082c14e679b1e6da297b48de7", "46e62b2cb5284e908bd3e7d8cf324ee3", "2153470cd0974f1ab0b8c55ca2b7f710", "b7275cdac40c42689c9adc5a24d92b83", "7d862c912cf44b399fd1ab4e21e15198", "51c6e4f1b4cf49ca8c3ba9fdb5de6201", "d03f9b32c789442b946689482396957b", "a40d87f094324b2ca6f67c5545aa3f5f", "9150bbe92f394270a624b6b65af777a9", "1a42c8f2a373434a86a6a0c6aa03c5fd", "092683b16871438f8aeeef0088dd49da", "02098f4b99de4223a9ffb987f9832752", "d697baa025bb483aaebd2442f3861ceb", "027ffc00064b47dc97891949c837dba7", "9713c8564dc647a889db00dd175a934d", "52856e9ac25947d598a8998d8c083012", "675f25692ee14a0bb4cdf4adf63f8c63", "1fdab4dd2ccb462fb3dc4393826c5bb5", "b720501ef78f4728a951132498906139", "4d8bafe00a7a4a5a8fdb0d81fc6931fb", "6076063db00e49a88082926890fd50ca", "2746284f8d0743228eb56b00e92c88f4", "79ef1b0bfb1b4815bbf543418677b78f", "3a76e4da41c94355b46982ab126b5f62", "cb8ef8d0ec124817941b5b9aa3e68b5d", "d43439f07b8c438789fec254835da599", "2719ea1429eb4943a8c142f7f81559e1", "61ce006eb1b948efa2a73d672855543f", "20866e7a56714e89a658ebe72ef0ddec", 
"37b105e6d3454719877a55005078e95c", "37367e7b36684e49883b88ac01c1df22", "c2e2904cc4524575a849327cc9edf64c", "7ba0d6156e6a49cdb2736964012c7872", "8c79c4d7f39146ccb7415a9acb96a06a", "e1e4cf1a40974c079968dc39182e8e29", "792397aef5e14a87a52b0a51cc5e72b9", "3779dbf8004a4884bfdcac2ef6f48081", "af53be38e1cf4aaeb034b80a22b64876", "31b423cf48fe4e6e8e6aca371e84b7f8", "332769aabc2d483485bd991bdf44b440", "eebb2438fce741f193eb9a5c77f8e75e", "007ab99508b3404f808c2f27eb2b6171", "4710d4a1172b489eb23b66ee7f66b2b3", "bced8bf343f54a5293ce5a0cbdecb71a", "01be2e1d12d2451d9f872761eb3b9005", "183aa91b8a4947268953f69451edabe2", "cabaf134ff6d4c5482db41afed638f7c", "978c074245ac41899ad11e9c270cc964", "811bc05b7c2146b28242696c9052b66c", "27cc3ee4f61b4ee2a133dc3705b3d948", "086aac3559424052948b43667b089ce5", "68dc547d15794d55becb8a31d316b059", "288456b4f2a84024b7da168dedd1f17b", "2856daca249d4bfb86def9f92311a692", "fe74964d932b4e0a90fdee9dba775e3f", "d14cbeb71a0b41ecbc1e66cff70fe9a9", "a7534b32183241fe8f70b6a3ddb1db3c", "1961385bf5f140799cca9be4bd474b56", "7ca60b7b38084cd58a31f8ab9bbb492a", "e3441e191bc64eb3bcf5037c6d49bde2", "1989c4f86caf4a0c9bd845255f509ac7", "3db56186d1d640a298aabf878a5f163d", "17dc4b081c244dee962c2122e58a6634", "8d0886e85ece43e5a6d8a45f6037e772", "c3bf061a63834921add105e47cc75e2d", "ef201662b7d146218d09e0f6778a1c37", "6f9070319fa94a3b8fe00e190226c338", "99746191357c4e27baf182d6260431f4", "8bd7ca9319d748a0a6a68bed7b47ad71", "c0d7ecf2d1174b2d8eaa8d6df3db3574", "c0af2578e8d440a69f04a24db8813561", "17dd28e575604558b9c2ec7c6992c4e0", "a74841b608414e80994055d2362ebefc", "120af1de8af643989f58487b732c753f", "bf4c79479aab4bacaa69b6c7b895542b", "c13b0bb0bfc8437c88a4eddc88742fe2", "27a32f086a254b6194784c3173e5519b", "56ba40adb0e347f4b20e44b9bcff505e", "3a4464f8a1434ebeaeb4c3f4bd0b5325", "6fa190c2b48347e3acdc7d8bc7976539", "77b28781b34e417580d2a22a86d906a3", "e52423cc978e477183b5807afad0ed24", "96f7640b939546b58ff4f553eefa68cb", "965a1670e4314ae7b6f0befcb67dafb1", 
"afc98ec4f13e4544a48e511f6926dd6d", "ae307a32b7904305aa97c7387e8fb956", "ecab591f4c1248c79debad89e2a186ac", "95de64f4082d494bac32296c7b55a6f2", "97802c5be9fe442ca238699a40ad0a33", "952ab1611fdb4f2284b10778748552fb", "786e77bf6f704f2d916535862d8fdf1f", "15d814746310478990b1479bf5fa453e", "326a5a8bd3874d0680394bf4462b000f", "5749f91f001a47109f917c47f4ce12fc", "2fad7571baa744beb901f350dcb7d2a3", "eb2a284606d64ab19890c75c82ab1910", "a2904da7a29142d4afb1b7c6b542477c", "093b3cd6dc6d4431bfb2bfa13a73a68d", "dbc5e0c4de144866a83a91b810e2e10f", "aec5d935328a459a9ed0cf83895f8108", "d26e763a9e8d4ab29d769c4662bf7114", "333419a82e4449b0882f17e3f92adf0d", "daf0600ae373400b8d5bcd76d5ab3e23", "807ca2d881244714810bedf37db0f529", "da94918a2e0b41ed884728d107711975", "4f5c0d6f034e488faf2cc330f9b035e1", "77aa8e86f3f64c1cb728ccc32b1f19de", "5708dc2a6cf043a3a8381db986eb043d", "09b52db9bcbd4dc4a2a607cbc590e8f9", "81225d0c5b564a0aa2917c7585abb4b3", "264c0e73420c4b919e6c1e8016ae0ef5", "f2085fcebe90467b98e98723d45f297c", "7652f4e996774da4a56c3754be71ea7d", "61cffbf35fca4ddf96b2df00c1e2b571", "756941e076284466b5b6c661ec8677e0", "e92cb5664de147ddb3678db7768065f1", "7a7a4cddcdc24beebf1e5efb7f246506", "3f4eda7e20334a1eb4554f2d77616325", "c21e5c8dbf604e19b7cb7e5ab24c93c6" ] }, "id": "DYfBkrgYakhD", "outputId": "e6bdad62-e55d-4099-9a80-7e4e6ca21d3b" }, "execution_count": 15, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "🔄 جاري محاولة رفع المودل من المجلد المحلي: ./ara-bert-okaz-style\n", "✅ تم التعرف عليك باسم المستخدم: alomari7\n", "🔧 سيتم الرفع إلى المستودع: alomari7/ara-bert-okaz-style\n", "🔄 جاري إنشاء/تأكيد المستودع: alomari7/ara-bert-okaz-style...\n", "✅ المستودع جاهز: https://huggingface.co/alomari7/ara-bert-okaz-style\n", "🚀 جاري رفع جميع الملفات من ./ara-bert-okaz-style إلى المستودع...\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Processing Files (0 / 0) : | | 0.00B / 0.00B " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, 
"version_minor": 0, "model_id": "0b04bdcc94fb4844a59bc9044bc70459" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "New Data Upload : | | 0.00B / 0.00B " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "7ace37e5399f4ca59feef9864ceef0d8" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...z-style/training_args.bin: 100%|##########| 5.84kB / 5.84kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "1a42c8f2a373434a86a6a0c6aa03c5fd" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...oint-18/training_args.bin: 100%|##########| 5.84kB / 5.84kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "6076063db00e49a88082926890fd50ca" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...int-180/model.safetensors: 2%|1 | 8.34MB / 541MB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "c2e2904cc4524575a849327cc9edf64c" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...int-108/model.safetensors: 2%|1 | 8.36MB / 541MB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "4710d4a1172b489eb23b66ee7f66b2b3" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...int-180/training_args.bin: 100%|##########| 5.84kB / 5.84kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "2856daca249d4bfb86def9f92311a692" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...oint-36/model.safetensors: 2%|1 | 8.33MB / 541MB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": 
"c3bf061a63834921add105e47cc75e2d" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...oint-36/training_args.bin: 100%|##########| 5.84kB / 5.84kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "c13b0bb0bfc8437c88a4eddc88742fe2" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...oint-54/model.safetensors: 2%|1 | 8.38MB / 541MB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "ecab591f4c1248c79debad89e2a186ac" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...eckpoint-180/scheduler.pt: 100%|##########| 1.47kB / 1.47kB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "093b3cd6dc6d4431bfb2bfa13a73a68d" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ " ...eckpoint-180/optimizer.pt: 0%| | 527kB / 1.08GB " ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "09b52db9bcbd4dc4a2a607cbc590e8f9" } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "\n", "==================================================\n", "🎉🎉🎉 تم رفع المودل بنجاح! 
🎉🎉🎉\n", "يمكنك الآن رؤية مودلك الخاص على الرابط:\n", "https://huggingface.co/alomari7/ara-bert-okaz-style\n", "==================================================\n" ] } ] }, { "cell_type": "code", "source": [ "# Delete the local model folder to free Colab disk space (16.8 GB).\n", "# Run this only after the upload cell above has succeeded: it removes the locally saved model.\n", "!rm -rf ./ara-bert-okaz-style\n", "print(\"✅ تم حذف المجلد المحلي للمودل وتوفير 16.8 جيجا من مساحة Colab.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pj_cLDIDdIrC", "outputId": "b4af839a-f73c-4f9b-a75f-2358f6755dc1" }, "execution_count": 16, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "✅ تم حذف المجلد المحلي للمودل وتوفير 16.8 جيجا من مساحة Colab.\n" ] } ] }, { "cell_type": "code", "source": [ "# ==============================================================================\n", "# Step 5: Document the project and publish the docs to a Hugging Face Space.\n", "# Every call below goes to the Hugging Face Hub (huggingface_hub); nothing here talks to GitHub.\n", "# ==============================================================================\n", "\n", "# --- 5.1: Write the documentation file (README.md) ---\n", "print(\"✍️ جاري كتابة ملف التوثيق (README.md)...\")\n", "\n", "# Ready-made project README; it links to the fine-tuned model published on Hugging Face.\n", "README_CONTENT = \"\"\"\n", "# مشروع سحب وتحليل مقالات عكاظ (Okaz NLP Project)\n", "\n", "هذا المشروع هو عبارة عن خط أنابيب (Pipeline) كامل لمعالجة اللغات الطبيعية (NLP)، يبدأ من سحب البيانات الحية وينتهي بإنشاء مودل لغوي مُصقل (Fine-Tuned).\n", "\n", "## 🚀 المودل المدرب (Fine-Tuned Model)\n", "\n", "النتيجة النهائية لهذا المشروع هو مودل لغوي عربي مُعدل على أسلوب مقالات عكاظ.\n", "**المودل متاح على Hugging Face هنا:**\n", "[https://huggingface.co/alomari7/ara-bert-okaz-style](https://huggingface.co/alomari7/ara-bert-okaz-style)\n", "\n", "---\n", "\n", "## 🔧 مكونات المشروع\n", "\n", "هذا المستودع يحتوي على الكود الكامل للمشروع، مقسماً إلى 3 مراحل:\n", "\n", "### 1. 
السحب (Scraping)\n", "- **الكود:** `scraper_v13.py` (أو `scapping2.ipynb`)\n", "- **الهدف:** سحب المقالات الحية من موقع \"عكاظ\".\n", "- **الاستراتيجية (إصدار 13):**\n", " 1. يستخدم `undetected-chromedriver` لتجاوز حماية الموقع.\n", " 2. يقوم بـ \"الزحف العميق\" (Deep Crawl) لـ 6 أقسام رئيسية (محليات، سياسة، اقتصاد، رياضة، ثقافة، منوعات).\n", " 3. يجمع كل روابط المقالات من الصفحات الأولى لهذه الأقسام.\n", " 4. يقارن الروابط الجديدة بالملف المحفوظ (`okaz_articles_full.csv`) ويسحب المقالات الجديدة فقط.\n", " 5. يحفظ البيانات في `okaz_articles_full.csv`.\n", "\n", "### 2. التنظيف والمعالجة (Cleaning & Analysis)\n", "- **الكود:** `analysis.py`\n", "- **الهدف:** تحويل البيانات الخام إلى بيانات نظيفة جاهزة للتحليل وتعلم الآلة.\n", "- **الخطوات:**\n", " 1. **التنظيف:** تحميل `okaz_articles_full.csv`، إزالة الضجيج (مثل \"اقرأ أيضاً\")، وحفظه في `okaz_articles_cleaned.csv`.\n", " 2. **تحليل التكرار:** قراءة الملف النظيف، إزالة الكلمات المستبعدة (Stop Words)، وعدّ الكلمات الأكثر تكراراً لحفظها في `okaz_word_frequency.csv`.\n", "\n", "### 3. التدريب (Fine-Tuning)\n", "- **الكود:** `fine_tuning.ipynb`\n", "- **الهدف:** صقل (Fine-Tune) مودل لغوي على بياناتنا.\n", "- **الخطوات:**\n", " 1. **المودل الأساسي:** `aubmindlab/bert-base-arabertv2` (AraBERT).\n", " 2. **المهمة:** Masked Language Modeling (MLM) - جعل المودل يتنبأ بالكلمات المخفية في مقالات عكاظ.\n", " 3. **البيانات:** 143 مقالاً نظيفاً.\n", " 4. **النتيجة:** مودل `ara-bert-okaz-style` الجديد.\n", "\n", "---\n", "\n", "## ⚙️ كيفية تشغيل المشروع\n", "1. تأكد من وجود جميع المكتبات (انظر ملف `requirements.txt` أو الكود).\n", "2. قم بتشغيل كود السحب (`scraper_v13.py`) لجمع البيانات.\n", "3. قم بتشغيل كود التحليل (`analysis.py`) لتنظيفها.\n", "4. 
(اختياري) قم بتشغيل كود التدريب لإنشاء المودل الخاص بك.\n", "\"\"\"\n", "\n", "with open(\"README.md\", \"w\", encoding=\"utf-8\") as f:\n", "    f.write(README_CONTENT)\n", "print(\"✅ تم إنشاء README.md بنجاح.\")\n", "\n", "# --- 5.2: Write the .gitignore file ---\n", "print(\"✍️ جاري كتابة ملف .gitignore...\")\n", "GITIGNORE_CONTENT = \"\"\"\n", "# تجاهل الملفات المحلية والبيانات الحساسة\n", ".ipynb_checkpoints/\n", "__pycache__/\n", "*.csv\n", "/content/drive/\n", "ara-bert-okaz-style/\n", "*.zip\n", "*.deb\n", "chromedriver\n", "\"\"\"\n", "with open(\".gitignore\", \"w\", encoding=\"utf-8\") as f:\n", "    f.write(GITIGNORE_CONTENT)\n", "print(\"✅ تم إنشاء .gitignore بنجاح.\")\n", "\n", "# --- 5.3: Log in to Hugging Face ---\n", "# huggingface_hub is the Hugging Face Hub client. %pip (not !pip) installs into the kernel's environment.\n", "%pip install -q huggingface_hub\n", "from huggingface_hub import HfApi, HfFolder, create_repo, notebook_login\n", "\n", "print(\"\\n🔑 الرجاء تسجيل الدخول إلى Hugging Face.\")\n", "print(\"ملاحظة: أنت لا تحتاج إلى 'رمز' (Token) جديد، يمكنك استخدام نفس الرمز الذي استخدمته لرفع المودل إلى Hugging Face.\")\n", "notebook_login()\n", "\n", "# --- 5.4: Create the Space and upload the project docs ---\n", "print(\"\\n🚀 جاري إنشاء ورفع المشروع إلى Hugging Face Spaces...\")\n", "\n", "try:\n", "    api = HfApi()\n", "\n", "    # Resolve the username so the repo is created under the right namespace\n", "    user_info = api.whoami()\n", "    username = user_info['name']\n", "\n", "    # NOTE: despite its legacy name, this is a Hugging Face Space ID, not a GitHub repo.\n", "    # The name is kept unchanged in case later cells reference it.\n", "    GITHUB_REPO_ID = f\"{username}/Okaz-NLP-Project\"\n", "\n", "    print(f\"🔧 سيتم إنشاء المستودع باسم: {GITHUB_REPO_ID}\")\n", "\n", "    # 1. Create the static Space on the Hugging Face Hub (exist_ok=True makes this safe to re-run)\n", "    create_repo(repo_id=GITHUB_REPO_ID, repo_type=\"space\", space_sdk=\"static\", exist_ok=True)\n", "    print(f\"✅ تم إنشاء مستودع Hugging Face Space (أو تأكيده): https://huggingface.co/spaces/{GITHUB_REPO_ID}\")\n", "    print(\"ملاحظة: هذا المستودع مستضاف على 'Spaces' في Hugging Face وليس على GitHub.\")\n", "\n", 
"    # 2. Upload the documentation files\n", "    # (only README.md and .gitignore are pushed here - not the code, the data or the model)\n", "    for doc_name in (\"README.md\", \".gitignore\"):\n", "        api.upload_file(\n", "            path_or_fileobj=doc_name,\n", "            path_in_repo=doc_name,\n", "            repo_id=GITHUB_REPO_ID,\n", "            repo_type=\"space\"\n", "        )\n", "\n", "    print(\"\\n\" + \"=\"*50)\n", "    print(f\"🎉🎉🎉 تم رفع ملفات التوثيق بنجاح! 🎉🎉🎉\")\n", "    print(\"المشروع أصبح موثقاً وجاهزاً للمشاركة.\")\n", "    print(f\"🔗 رابط المشروع على (Hugging Face Spaces): https://huggingface.co/spaces/{GITHUB_REPO_ID}\")\n", "    print(\"=\"*50)\n", "\n", "    print(\"\\n💡 **هام جداً:**\")\n", "    print(\"لإضافة ملفات الكود (ملفات .ipynb)، اذهب إلى الرابط أعلاه، واختر 'Files and versions'،\")\n", "    print(\"ثم اختر 'Add file' -> 'Upload file' وقم برفع ملفات Colab الخاصة بك يدوياً.\")\n", "\n", "except Exception as e:\n", "    print(f\"❌ حدث خطأ أثناء الرفع إلى Hugging Face: {e}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 677, "referenced_widgets": [ "66205630a65549cf8321ae1193fbe628", "16c5d6ba701649a39d71cb8873162045", "41637d8c603345599fb86378e8020441", "8f22fac253d84384aa323330721c0d1c", "07de9786185b4f7b81d0ab9e8c618b66", "bfed8e3d0c2a45ea9193bd469e030ba6", "ae9c9b03566d40e49e89f17d3b8b01bb", "377716885c4143f79564530f8b81a497", "44943c624ca64a7f8cbb1c90f780f49d", "296bc6a0703d45bea9d10e99d9a501aa", "45314e8abee24b7a9136fac66cd7490e", "2964d10253e64bf4b245a89905a0e797", "5e0dfd1a075744b7afed8eea9b44a100", "0d7a86c05c9541fd9a35fd7e7a6a7278", "22657ee52996487b8ca5d823946a729f", "e6d9c79d7c144e7c96f73ba2d981f4d7", "e03b807c4f734137846e2d02e55e8a9c", "6fb3095abe3d4a4b84b11b9096831f2d", "d919f667759c45a5bab8869ff20ceea4", "0060de381bf94a668707c9376559093c" ] }, "id": "XVPYykfodJwV", "outputId": "33151131-7092-4035-a1ef-32e88a610008" }, "execution_count": 17, "outputs": [ { "output_type": "stream", "name": "stdout", 
"text": [ "✍️ جاري كتابة ملف التوثيق (README.md)...\n", "✅ تم إنشاء README.md بنجاح.\n", "✍️ جاري كتابة ملف .gitignore...\n", "✅ تم إنشاء .gitignore بنجاح.\n", "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.12/dist-packages (0.36.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (3.20.0)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (2025.3.0)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (25.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (6.0.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (2.32.5)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (4.67.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (4.15.0)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (1.2.0)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (3.4.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (3.11)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (2025.10.5)\n", "\n", "🔑 الرجاء تسجيل الدخول إلى GitHub.\n", "ملاحظة: أنت لا تحتاج إلى 'رمز' (Token) جديد، يمكنك استخدام نفس الرمز الذي استخدمته لـ Hugging Face.\n" ] }, { 
"output_type": "display_data", "data": { "text/plain": [ "VBox(children=(HTML(value='