Che237 committed on
Commit
6bffcde
·
verified ·
1 Parent(s): e08ee7b

Fix cells 8-10 to be self-contained

Browse files
notebooks/00_environment_setup.ipynb CHANGED
@@ -204,23 +204,7 @@
204
  "id": "776236f8",
205
  "metadata": {},
206
  "outputs": [],
207
- "source": [
208
- "# Create necessary directories\n",
209
- "directories = [\n",
210
- " config.DATASETS_DIR,\n",
211
- " config.MODELS_DIR,\n",
212
- " config.ARTIFACTS_DIR,\n",
213
- " config.BASE_DIR / \"logs\",\n",
214
- " config.BASE_DIR / \"cache\",\n",
215
- "]\n",
216
- "\n",
217
- "print(\"Creating directory structure...\")\n",
218
- "for directory in directories:\n",
219
- " directory.mkdir(parents=True, exist_ok=True)\n",
220
- " print(f\" \u2713 {directory}\")\n",
221
- "\n",
222
- "print(\"\\n\u2713 Directory structure ready!\")"
223
- ]
224
  },
225
  {
226
  "cell_type": "markdown",
@@ -236,34 +220,7 @@
236
  "id": "6b854bac",
237
  "metadata": {},
238
  "outputs": [],
239
- "source": [
240
- "import json\n",
241
- "\n",
242
- "# Export configuration for other notebooks\n",
243
- "notebook_config = {\n",
244
- " \"device\": str(DEVICE),\n",
245
- " \"python_version\": f\"{python_version.major}.{python_version.minor}.{python_version.micro}\",\n",
246
- " \"torch_version\": torch.__version__,\n",
247
- " \"cuda_available\": cuda_available,\n",
248
- " \"base_dir\": str(config.BASE_DIR),\n",
249
- " \"datasets_dir\": str(config.DATASETS_DIR),\n",
250
- " \"models_dir\": str(config.MODELS_DIR),\n",
251
- " \"artifacts_dir\": str(config.ARTIFACTS_DIR),\n",
252
- " \"random_state\": config.RANDOM_STATE,\n",
253
- " \"test_size\": config.TEST_SIZE,\n",
254
- " \"cv_folds\": config.CV_FOLDS,\n",
255
- " \"gemini_configured\": bool(config.GEMINI_API_KEY),\n",
256
- " \"huggingface_configured\": bool(config.HUGGINGFACE_TOKEN),\n",
257
- " \"created_at\": str(pd.Timestamp.now())\n",
258
- "}\n",
259
- "\n",
260
- "config_path = config.BASE_DIR / \"notebook_config.json\"\n",
261
- "with open(config_path, \"w\") as f:\n",
262
- " json.dump(notebook_config, f, indent=2)\n",
263
- "\n",
264
- "print(f\"\u2713 Configuration saved to: {config_path}\")\n",
265
- "print(\"\\n\" + json.dumps(notebook_config, indent=2))"
266
- ]
267
  },
268
  {
269
  "cell_type": "markdown",
@@ -279,24 +236,7 @@
279
  "id": "f409be56",
280
  "metadata": {},
281
  "outputs": [],
282
- "source": [
283
- "print(\"\\n\" + \"=\" * 60)\n",
284
- "print(\"ENVIRONMENT SETUP COMPLETE\")\n",
285
- "print(\"=\" * 60)\n",
286
- "print(f\"\"\"\n",
287
- "\u2705 Python: {python_version.major}.{python_version.minor}.{python_version.micro}\n",
288
- "\u2705 Device: {DEVICE}\n",
289
- "\u2705 PyTorch: {torch.__version__}\n",
290
- "\u2705 Gemini API: {'Ready' if config.GEMINI_API_KEY else 'Not configured'}\n",
291
- "\u2705 HuggingFace: {'Ready' if config.HUGGINGFACE_TOKEN else 'Not configured'}\n",
292
- "\u2705 WebScraper API: Ready\n",
293
- "\u2705 Directories: Created\n",
294
- "\n",
295
- "You can now proceed to the next notebook:\n",
296
- " \u2192 01_data_acquisition.ipynb\n",
297
- "\"\"\")\n",
298
- "print(\"=\" * 60)"
299
- ]
300
  }
301
  ],
302
  "metadata": {
 
204
  "id": "776236f8",
205
  "metadata": {},
206
  "outputs": [],
207
+ "source": "from pathlib import Path\n\n# Define directories (self-contained)\nBASE_DIR = Path('..').resolve()\nDATASETS_DIR = BASE_DIR / 'datasets'\nMODELS_DIR = BASE_DIR / 'models'\nARTIFACTS_DIR = BASE_DIR / 'artifacts'\n\n# Create necessary directories\ndirectories = [\n DATASETS_DIR,\n MODELS_DIR,\n ARTIFACTS_DIR,\n BASE_DIR / 'logs',\n BASE_DIR / 'cache',\n]\n\nprint('Creating directory structure...')\nfor directory in directories:\n directory.mkdir(parents=True, exist_ok=True)\n print(f' \u2713 {directory}')\n\nprint('\\n\u2713 Directory structure ready!')\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  },
209
  {
210
  "cell_type": "markdown",
 
220
  "id": "6b854bac",
221
  "metadata": {},
222
  "outputs": [],
223
+ "source": "import json\nimport sys\nimport os\nfrom pathlib import Path\n\n# Get values (self-contained)\npython_version = sys.version_info\n\ntry:\n import torch\n torch_version = torch.__version__\n cuda_available = torch.cuda.is_available()\n if cuda_available:\n DEVICE = 'cuda'\n elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():\n DEVICE = 'mps'\n else:\n DEVICE = 'cpu'\nexcept ImportError:\n torch_version = 'not installed'\n cuda_available = False\n DEVICE = 'cpu'\n\n# Load config\nconfig_json_path = Path('notebook_config.json')\nif config_json_path.exists():\n with open(config_json_path, 'r') as f:\n loaded_config = json.load(f)\nelse:\n loaded_config = {}\n\nBASE_DIR = Path('..').resolve()\nDATASETS_DIR = BASE_DIR / 'datasets'\nMODELS_DIR = BASE_DIR / 'models'\nARTIFACTS_DIR = BASE_DIR / 'artifacts'\nRANDOM_STATE = loaded_config.get('random_state', 42)\nTEST_SIZE = loaded_config.get('test_size', 0.2)\nCV_FOLDS = loaded_config.get('cv_folds', 5)\n\n# Export configuration for other notebooks\nnotebook_config = {\n 'device': str(DEVICE),\n 'python_version': f'{python_version.major}.{python_version.minor}.{python_version.micro}',\n 'torch_version': torch_version,\n 'cuda_available': cuda_available,\n 'base_dir': str(BASE_DIR),\n 'datasets_dir': str(DATASETS_DIR),\n 'models_dir': str(MODELS_DIR),\n 'artifacts_dir': str(ARTIFACTS_DIR),\n 'random_state': RANDOM_STATE,\n 'test_size': TEST_SIZE,\n 'cv_folds': CV_FOLDS,\n}\n\nconfig_path = Path('notebook_runtime_config.json')\nwith open(config_path, 'w') as f:\n json.dump(notebook_config, f, indent=2)\n\nprint(f'\u2713 Configuration exported to: {config_path.absolute()}')\nprint(json.dumps(notebook_config, indent=2))\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  },
225
  {
226
  "cell_type": "markdown",
 
236
  "id": "f409be56",
237
  "metadata": {},
238
  "outputs": [],
239
+ "source": "import sys\nimport json\nimport os\nfrom pathlib import Path\n\npython_version = sys.version_info\n\ntry:\n import torch\n torch_version = torch.__version__\n if torch.cuda.is_available():\n DEVICE = 'cuda'\n elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():\n DEVICE = 'mps'\n else:\n DEVICE = 'cpu'\nexcept ImportError:\n torch_version = 'not installed'\n DEVICE = 'cpu'\n\n# Load config\nconfig_json_path = Path('notebook_config.json')\nif config_json_path.exists():\n with open(config_json_path, 'r') as f:\n loaded_config = json.load(f)\nelse:\n loaded_config = {}\n\nGEMINI_API_KEY = loaded_config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY', '')\nHUGGINGFACE_TOKEN = os.getenv('HF_TOKEN', '')\n\nprint('\\n' + '=' * 60)\nprint('ENVIRONMENT SETUP COMPLETE')\nprint('=' * 60)\nprint(f'''\n\u2705 Python: {python_version.major}.{python_version.minor}.{python_version.micro}\n\u2705 Device: {DEVICE}\n\u2705 PyTorch: {torch_version}\n\u2705 Gemini API: {'Ready' if GEMINI_API_KEY else 'Not configured'}\n\u2705 HuggingFace: {'Ready' if HUGGINGFACE_TOKEN else 'Using public access'}\n\u2705 WebScraper API: Ready\n\u2705 Directories: Created\n\nYou can now proceed to the next notebook:\n \u2192 01_data_acquisition.ipynb\n''')\nprint('=' * 60)\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  }
241
  ],
242
  "metadata": {