Spaces:
Running
Running
Fix cells 8-10 to be self-contained
Browse files
notebooks/00_environment_setup.ipynb
CHANGED
|
@@ -204,23 +204,7 @@
|
|
| 204 |
"id": "776236f8",
|
| 205 |
"metadata": {},
|
| 206 |
"outputs": [],
|
| 207 |
-
"source": [
|
| 208 |
-
"# Create necessary directories\n",
|
| 209 |
-
"directories = [\n",
|
| 210 |
-
" config.DATASETS_DIR,\n",
|
| 211 |
-
" config.MODELS_DIR,\n",
|
| 212 |
-
" config.ARTIFACTS_DIR,\n",
|
| 213 |
-
" config.BASE_DIR / \"logs\",\n",
|
| 214 |
-
" config.BASE_DIR / \"cache\",\n",
|
| 215 |
-
"]\n",
|
| 216 |
-
"\n",
|
| 217 |
-
"print(\"Creating directory structure...\")\n",
|
| 218 |
-
"for directory in directories:\n",
|
| 219 |
-
" directory.mkdir(parents=True, exist_ok=True)\n",
|
| 220 |
-
" print(f\" \u2713 {directory}\")\n",
|
| 221 |
-
"\n",
|
| 222 |
-
"print(\"\\n\u2713 Directory structure ready!\")"
|
| 223 |
-
]
|
| 224 |
},
|
| 225 |
{
|
| 226 |
"cell_type": "markdown",
|
|
@@ -236,34 +220,7 @@
|
|
| 236 |
"id": "6b854bac",
|
| 237 |
"metadata": {},
|
| 238 |
"outputs": [],
|
| 239 |
-
"source":
|
| 240 |
-
"import json\n",
|
| 241 |
-
"\n",
|
| 242 |
-
"# Export configuration for other notebooks\n",
|
| 243 |
-
"notebook_config = {\n",
|
| 244 |
-
" \"device\": str(DEVICE),\n",
|
| 245 |
-
" \"python_version\": f\"{python_version.major}.{python_version.minor}.{python_version.micro}\",\n",
|
| 246 |
-
" \"torch_version\": torch.__version__,\n",
|
| 247 |
-
" \"cuda_available\": cuda_available,\n",
|
| 248 |
-
" \"base_dir\": str(config.BASE_DIR),\n",
|
| 249 |
-
" \"datasets_dir\": str(config.DATASETS_DIR),\n",
|
| 250 |
-
" \"models_dir\": str(config.MODELS_DIR),\n",
|
| 251 |
-
" \"artifacts_dir\": str(config.ARTIFACTS_DIR),\n",
|
| 252 |
-
" \"random_state\": config.RANDOM_STATE,\n",
|
| 253 |
-
" \"test_size\": config.TEST_SIZE,\n",
|
| 254 |
-
" \"cv_folds\": config.CV_FOLDS,\n",
|
| 255 |
-
" \"gemini_configured\": bool(config.GEMINI_API_KEY),\n",
|
| 256 |
-
" \"huggingface_configured\": bool(config.HUGGINGFACE_TOKEN),\n",
|
| 257 |
-
" \"created_at\": str(pd.Timestamp.now())\n",
|
| 258 |
-
"}\n",
|
| 259 |
-
"\n",
|
| 260 |
-
"config_path = config.BASE_DIR / \"notebook_config.json\"\n",
|
| 261 |
-
"with open(config_path, \"w\") as f:\n",
|
| 262 |
-
" json.dump(notebook_config, f, indent=2)\n",
|
| 263 |
-
"\n",
|
| 264 |
-
"print(f\"\u2713 Configuration saved to: {config_path}\")\n",
|
| 265 |
-
"print(\"\\n\" + json.dumps(notebook_config, indent=2))"
|
| 266 |
-
]
|
| 267 |
},
|
| 268 |
{
|
| 269 |
"cell_type": "markdown",
|
|
@@ -279,24 +236,7 @@
|
|
| 279 |
"id": "f409be56",
|
| 280 |
"metadata": {},
|
| 281 |
"outputs": [],
|
| 282 |
-
"source":
|
| 283 |
-
"print(\"\\n\" + \"=\" * 60)\n",
|
| 284 |
-
"print(\"ENVIRONMENT SETUP COMPLETE\")\n",
|
| 285 |
-
"print(\"=\" * 60)\n",
|
| 286 |
-
"print(f\"\"\"\n",
|
| 287 |
-
"\u2705 Python: {python_version.major}.{python_version.minor}.{python_version.micro}\n",
|
| 288 |
-
"\u2705 Device: {DEVICE}\n",
|
| 289 |
-
"\u2705 PyTorch: {torch.__version__}\n",
|
| 290 |
-
"\u2705 Gemini API: {'Ready' if config.GEMINI_API_KEY else 'Not configured'}\n",
|
| 291 |
-
"\u2705 HuggingFace: {'Ready' if config.HUGGINGFACE_TOKEN else 'Not configured'}\n",
|
| 292 |
-
"\u2705 WebScraper API: Ready\n",
|
| 293 |
-
"\u2705 Directories: Created\n",
|
| 294 |
-
"\n",
|
| 295 |
-
"You can now proceed to the next notebook:\n",
|
| 296 |
-
" \u2192 01_data_acquisition.ipynb\n",
|
| 297 |
-
"\"\"\")\n",
|
| 298 |
-
"print(\"=\" * 60)"
|
| 299 |
-
]
|
| 300 |
}
|
| 301 |
],
|
| 302 |
"metadata": {
|
|
|
|
| 204 |
"id": "776236f8",
|
| 205 |
"metadata": {},
|
| 206 |
"outputs": [],
|
| 207 |
+
"source": "from pathlib import Path\n\n# Define directories (self-contained)\nBASE_DIR = Path('..').resolve()\nDATASETS_DIR = BASE_DIR / 'datasets'\nMODELS_DIR = BASE_DIR / 'models'\nARTIFACTS_DIR = BASE_DIR / 'artifacts'\n\n# Create necessary directories\ndirectories = [\n DATASETS_DIR,\n MODELS_DIR,\n ARTIFACTS_DIR,\n BASE_DIR / 'logs',\n BASE_DIR / 'cache',\n]\n\nprint('Creating directory structure...')\nfor directory in directories:\n directory.mkdir(parents=True, exist_ok=True)\n print(f' \u2713 {directory}')\n\nprint('\\n\u2713 Directory structure ready!')\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
},
|
| 209 |
{
|
| 210 |
"cell_type": "markdown",
|
|
|
|
| 220 |
"id": "6b854bac",
|
| 221 |
"metadata": {},
|
| 222 |
"outputs": [],
|
| 223 |
+
"source": "import json\nimport sys\nimport os\nfrom pathlib import Path\n\n# Get values (self-contained)\npython_version = sys.version_info\n\ntry:\n import torch\n torch_version = torch.__version__\n cuda_available = torch.cuda.is_available()\n if cuda_available:\n DEVICE = 'cuda'\n elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():\n DEVICE = 'mps'\n else:\n DEVICE = 'cpu'\nexcept ImportError:\n torch_version = 'not installed'\n cuda_available = False\n DEVICE = 'cpu'\n\n# Load config\nconfig_json_path = Path('notebook_config.json')\nif config_json_path.exists():\n with open(config_json_path, 'r') as f:\n loaded_config = json.load(f)\nelse:\n loaded_config = {}\n\nBASE_DIR = Path('..').resolve()\nDATASETS_DIR = BASE_DIR / 'datasets'\nMODELS_DIR = BASE_DIR / 'models'\nARTIFACTS_DIR = BASE_DIR / 'artifacts'\nRANDOM_STATE = loaded_config.get('random_state', 42)\nTEST_SIZE = loaded_config.get('test_size', 0.2)\nCV_FOLDS = loaded_config.get('cv_folds', 5)\n\n# Export configuration for other notebooks\nnotebook_config = {\n 'device': str(DEVICE),\n 'python_version': f'{python_version.major}.{python_version.minor}.{python_version.micro}',\n 'torch_version': torch_version,\n 'cuda_available': cuda_available,\n 'base_dir': str(BASE_DIR),\n 'datasets_dir': str(DATASETS_DIR),\n 'models_dir': str(MODELS_DIR),\n 'artifacts_dir': str(ARTIFACTS_DIR),\n 'random_state': RANDOM_STATE,\n 'test_size': TEST_SIZE,\n 'cv_folds': CV_FOLDS,\n}\n\nconfig_path = Path('notebook_runtime_config.json')\nwith open(config_path, 'w') as f:\n json.dump(notebook_config, f, indent=2)\n\nprint(f'\u2713 Configuration exported to: {config_path.absolute()}')\nprint(json.dumps(notebook_config, indent=2))\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
},
|
| 225 |
{
|
| 226 |
"cell_type": "markdown",
|
|
|
|
| 236 |
"id": "f409be56",
|
| 237 |
"metadata": {},
|
| 238 |
"outputs": [],
|
| 239 |
+
"source": "import sys\nimport json\nimport os\nfrom pathlib import Path\n\npython_version = sys.version_info\n\ntry:\n import torch\n torch_version = torch.__version__\n if torch.cuda.is_available():\n DEVICE = 'cuda'\n elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():\n DEVICE = 'mps'\n else:\n DEVICE = 'cpu'\nexcept ImportError:\n torch_version = 'not installed'\n DEVICE = 'cpu'\n\n# Load config\nconfig_json_path = Path('notebook_config.json')\nif config_json_path.exists():\n with open(config_json_path, 'r') as f:\n loaded_config = json.load(f)\nelse:\n loaded_config = {}\n\nGEMINI_API_KEY = loaded_config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY', '')\nHUGGINGFACE_TOKEN = os.getenv('HF_TOKEN', '')\n\nprint('\\n' + '=' * 60)\nprint('ENVIRONMENT SETUP COMPLETE')\nprint('=' * 60)\nprint(f'''\n\u2705 Python: {python_version.major}.{python_version.minor}.{python_version.micro}\n\u2705 Device: {DEVICE}\n\u2705 PyTorch: {torch_version}\n\u2705 Gemini API: {'Ready' if GEMINI_API_KEY else 'Not configured'}\n\u2705 HuggingFace: {'Ready' if HUGGINGFACE_TOKEN else 'Using public access'}\n\u2705 WebScraper API: Ready\n\u2705 Directories: Created\n\nYou can now proceed to the next notebook:\n \u2192 01_data_acquisition.ipynb\n''')\nprint('=' * 60)\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
}
|
| 241 |
],
|
| 242 |
"metadata": {
|