# %% Cell 1: import proxy settings exported by /etc/network_turbo
import os
import subprocess
import sys

# Host-provided shell script that this cell sources to obtain proxy variables.
NETWORK_TURBO_SCRIPT = "/etc/network_turbo"


def parse_env_lines(text):
    """Turn ``NAME=value`` lines, as printed by ``env``, into a dict.

    Args:
        text: Raw multi-line text, one ``NAME=value`` pair per line.

    Returns:
        dict mapping each variable name to its value. Only the first ``=``
        on a line separates name from value, so values may contain ``=``.
        Lines without ``=`` and lines with an empty name are skipped (an
        empty name cannot be stored in ``os.environ``). When a name occurs
        more than once, the last occurrence wins.
    """
    parsed = {}
    for line in text.splitlines():
        name, sep, value = line.partition("=")
        if sep and name:
            parsed[name] = value
    return parsed


# A child process cannot modify this kernel's environment, so the script is
# sourced inside bash and every resulting environment line containing "proxy"
# is printed back, to be copied into os.environ below.
# An argv list is used instead of shell=True to avoid wrapping bash in a
# second shell.
try:
    result = subprocess.run(
        ["bash", "-c", f"source {NETWORK_TURBO_SCRIPT} && env | grep proxy"],
        capture_output=True,
        text=True,
    )
except OSError as exc:
    # bash could not be started at all; stay best-effort instead of crashing.
    result = None
    print(
        f"Could not run bash to source {NETWORK_TURBO_SCRIPT}: {exc}",
        file=sys.stderr,
    )

output = result.stdout if result is not None else ""
proxy_vars = parse_env_lines(output)
os.environ.update(proxy_vars)

if result is not None and not proxy_vars:
    # grep exits non-zero both when the script is missing and when nothing
    # matched, so surface bash's stderr (if any) to tell the two cases apart.
    detail = result.stderr.strip() or "no environment line matched 'proxy'"
    print(
        f"No proxy variables imported from {NETWORK_TURBO_SCRIPT}: {detail}",
        file=sys.stderr,
    )

# Disabled alternative kept from the original cell (previously wrapped in a
# bare triple-quoted string, which was echoed as the cell's output):
#
# # Set the environment variable
# os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
#
# # Print the environment variable to confirm it was set
# print(os.environ.get('HF_ENDPOINT'))

# Show only the variable names that were imported; the values are proxy URLs
# and may embed credentials, so they are deliberately not displayed.
sorted(proxy_vars)

# %% Cell 2: disabled in the original notebook (comments only)
# from datasets import load_dataset

# # Load the Wikipedia corpus for specific languages
# langs = ["20220301.en", "20220301.fr", "20220301.de", "20220301.zh", "20220301.ja"]
# datasets = {lang: load_dataset("wikimedia/wikipedia", lang, split="train") for lang in langs}