Commit
·
27b0d30
1
Parent(s):
8ed5a9b
Update Querying Data to run directly from Colab
Browse files- Querying Data.ipynb +13 -5
Querying Data.ipynb
CHANGED
|
@@ -33,10 +33,11 @@
|
|
| 33 |
"import pandas.io.formats.style\n",
|
| 34 |
"import random\n",
|
| 35 |
"import functools\n",
|
|
|
|
| 36 |
"from typing import Literal\n",
|
| 37 |
"\n",
|
| 38 |
"SOURCE: Literal[\"danbooru\", \"e621\"] = \"e621\"\n",
|
| 39 |
-
"DATA_FOLDER = \"
|
| 40 |
]
|
| 41 |
},
|
| 42 |
{
|
|
@@ -105,10 +106,17 @@
|
|
| 105 |
"metadata": {},
|
| 106 |
"outputs": [],
|
| 107 |
"source": [
|
| 108 |
-
"
|
| 109 |
-
"
|
| 110 |
-
"
|
| 111 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
"tags_by_name = tags.copy(deep=True)\n",
|
| 113 |
"tags_by_name.set_index(\"name\", inplace=True)\n",
|
| 114 |
"tags.set_index(\"tag_id\", inplace=True)"
|
|
|
|
| 33 |
"import pandas.io.formats.style\n",
|
| 34 |
"import random\n",
|
| 35 |
"import functools\n",
|
| 36 |
+
"import os\n",
|
| 37 |
"from typing import Literal\n",
|
| 38 |
"\n",
|
| 39 |
"SOURCE: Literal[\"danbooru\", \"e621\"] = \"e621\"\n",
|
| 40 |
+
"DATA_FOLDER = \".\""
|
| 41 |
]
|
| 42 |
},
|
| 43 |
{
|
|
|
|
| 106 |
"metadata": {},
|
| 107 |
"outputs": [],
|
| 108 |
"source": [
|
| 109 |
+
"def get_feather(filename: str) -> pandas.DataFrame:\n",
|
| 110 |
+
" target = f\"{DATA_FOLDER}/{filename}.feather\"\n",
|
| 111 |
+
" if not os.path.exists(target):\n",
|
| 112 |
+
" !wget -O {filename}.feather https://huggingface.co/Specimen5423/E621TagAssociations/resolve/main/{filename}.feather?download=true\n",
|
| 113 |
+
" return pandas.read_feather(target)\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"os.makedirs(DATA_FOLDER, exist_ok=True)\n",
|
| 116 |
+
"tags = get_feather(\"tags\")\n",
|
| 117 |
+
"posts_by_tag = get_feather(\"posts_by_tag\").set_index(\"tag_id\")\n",
|
| 118 |
+
"tags_by_post = get_feather(\"tags_by_post\").set_index(\"post_id\")\n",
|
| 119 |
+
"implications = get_feather(\"implications\")\n",
|
| 120 |
"tags_by_name = tags.copy(deep=True)\n",
|
| 121 |
"tags_by_name.set_index(\"name\", inplace=True)\n",
|
| 122 |
"tags.set_index(\"tag_id\", inplace=True)"
|