Pablo Dejuan commited on
Commit ·
772f0cb
1
Parent(s): 25e7df7
init git and download artgan files
Browse files- .editorconfig +12 -0
- .gitignore +31 -0
- .tool-versions +1 -0
- data/artgan_csv/artist_class.txt +23 -0
- data/artgan_csv/artist_train.csv +0 -0
- data/artgan_csv/artist_val.csv +0 -0
- data/artgan_csv/genre_class.txt +10 -0
- data/artgan_csv/genre_train.csv +0 -0
- data/artgan_csv/genre_val.csv +0 -0
- data/artgan_csv/style_class.txt +27 -0
- data/artgan_csv/style_train.csv +0 -0
- data/artgan_csv/style_val.csv +0 -0
- doc/data_preparation.md +21 -0
- doc/setup.md +21 -0
- scripts/validate_artgan_paths.py +62 -0
.editorconfig
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# EditorConfig is awesome: https://EditorConfig.org
|
| 2 |
+
|
| 3 |
+
# top-most EditorConfig file
|
| 4 |
+
root = true
|
| 5 |
+
|
| 6 |
+
[*]
|
| 7 |
+
indent_style = space
|
| 8 |
+
indent_size = 4
|
| 9 |
+
end_of_line = lf
|
| 10 |
+
charset = utf-8
|
| 11 |
+
trim_trailing_whitespace = false
|
| 12 |
+
insert_final_newline = false
|
.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
.Python
|
| 6 |
+
venv/
|
| 7 |
+
.venv/
|
| 8 |
+
.env
|
| 9 |
+
*.egg-info/
|
| 10 |
+
.eggs/
|
| 11 |
+
dist/
|
| 12 |
+
build/
|
| 13 |
+
|
| 14 |
+
# Data & outputs — large dirs and model files are not committed
|
| 15 |
+
data/arty_cache/
|
| 16 |
+
data/images/
|
| 17 |
+
data/wikiart/
|
| 18 |
+
data/wikiart.zip
|
| 19 |
+
*.pth
|
| 20 |
+
*.pt
|
| 21 |
+
checkpoints/
|
| 22 |
+
logs/
|
| 23 |
+
runs/
|
| 24 |
+
|
| 25 |
+
# Keep the metadata index in the repo (small, useful to commit)
|
| 26 |
+
!data/wikiart_index.csv
|
| 27 |
+
|
| 28 |
+
# IDE & OS
|
| 29 |
+
.idea/
|
| 30 |
+
.vscode/
|
| 31 |
+
.DS_Store
|
.tool-versions
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
python 3.13.2
|
data/artgan_csv/artist_class.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0 Albrecht_Durer
|
| 2 |
+
1 Boris_Kustodiev
|
| 3 |
+
2 Camille_Pissarro
|
| 4 |
+
3 Childe_Hassam
|
| 5 |
+
4 Claude_Monet
|
| 6 |
+
5 Edgar_Degas
|
| 7 |
+
6 Eugene_Boudin
|
| 8 |
+
7 Gustave_Dore
|
| 9 |
+
8 Ilya_Repin
|
| 10 |
+
9 Ivan_Aivazovsky
|
| 11 |
+
10 Ivan_Shishkin
|
| 12 |
+
11 John_Singer_Sargent
|
| 13 |
+
12 Marc_Chagall
|
| 14 |
+
13 Martiros_Saryan
|
| 15 |
+
14 Nicholas_Roerich
|
| 16 |
+
15 Pablo_Picasso
|
| 17 |
+
16 Paul_Cezanne
|
| 18 |
+
17 Pierre_Auguste_Renoir
|
| 19 |
+
18 Pyotr_Konchalovsky
|
| 20 |
+
19 Raphael_Kirchner
|
| 21 |
+
20 Rembrandt
|
| 22 |
+
21 Salvador_Dali
|
| 23 |
+
22 Vincent_van_Gogh
|
data/artgan_csv/artist_train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/artgan_csv/artist_val.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/artgan_csv/genre_class.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0 abstract_painting
|
| 2 |
+
1 cityscape
|
| 3 |
+
2 genre_painting
|
| 4 |
+
3 illustration
|
| 5 |
+
4 landscape
|
| 6 |
+
5 nude_painting
|
| 7 |
+
6 portrait
|
| 8 |
+
7 religious_painting
|
| 9 |
+
8 sketch_and_study
|
| 10 |
+
9 still_life
|
data/artgan_csv/genre_train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/artgan_csv/genre_val.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/artgan_csv/style_class.txt
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0 Abstract_Expressionism
|
| 2 |
+
1 Action_painting
|
| 3 |
+
2 Analytical_Cubism
|
| 4 |
+
3 Art_Nouveau_Modern
|
| 5 |
+
4 Baroque
|
| 6 |
+
5 Color_Field_Painting
|
| 7 |
+
6 Contemporary_Realism
|
| 8 |
+
7 Cubism
|
| 9 |
+
8 Early_Renaissance
|
| 10 |
+
9 Expressionism
|
| 11 |
+
10 Fauvism
|
| 12 |
+
11 High_Renaissance
|
| 13 |
+
12 Impressionism
|
| 14 |
+
13 Mannerism_Late_Renaissance
|
| 15 |
+
14 Minimalism
|
| 16 |
+
15 Naive_Art_Primitivism
|
| 17 |
+
16 New_Realism
|
| 18 |
+
17 Northern_Renaissance
|
| 19 |
+
18 Pointillism
|
| 20 |
+
19 Pop_Art
|
| 21 |
+
20 Post_Impressionism
|
| 22 |
+
21 Realism
|
| 23 |
+
22 Rococo
|
| 24 |
+
23 Romanticism
|
| 25 |
+
24 Symbolism
|
| 26 |
+
25 Synthetic_Cubism
|
| 27 |
+
26 Ukiyo_e
|
data/artgan_csv/style_train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/artgan_csv/style_val.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
doc/data_preparation.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ArtGAN WikiArt
|
| 2 |
+
|
| 3 |
+
We use the WikiArt dataset, which provides the source images together with index files that already classify them in each of the 3 categories (artist, genre and style).
|
| 4 |
+
We need to download the image dataset and generate `data/wikiart_index.csv`.
|
| 5 |
+
|
| 6 |
+
## 1. Images
|
| 7 |
+
|
| 8 |
+
The refined WikiArt dataset (images only) is provided by the [ArtGAN project](https://github.com/cs-chan/ArtGAN/tree/master/WikiArt%20Dataset).
|
| 9 |
+
|
| 10 |
+
- **URL:** [wikiart.zip on Google Drive](https://drive.google.com/file/d/1vTChp3nU5GQeLkPwotrybpUGUXj12BTK/view) (~25.4 GB).
|
| 11 |
+
- **License:** Non-commercial research only; see [ArtGAN README](https://github.com/cs-chan/ArtGAN/blob/master/WikiArt%20Dataset/README.md).
|
| 12 |
+
|
| 13 |
+
We download the file into the repo and extract it under `data/wikiart`. The zip and the extracted files are excluded from git. `rembrandt_woman-standing-with-raised-hands.jpg` and `vincent-van-gogh_l-arlesienne-portrait-of-madame-ginoux-1890.jpg` contained errors.
|
| 14 |
+
|
| 15 |
+
The root data dir `data/wikiart/` contains 27 style subdirectories, each containing images named `{artist-slug}_{painting-title}-{year}.jpg`.
|
| 16 |
+
|
| 17 |
+
## 2. ArtGAN label files for metadata
|
| 18 |
+
|
| 19 |
+
ArtGAN only uses a subset of WikiArt. ArtGAN’s artist, style and genre labels are distributed as [wikiart_csv.zip](https://drive.google.com/file/d/1uug57zp13wJDwb2nuHOQfR2Odr0hh1a8/view),
|
| 20 |
+
which we include in the repo for convenience under `data/artgan_csv`. There's a slight variation in one style name, from `Art_Nouveau` to `Art_Nouveau_Modern`.
|
| 21 |
+
Run `scripts/validate_artgan_paths.py` to validate that every path listed in these CSVs exists under `data/wikiart/`.
|
doc/setup.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Development setup
|
| 2 |
+
|
| 3 |
+
## Runtimes (asdf)
|
| 4 |
+
|
| 5 |
+
The project uses [asdf](https://asdf-vm.com/) for runtimes. Versions are pinned in [`.tool-versions`](../.tool-versions).
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# Install the plugin and runtime if needed
|
| 9 |
+
asdf plugin add python
|
| 10 |
+
asdf install # installs versions listed in .tool-versions
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
## Python env
|
| 14 |
+
|
| 15 |
+
Create a virtualenv and install dependencies:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
python -m venv .venv
|
| 19 |
+
. .venv/bin/activate
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
scripts/validate_artgan_paths.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
scripts/validate_artgan_paths.py
|
| 3 |
+
=================================
|
| 4 |
+
Check that every path in the ArtGAN train/val CSVs exists under data/wikiart/.
|
| 5 |
+
"""
|
| 6 |
+
import csv
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Repository root, derived from this script's location (<repo>/scripts/<this file>).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory that the image paths in the CSVs are resolved against.
WIKIART_ROOT = REPO_ROOT / "data" / "wikiart"
# Directory holding the ArtGAN label CSVs.
CSV_DIR = REPO_ROOT / "data" / "artgan_csv"

CSV_FILES = [
    "style_train.csv",
    "style_val.csv",
    "genre_train.csv",
    "genre_val.csv",
    "artist_train.csv",
    "artist_val.csv",
]

# How many missing paths are listed per CSV before the rest are summarised.
MAX_MISSING_SHOWN = 5


def _scan_csv(csv_path: Path, image_root: Path) -> tuple[int, list[str]]:
    """Count the rows of *csv_path* and collect the paths that are not files.

    The first column of every non-empty row is taken as a path relative to
    *image_root*.

    Returns:
        A ``(row_count, missing_paths)`` pair; ``missing_paths`` keeps the
        order in which the rows were read.
    """
    row_count = 0
    missing: list[str] = []
    # newline="" is what the csv module requires for correct parsing, and an
    # explicit encoding keeps path decoding independent of the platform locale.
    with open(csv_path, newline="", encoding="utf-8") as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                continue
            row_count += 1
            local_path = row[0].strip()
            # An empty path would resolve to image_root itself, and a
            # directory is not an image, so neither may count as present.
            if not local_path or not (image_root / local_path).is_file():
                missing.append(local_path)
    return row_count, missing


def main() -> None:
    """Validate every CSV in ``CSV_FILES`` against ``WIKIART_ROOT``.

    Prints one status line per CSV. A CSV that is not present in ``CSV_DIR``
    is reported as skipped and does not count as a failure.

    Raises:
        SystemExit: with status 1 when ``WIKIART_ROOT`` does not exist or when
            any CSV references a path that is not a file under it.
    """
    if not WIKIART_ROOT.exists():
        print(f"ERROR: {WIKIART_ROOT} not found.")
        sys.exit(1)

    total_files = 0
    all_ok = True
    for csv_file_name in CSV_FILES:
        csv_file_path = CSV_DIR / csv_file_name
        label = Path(csv_file_name).stem
        if not csv_file_path.exists():
            print(f" {label}: SKIP (file not found)")
            continue

        total, missing = _scan_csv(csv_file_path, WIKIART_ROOT)
        total_files += total
        if not missing:
            print(f" {label}: {total} rows, OK")
            continue

        all_ok = False
        print(f" {label}: {total} rows, MISSING {len(missing)} files")
        for p in missing[:MAX_MISSING_SHOWN]:
            print(f" - {p}")
        if len(missing) > MAX_MISSING_SHOWN:
            print(f" ... and {len(missing) - MAX_MISSING_SHOWN} more")

    if not all_ok:
        sys.exit(1)
    print(f"{total_files} total files, OK")


if __name__ == "__main__":
    main()
|