Martin L (GitHub Actions) commited on
Commit
be53a00
·
1 Parent(s): 3c9bb11

Automated deployment from GitHub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +12 -0
  2. .gitignore +170 -0
  3. .pre-commit-config.yaml +9 -0
  4. .vscode/settings.json +15 -0
  5. Dockerfile +58 -0
  6. INSTALL_HF_DOCKER_SPACE.sh +52 -0
  7. INSTALL_LINUX.sh +62 -0
  8. LICENSE +339 -0
  9. Makefile +13 -0
  10. README.md +6 -8
  11. data/.gitkeep +1 -0
  12. docs/2024-08-27-22-52_unit_calculations.xoj +0 -0
  13. docs/ADRs/2025-10-04_design_of_huggingface_space_dockerfile.md +32 -0
  14. docs/annotate_tool_UI_design.svg +198 -0
  15. docs/contributing.md +74 -0
  16. docs/data_collection.md +13 -0
  17. docs/datasets_literature_review.md +25 -0
  18. docs/developer_guide.md +59 -0
  19. docs/developing_new_models.md +21 -0
  20. docs/funding.md +9 -0
  21. docs/huggingface_docker_space_deployment.md +52 -0
  22. docs/images/.gitkeep +1 -0
  23. docs/images/TODO.md +1 -0
  24. docs/images/system_design.jpg +0 -0
  25. docs/index.md +53 -0
  26. docs/installation_developer.md +7 -0
  27. docs/installation_user.md +29 -0
  28. docs/pyinstaller_experiment.md +23 -0
  29. docs/requirements.txt +3 -0
  30. docs/roadmap.md +77 -0
  31. docs/user_guide.md +21 -0
  32. experiments/2025-02-05_writing_test/index.html +17 -0
  33. experiments/2025-02-05_writing_test/script.js +47 -0
  34. experiments/2025-02-05_writing_test/styles.css +19 -0
  35. mkdocs.yml +40 -0
  36. notebooks/experiment_with_IAM_OnDo_dataset.ipynb +329 -0
  37. notebooks/experiment_with_clustering_for_online_word_detection.ipynb +526 -0
  38. plugin/config.lua +14 -0
  39. plugin/copy_to_plugin_folder.sh +16 -0
  40. plugin/demo_config.lua +14 -0
  41. plugin/main.lua +46 -0
  42. plugin/plugin.ini +17 -0
  43. pyproject.toml +20 -0
  44. pytest.ini +11 -0
  45. requirements.txt +9 -0
  46. requirements_training.txt +4 -0
  47. scripts/demo.py +230 -0
  48. scripts/demo_concept_1.sh +4 -0
  49. setup.py +49 -0
  50. tests/.gitkeep +0 -0
.dockerignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore local data and virtual environments
2
+ data/
3
+ .venv/
4
+
5
+ # Common extras you probably don't want in the image
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ .env
11
+ .git
12
+ .gitignore
.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ external/
2
+ tests/data/
3
+ data/datasets/
4
+ .ipynb_checkpoints/
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ .pybuilder/
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ # For a library or package, you might want to ignore these files since the code is
92
+ # intended to run in multiple environments; otherwise, check them in:
93
+ # .python-version
94
+
95
+ # pipenv
96
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
98
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
99
+ # install all needed dependencies.
100
+ #Pipfile.lock
101
+
102
+ # poetry
103
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107
+ #poetry.lock
108
+
109
+ # pdm
110
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
111
+ #pdm.lock
112
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
113
+ # in version control.
114
+ # https://pdm.fming.dev/#use-with-ide
115
+ .pdm.toml
116
+
117
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118
+ __pypackages__/
119
+
120
+ # Celery stuff
121
+ celerybeat-schedule
122
+ celerybeat.pid
123
+
124
+ # SageMath parsed files
125
+ *.sage.py
126
+
127
+ # Environments
128
+ .env
129
+ .venv
130
+ env/
131
+ venv/
132
+ ENV/
133
+ env.bak/
134
+ venv.bak/
135
+
136
+ # Spyder project settings
137
+ .spyderproject
138
+ .spyproject
139
+
140
+ # Rope project settings
141
+ .ropeproject
142
+
143
+ # mkdocs documentation
144
+ /site
145
+
146
+ # mypy
147
+ .mypy_cache/
148
+ .dmypy.json
149
+ dmypy.json
150
+
151
+ # Pyre type checker
152
+ .pyre/
153
+
154
+ # pytype static type analyzer
155
+ .pytype/
156
+
157
+ # Cython debug symbols
158
+ cython_debug/
159
+
160
+ # PyCharm
161
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
164
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165
+ #.idea/
166
+
167
+ .gradio/
168
+ best_model.pth
169
+
170
+ .DS_Store
.pre-commit-config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.4.4
4
+ hooks:
5
+ - id: ruff # linter
6
+ types_or: [python, pyi, jupyter]
7
+ args: [--exit-non-zero-on-fix]
8
+ - id: ruff-format # formatter
9
+ types_or: [python, pyi, jupyter]
.vscode/settings.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.formatOnSave": true,
4
+ "editor.codeActionsOnSave": {
5
+ "source.fixAll": "explicit",
6
+ "source.organizeImports": "explicit"
7
+ },
8
+ "editor.defaultFormatter": "charliermarsh.ruff"
9
+ },
10
+ "notebook.formatOnSave.enabled": true,
11
+ "notebook.codeActionsOnSave": {
12
+ "notebook.source.fixAll": "explicit",
13
+ "notebook.source.organizeImports": "explicit"
14
+ },
15
+ }
Dockerfile ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Documentation: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+
3
+ # Start from an official lightweight Python image
4
+ FROM python:3.10-slim
5
+
6
+ # Prevents Python from writing .pyc files and buffering stdout/stderr
7
+ # ENV PYTHONDONTWRITEBYTECODE=1
8
+ # ENV PYTHONUNBUFFERED=1
9
+
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ git \
13
+ wget \
14
+ unzip \
15
+ vim-tiny \
16
+ curl \
17
+ libgl1 \
18
+ libglib2.0-0 \
19
+ xournalpp \
20
+ poppler-utils \
21
+ && rm -rf /var/lib/apt/lists/*
22
+
23
+ # Create and set working directory
24
+ WORKDIR /app
25
+
26
+ # Create temp_code_mount folder
27
+ RUN mkdir -p /temp_code_mount
28
+
29
+ # Install Python dependencies early for caching
30
+ # COPY requirements.txt .
31
+ # RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # Copy application code
34
+ COPY . .
35
+
36
+ # Run the INSTALL_HF_DOCKER_SPACE.sh script
37
+ RUN bash INSTALL_HF_DOCKER_SPACE.sh
38
+ RUN pip install matplotlib bs4 pdf2image supabase python-dotenv
39
+ # ^- that should not be necessary!! TODO!!
40
+
41
+ # Expose the port Gradio will run on inside Hugging Face Spaces
42
+ EXPOSE 7860
43
+
44
+ # Command to run Gradio app
45
+ # Hugging Face Spaces will set PORT env var, so we use it
46
+ CMD ["python", "scripts/demo.py"]
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+ # https://huggingface.co/docs/hub/spaces-sdks-docker
56
+
57
+ # https://huggingface.co/spaces/SpacesExamples/secret-example/tree/main
58
+ # - https://huggingface.co/spaces/SpacesExamples/secret-example/blob/main/Dockerfile
INSTALL_HF_DOCKER_SPACE.sh ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Based on `INSTALL_LINUX.sh` file.
2
+
3
+ # ========
4
+ # SETTINGS
5
+ # ========
6
+
7
+ HTR_PIPELINE_PATH="external/htr_pipeline"
8
+
9
+ # ================
10
+ # Helper functions
11
+ # ================
12
+
13
+ install_htr_pipeline () {
14
+
15
+ mkdir -p ${HTR_PIPELINE_PATH}
16
+ cd ${HTR_PIPELINE_PATH}
17
+ git clone https://github.com/githubharald/HTRPipeline.git
18
+ cd HTRPipeline
19
+ cd htr_pipeline/models
20
+ wget https://www.dropbox.com/s/j1hl6bppecug0sz/models.zip
21
+ unzip -o models.zip
22
+ cd ../../
23
+ pip install .
24
+ # 3. Install [HTRPipelines](https://github.com/githubharald/HTRPipeline) package using [its installation guide](https://github.com/githubharald/HTRPipeline/tree/master#installation).
25
+
26
+ }
27
+
28
+ CURRENT_DIR=$(pwd)
29
+
30
+ # ====================
31
+ # Installation process
32
+ # ====================
33
+
34
+ rm -rf ${HTR_PIPELINE_PATH}
35
+
36
+ install_htr_pipeline
37
+ cd ${CURRENT_DIR}
38
+ pip install -r requirements.txt
39
+ pip install gradio # TODO: Move to optional package in `pyproject.toml` once I use this setup.
40
+ pip install -e .
41
+
42
+ # ========
43
+ # Feedback
44
+ # ========
45
+
46
+ echo
47
+ echo "==========================================="
48
+ echo "==========================================="
49
+ echo "==========================================="
50
+ echo
51
+ echo "Installation complete"
52
+ echo
INSTALL_LINUX.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========
2
+ # SETTINGS
3
+ # ========
4
+
5
+ ENVIRONMENT_NAME="xournalpp_htr"
6
+ HTR_PIPELINE_PATH="external/htr_pipeline"
7
+
8
+ # ================
9
+ # Helper functions
10
+ # ================
11
+
12
+ install_htr_pipeline () {
13
+
14
+ mkdir -p ${HTR_PIPELINE_PATH}
15
+ cd ${HTR_PIPELINE_PATH}
16
+ git clone https://github.com/githubharald/HTRPipeline.git
17
+ cd HTRPipeline
18
+ cd htr_pipeline/models
19
+ wget https://www.dropbox.com/s/j1hl6bppecug0sz/models.zip
20
+ unzip -o models.zip
21
+ cd ../../
22
+ pip install .
23
+ # 3. Install [HTRPipelines](https://github.com/githubharald/HTRPipeline) package using [its installation guide](https://github.com/githubharald/HTRPipeline/tree/master#installation).
24
+
25
+ }
26
+
27
+ CURRENT_DIR=$(pwd)
28
+
29
+ # ====================
30
+ # Installation process
31
+ # ====================
32
+
33
+ rm -rf ${HTR_PIPELINE_PATH}
34
+
35
+ eval "$(conda shell.bash hook)" # enable `conda activate`, see
36
+ # https://stackoverflow.com/a/56155771
37
+
38
+ conda create --name ${ENVIRONMENT_NAME} python=3.10.11 -y
39
+ conda activate ${ENVIRONMENT_NAME}
40
+ install_htr_pipeline
41
+ cd ${CURRENT_DIR}
42
+ pip install -r requirements.txt
43
+ pip install -e .
44
+ pre-commit install
45
+
46
+ cd plugin
47
+ bash copy_to_plugin_folder.sh
48
+
49
+ # ========
50
+ # Feedback
51
+ # ========
52
+
53
+ echo
54
+ echo "==========================================="
55
+ echo "==========================================="
56
+ echo "==========================================="
57
+ echo
58
+ echo "Installation complete"
59
+ echo
60
+ echo "Activate environment with:"
61
+ echo "\"conda activate ${ENVIRONMENT_NAME}\""
62
+ echo
LICENSE ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ <one line to give the program's name and a brief idea of what it does.>
294
+ Copyright (C) <year> <name of author>
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ <signature of Ty Coon>, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.
Makefile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TODO: Fill it.
2
+
3
+ docs:
4
+ mkdocs build --clean
5
+ # TODO: Sth like https://numpy.org/doc/stable/reference/generated/numpy.mean.html#numpy.mean
6
+
7
+ tests-installation:
8
+ pytest -v -k "installation"
9
+
10
+ run-pre-commit-hooks:
11
+ pre-commit run --all-files
12
+
13
+ .PHONY: docs tests-installation run-pre-commit-hooks
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- title: Xournalpp Htr
3
- emoji: 🚀
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Xournal++ HTR
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
+ ---
 
 
data/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+
docs/2024-08-27-22-52_unit_calculations.xoj ADDED
Binary file (6.62 kB). View file
 
docs/ADRs/2025-10-04_design_of_huggingface_space_dockerfile.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Design of HuggingFace Space Dockerfile
2
+
3
+ - Status: Ongoing
4
+ - Deciders: Martin Lellep (@PellelNitram)
5
+ - Drivers: Martin Lellep (@PellelNitram)
6
+ - PRD: None
7
+ - Date: 2025-10-04
8
+
9
+ ## Context
10
+
11
+ *Explain the background and the context in which the decision is being made. Include any relevant information about the problem, constraints, or goals.*
12
+
13
+ ## Decisions
14
+
15
+ *State the decision that has been made. Be clear and concise.*
16
+
17
+ - In the future, download models at build time into the Docker image from Github release page. In the
18
+ very far future, pull them from HuggingFace at run-time.
19
+ - Add `xournalpp` binary to Docker image so that the `xopp` file can be exported as PDF prior to
20
+ execution of the HTR pipeline.
21
+
22
+ ## Consequences
23
+
24
+ *Describe the consequences of the decision. Include both positive and negative outcomes, as well as any trade-offs.*
25
+
26
+ ## Alternatives Considered
27
+
28
+ *List and briefly describe other options that were considered and why they were not chosen.*
29
+
30
+ ## References
31
+
32
+ *Include links or references to any supporting documentation, discussions, or resources.*
docs/annotate_tool_UI_design.svg ADDED
docs/contributing.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing
2
+
3
+ There are multiple ways to contribute to this project. Below, those ways are explained alongside information on how to best contribute from a codebase point of view.
4
+
5
+ Really, we greatly appreciate any help!
6
+
7
+ ## Ways to contribute
8
+
9
+ ### Reach out
10
+
11
+ If you have questions about how to best contribute or the slightest
12
+ interest in contributing, then feel free to reach out to me at any time :-).
13
+
14
+ ### Issues on Github
15
+
16
+ A great way to help out with this project is to check [open issues on Github](https://github.com/PellelNitram/xournalpp_htr/issues)
17
+ and to try to work on them.
18
+
19
+ If you need support with those, then please reach out to us - we're very happy to help!
20
+
21
+ ## Things to consider when contributing
22
+
23
+ ### Branching strategy
24
+
25
+ The following branching strategy is used to keep the `master` branch stable and
26
+ allow for experimentation: `master` > `dev` > `feature branches`. This branching
27
+ strategy is shown in the following visualisation and then explained in more detail
28
+ in the next paragraph:
29
+
30
+ ```mermaid
31
+ %%{init:{ "gitGraph":{ "mainBranchName":"master" }}}%%
32
+ gitGraph
33
+ commit
34
+ commit
35
+ branch dev
36
+ commit
37
+ checkout dev
38
+ commit
39
+ commit
40
+ branch feature/awesome_new_feature
41
+ commit
42
+ checkout feature/awesome_new_feature
43
+ commit
44
+ commit
45
+ commit
46
+ checkout dev
47
+ merge feature/awesome_new_feature
48
+ commit
49
+ commit
50
+ checkout master
51
+ merge dev
52
+ commit
53
+ commit
54
+ ```
55
+
56
+ In more details, this repository adheres to the following git branching strategy: The
57
+ `master` branch remains stable and delivers a functioning product. The `dev` branch
58
+ consists of all code that will be merged to `master` eventually where the corresponding
59
+ features are developed in individual feature branches; the above visualisation shows an
60
+ example feature branch called `feature/awesome_new_feature` that works on a feature
61
+ called `awesome_new_feature`.
62
+
63
+ Given this structure, please implement new features as feature branches and
64
+ rebase them onto the `dev` branch prior to sending a pull request to `dev`.
65
+
66
+ Note: The Github Actions CI/CD pipeline runs on the branches `master` and `dev`.
67
+
68
+ ### Code quality
69
+
70
+ We try to keep up code quality as high as practically possible. For that reason, the following steps are implemented:
71
+
72
+ - Testing. Xournal++ HTR uses `pytest` for unit, regression and integration tests.
73
+ - Linting. Xournal++ HTR uses `ruff` for linting and code best practices. `ruff` is implemented as git pre-commit hook. Since `ruff` as pre-commit hook is configured externally with `pyproject.toml`, you can use the same settings in your IDE (e.g. VSCode) if you wish to speed up the process.
74
+ - Formatting. Xournal++ HTR uses `ruff-format` for consistent code formatting. `ruff-format` is implemented as git pre-commit hook. Since `ruff-format` as pre-commit hook is configured externally with `pyproject.toml`, you can use the same settings in your IDE if you wish to speed up the process.
docs/data_collection.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data collection and annotation
2
+
3
+ <div align="center">
4
+
5
+ <iframe width="560" height="315" src="https://www.youtube.com/embed/dQw4w9WgXcQ?si=3xMriRxJb8TdjVui" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
6
+
7
+ <br>
8
+
9
+ <i>(<a href="https://youtu.be/dQw4w9WgXcQ?utm_source=docs&utm_medium=docs&utm_campaign=docs">Click here to get to video on YouTube.</a>)</i>
10
+
11
+ </div>
12
+
13
+ TODO
docs/datasets_literature_review.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ THIS DOCUMENT IS WORK IN PROGRESS AND WILL BE COMPLETED LATER ON!
2
+
3
+ ## Draft content
4
+
5
+ In this document, I am checking lit rev for datasets to know what is around and what might need to be created for best performing models.
6
+
7
+ TODO - *Now it gets messy*:
8
+
9
+ - See https://chatgpt.com/c/68037a32-e49c-8009-9629-c9d38404e42b
10
+ - https://github.com/rafaeljcdarce/HWR
11
+ - https://martin-thoma.com/write-math/
12
+ - (ask him) Data: The data can be downloaded from write-math.com/data. I will try to keep a relatively recent version online. You can contact me if you want the latest version. However, I should note that currently (2015-04-12) this is about 3.7GB. This means sharing the data is not that easy.
13
+ - this seems to be constrained to single (latex) symbols; this conclusion is based on those presentations:
14
+ - https://raw.githubusercontent.com/MartinThoma/LaTeX-examples/refs/heads/master/presentations/Bachelor-Short/LaTeX/bachelor-short.pdf
15
+ - interesting ideas: https://raw.githubusercontent.com/MartinThoma/LaTeX-examples/refs/heads/master/presentations/Bachelor-Final-Presentation/LaTeX/Bachelor-Final-Presentation.pdf
16
+ - similar to: https://detexify.kirelabs.org/classify.html
17
+ - ask him about write-math.com; https://martin-thoma.com/write-math/#data
18
+ - https://arxiv.org/abs/1511.09030
19
+ - https://hwrt.readthedocs.io/
20
+ - https://github.com/MartinThoma/hwr-experiments
21
+ - https://hwrt.readthedocs.io/index.html
22
+ - ! https://www.reddit.com/r/selfhosted/comments/1doy32j/document_scanning_ocr_that_works_well_with/
23
+ - https://www.reddit.com/r/computervision/comments/15er2y7/2023_review_of_tools_for_handwritten_text/
24
+ - https://detexify.kirelabs.org/classify.html
25
+ - https://github.com/kirel/detexify-data
docs/developer_guide.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Developer Guide
2
+
3
+ ## Project design
4
+
5
+ The design of Xournal++ HTR tries to bridge the gap between delivering a production-ready product and allowing contributors to experiment with new algorithms.
6
+
7
+ The project design involves a Lua plugin and a Python backend, see the following figure. First, the production ready product is delivered by means of an Xournal++ plugin. The plugin is fully integrated in Xournal++ and calls a Python backend that performs the actual transcription. The Python backend allows selection of various recognition models and is thereby fully extendable with new models.
8
+
9
+ <!--
10
+ DOESN'T WORK SOMEHOW:
11
+ <div align="center">
12
+ <img src="images/system_design.jpg" width="50%">
13
+ <p><i>Design of xournalpp_htr.</i></p>
14
+ </div>
15
+ -->
16
+
17
+ <!-- An alternative figure is shown below: -->
18
+
19
+ ```mermaid
20
+ sequenceDiagram
21
+ User in Xpp-->>Xpp HTR Plugin: starts transcription process using currently open file
22
+ Xpp HTR Plugin -->> Xpp HTR Lua Plugin: calls
23
+ Xpp HTR Lua Plugin -->>Xpp HTR Python Backend: constructs command using CLI
24
+ Xpp HTR Python Backend -->> Xpp HTR Python Backend: Does OCR & stores PDF
25
+ Xpp HTR Python Backend-->>User in Xpp: Gives back control to UI
26
+ ```
27
+
28
+ Developing a usable HTR systems requires experimentation. The project structure is set up to accommodate this need. *Note that ideas on improved project structures are appreciated.*
29
+
30
+ The experimentation is carried out in terms of "concepts". Each concept explores a different approach to HTR and possibly - but not necessarily - improves over previous concepts, so as to allow for freedom in risky experiments. Concept 1 is already implemented and uses a computer vision approach that is explained below.
31
+
32
+ Future concepts might explore:
33
+
34
+ - Retrain computer vision models from concept 1 using native online data representation of [Xournal++](https://github.com/xournalpp/xournalpp)
35
+ - Use sequence-to-sequence models to take advantage of native online data representation of [Xournal++](https://github.com/xournalpp/xournalpp); e.g. use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR)
36
+ - Use data augmentation to increase effective size of training data
37
+ - Use of language models to correct for spelling mistakes
38
+
39
+ ### Concept 1
40
+
41
+ This concept uses computer vision based algorithms to first detect words on a page and then to read those words.
42
+
43
+ The following shows a video demo on YouTube using real-life handwriting data from a Xournal file:
44
+
45
+ [![Xournal++ HTR - Concept 1 - Demo](https://img.youtube.com/vi/FGD_O8brGNY/0.jpg)](https://www.youtube.com/watch?v=FGD_O8brGNY)
46
+
47
+ Despite not being perfect, the main take away is that the performance is surprisingly good given that the underlying algorithm has not been optimised for Xournal++ data at all.
48
+
49
+ **The performance is sufficiently good to be useful for the Xournal++ user base.**
50
+
51
+ Feel free to play around with the demo yourself using [this code](https://github.com/PellelNitram/xournalpp_htr/blob/master/scripts/demo_concept_1.sh) after [installing this project](installation_user.md). The "concept 1" is also what is currently used in the plugin and shown in the [90 seconds demo](https://www.youtube.com/watch?v=boXm7lPFSRQ).
52
+
53
+ Next steps to improve the performance of the handwritten text recognition even further could be:
54
+
55
+ - Re-train the algorithm on Xournal++ specific data, while potentially using data augmentation.
56
+ - Use language model to improve text encoding.
57
+ - Use sequence-to-sequence algorithm that makes use of [Xournal++](https://github.com/xournalpp/xournalpp)'s data format. This translates into using online HTR algorithms.
58
+
59
+ I would like to acknowledge [Harald Scheidl](https://github.com/githubharald) in this concept as he wrote the underlying algorithms and made them easily usable through [his HTRPipeline repository](https://github.com/githubharald/HTRPipeline) - after all I just feed his algorithm [Xournal++](https://github.com/xournalpp/xournalpp) data in concept 1. [Go check out his great content](https://githubharald.github.io/)!
docs/developing_new_models.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Developing new models
2
+
3
+
4
+ <div align="center">
5
+
6
+ <iframe width="560" height="315" src="https://www.youtube.com/embed/dQw4w9WgXcQ?si=3xMriRxJb8TdjVui" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
7
+
8
+ <br>
9
+
10
+ <i>(<a href="https://youtu.be/dQw4w9WgXcQ?utm_source=docs&utm_medium=docs&utm_campaign=docs">Click here to get to video on YouTube.</a>)</i>
11
+
12
+ </div>
13
+
14
+ - I provide dataset and code to experiment w/ new models
15
+ - train both your own bespoke and general models.
16
+
17
+ ## Training
18
+
19
+ ### Installation
20
+
21
+ Follow the above installation procedure and replace the step `pip install -r requirements.txt` by both `pip install -r requirements.txt` and `pip install -r requirements_training.txt` to install both the inference and training dependencies.
docs/funding.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Funding
2
+
3
+ This project is mostly a solo project and I love to work on it (*please [contribute](contributing.md), if you want to - happy to help along the way!*).
4
+
5
+ However, it is both a large time commitment and requires compute resources for training models.
6
+
7
+ If you think this project is valuable and want to express your gratitude, then please feel free to buy me a virtual coffee [here](https://ko-fi.com/martin_l) :-).
8
+
9
+ Thanks!!
docs/huggingface_docker_space_deployment.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Local Docker image building
2
+
3
+ 1. Build the Docker image: `docker build -t xournalpp_htr .`
4
+ 2. Run Docker image: `docker run -d -p 7860:7860 xournalpp_htr`
5
+ - Interactively for debugging: `docker run -it --entrypoint bash xournalpp_htr`
6
+ 3. Run Docker image for interactive development
7
+ - Start docker container: `docker run -it -p 7860:7860 -v $(pwd):/temp_code_mount --entrypoint bash xournalpp_htr`
8
+ - Call Python code inside the container: `python /temp_code_mount/scripts/demo.py`
9
+
10
+ Generally, tidy up Docker caches with `docker system prune` if your system is full.
11
+
12
+ ## Looking into adding xournalpp to the image, because it is needed for prediction (to convert xoj/xopp to PDF):
13
+
14
+ now cross compiled on M4
15
+ - build image: `docker buildx build --platform linux/amd64 -t xournalpp_htr .`
16
+ - interactively entering: `docker run -it --platform linux/amd64 -p 7860:7860 -v $(pwd):/temp_code_mount --entrypoint bash xournalpp_htr`
17
+ - dl deb file: `wget --no-check-certificate https://github.com/xournalpp/xournalpp/releases/download/v1.2.8/xournalpp-1.2.8-Debian-bookworm-x86_64.deb`
18
+ - there're issues!!
19
+ - alternative: use appimage:
20
+ - `wget --no-check-certificate https://github.com/xournalpp/xournalpp/releases/download/v1.2.8/xournalpp-1.2.8-x86_64.AppImage`
21
+
22
+ ## Commands to set up Supabase for event logging and data storage
23
+
24
+ Contents of `.env` file:
25
+
26
+ ```bash
27
+ DEMO=1
28
+ SB_URL="https://<add here>.supabase.co"
29
+ SB_KEY="<add here>"
30
+ SB_BUCKET_NAME="xournalpp_htr_hf_space"
31
+ SB_SCHEMA_NAME="public"
32
+ SB_TABLE_NAME="xournalpp_htr_hf_space_events"
33
+ ```
34
+
35
+ Create the events table:
36
+
37
+ ```sql
38
+ create table public.xournalpp_htr_hf_space_events (
39
+ id bigserial primary key,
40
+ timestamp timestamptz not null,
41
+ demo boolean not null,
42
+ session_id text not null,
43
+ donate_data bool not null,
44
+ interaction text not null
45
+ );
46
+ ```
47
+
48
+ Create bucket:
49
+
50
+ ```
51
+ xournalpp_htr_hf_space
52
+ ```
docs/images/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+ # Put all images here
docs/images/TODO.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Add 90-second quickstart video and document.
docs/images/system_design.jpg ADDED
docs/index.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Xournal++ HTR
2
+
3
+ Developing [handwritten text recognition](https://en.wikipedia.org/wiki/Handwriting_recognition) for [Xournal++](https://github.com/xournalpp/xournalpp).
4
+
5
+ *Your contributions are greatly appreciated!*
6
+
7
+ ## Xournal++ HTR in 90 seconds
8
+
9
+ <div align="center">
10
+
11
+ <iframe width="560" height="315" src="https://www.youtube.com/embed/boXm7lPFSRQ?si=Yg8tLBs-_1BtQKrU" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
12
+
13
+ <br>
14
+
15
+ <i>(<a href="https://www.youtube.com/watch?v=boXm7lPFSRQ?utm_source=docs&utm_medium=docs&utm_campaign=docs">Click here to get to video on YouTube.</a>)</i>
16
+
17
+ </div>
18
+
19
+ ## Why Handwritten Text Recognition for Xournal++?
20
+
21
+ A key benefit of digital note-taking is searchability, which digital handwritten notes lack
22
+ without [handwritten text recognition (HTR)](https://en.wikipedia.org/wiki/Handwriting_recognition).
23
+ While many commercial apps offer this feature, no open-source, privacy-focused handwriting
24
+ app does - until now.
25
+
26
+ The **Xournal++ HTR** project aims to bring on-device handwriting recognition to
27
+ [Xournal++](https://xournalpp.github.io/), a leading open-source note-taking platform.
28
+ This will make handwritten notes searchable while ensuring user privacy through local data
29
+ processing.
30
+
31
+ ## Content of this website
32
+
33
+ This website documents Xournal++ HTR. In the navigation bar, you can find instructions on
34
+ how to install the project, use the project and more advanced topics like how you can contribute
35
+ code and your own models. In the future, many of the documents will come with small videos to get you going quicker.
36
+
37
+ <!-- To assist you in training your own models, Xournal++ HTR comes with many helper functions and -->
38
+ <!-- convenience code infrastructure. -->
39
+
40
+ ## Cite
41
+
42
+ If you are using Xournal++ HTR for your research, I'd appreciate if you could cite it. Use:
43
+
44
+ ```
45
+ @software{Lellep_Xournalpp_HTR,
46
+ author = {Lellep, Martin},
47
+ title = {xournalpp_htr},
48
+ url = {https://github.com/PellelNitram/xournalpp_htr},
49
+ license = {GPL-2.0},
50
+ }
51
+ ```
52
+
53
+ *(Also please consider starring the project on GitHub.)*
docs/installation_developer.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Development installation
2
+
3
+ 1. Perform the same installation steps as described in the [user installation manual](installation_user.md).
4
+ 2. Then, install developer dependencies: `pip install -r requirements_training.txt`.
5
+
6
+ Depending on your needs, it is probably worth creating a dedicated Python environment for development. To do
7
+ so, simply change `xournalpp_htr` from [user installation manual](installation_user.md) to another name like `xournalpp_htr_dev` when you follow the above development installation steps.
docs/installation_user.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Installation
2
+
3
+ This project consists of both the inference and training code. Most users will only be interested in the inference part, so the instructions below only cover the inference part that you need to execute the plugin from within Xournal++.
4
+
5
+ The training part is optional and allows you to help train our own models, which improve over time. This installation process is optional and detailed in [the developer guide](developer_guide.md#Installation).
6
+
7
+ ## Linux
8
+
9
+ Run `bash INSTALL_LINUX.sh` from repository root directory.
10
+
11
+ This script also installs the plugin as explained in the last point of the cross-platform installation procedure. The installation of the plugin is performed with `plugin/copy_to_plugin_folder.sh`, which can also be invoked independently of `INSTALL_LINUX.sh` for updating the plugin installation.
12
+
13
+ ## Cross-platform
14
+
15
+ If you want to install the plugin manually, then execute the following commands:
16
+
17
+ 1. Create an environment: ``conda create --name xournalpp_htr python=3.10.11``.
18
+ 2. Use this environment: ``conda activate xournalpp_htr``.
19
+ 3. Install [HTRPipelines](https://github.com/githubharald/HTRPipeline) package using [its installation guide](https://github.com/githubharald/HTRPipeline/tree/master#installation).
20
+ 4. Install all dependencies of this package ``pip install -r requirements.txt``.
21
+ 5. Install the package in development mode with ``pip install -e .`` (do not forget the dot, '.').
22
+ 6. Install pre-commit hooks with: `pre-commit install`.
23
+ 7. Copy `plugin/` folder content to `${XOURNAL_CONFIG_PATH}/plugins/xournalpp_htr/` with `${XOURNAL_CONFIG_PATH}` being the configuration path of Xournal++, see Xournal++ manual [here](https://xournalpp.github.io/guide/file-locations/).
24
+ 8. Edit `config.lua`, setting `_M.python_executable` to your python executable **in the conda environment** and `_M.xournalpp_htr_path` to the absolute path of this repo. See the example config for details in `plugin/config.lua`.
25
+ 9. Ensure Xournal++ is on your `PATH`. See [here](https://xournalpp.github.io/guide/file-locations/) for the binary location.
26
+
27
+ ## After installation
28
+
29
+ Confirm that the installation worked by running `make tests-installation` from repository root directory.
docs/pyinstaller_experiment.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TODO!
2
+
3
+ # PyInstaller Experiment
4
+
5
+ For easier installation.
6
+
7
+ Scope: On Linux.
8
+
9
+ Commands I experimented with:
10
+
11
+ ```bash
12
+ cd xournalpp_htr
13
+ pyinstaller --onefile --add-data "../external/htr_pipeline/HTRPipeline/htr_pipeline/models:htr_pipeline/models" --hidden-import "PIL._tkinter_finder" run_htr.py
14
+ dist/run_htr --input-file /home/martin/data/xournalpp_htr/test_1.xoj --output-file /home/martin/Development/xournalpp_htr/tests/test_1_from_Xpp-3.pdf
15
+ ```
16
+
17
+ This seems to work on my Ubuntu PC.
18
+
19
+ Open questions:
20
+ - Does it work on other linux computers?
21
+ - Idea: check w/ EC2/GCP-VM instances.
22
+ - How to include the `xournalpp` binary in order to export the `xopp` file to a PDF?
23
+ - Idea: Let the user select the `xournalpp` path?
docs/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ mkdocs
2
+ mkdocs-material
3
+ mkdocs-git-revision-date-localized-plugin
docs/roadmap.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ On this page, we outline the project's intended roadmap. This plan helps us strategically manage our time and resources.
2
+
3
+ Below, we present our roadmap. It may evolve over time, so we will preserve previous versions to maintain transparency.
4
+
5
+ ## Roadmap as of *2025-05-03*
6
+
7
+ ### Visual Overview
8
+
9
+ ```mermaid
10
+ flowchart LR
11
+ A0(
12
+ Conduct
13
+ dataset
14
+ research
15
+ )
16
+ A(
17
+ Reimplement
18
+ <a href="https://github.com/githubharald/HTRPipeline">htr_pipeline</a>
19
+ )
20
+ B(
21
+ Classic algos w
22
+ <a href="https://github.com/PellelNitram/OnlineHTR">OnlineHTR</a>
23
+ )
24
+ C(
25
+ Start own
26
+ modeling
27
+ )
28
+ D(
29
+ Introduce
30
+ quality
31
+ measures
32
+ )
33
+ E(
34
+ Graph NN w
35
+ <a href="https://github.com/PellelNitram/OnlineHTR">OnlineHTR</a>
36
+ )
37
+ F(
38
+ Make
39
+ installation
40
+ easier
41
+ )
42
+ G(
43
+ Explore offline
44
+ recognition models
45
+ like <a href="https://arxiv.org/abs/1904.01941">CRAFT</a>
46
+ )
47
+ A --> F
48
+ F --> D
49
+ D --> A0
50
+ A0 --> C
51
+ C --> B
52
+ C --> E
53
+ C --> G
54
+ ```
55
+
56
+ ### Explanation
57
+
58
+ This project has many potential directions, with the primary goal of delivering optimal value to users. While we are eager to implement advanced machine learning algorithms, we must first focus on usability improvements.
59
+
60
+ Our main mid-term objective is to simplify the installation process, as users have reported it is too complex.
61
+
62
+ Explanation of the steps:
63
+
64
+ - **Reimplement [htr_pipeline](https://github.com/githubharald/HTRPipeline):**
65
+ We currently use the excellent [htr_pipeline](https://github.com/githubharald/HTRPipeline) by [Harald Scheidl](https://github.com/githubharald) for machine learning, but it being an external dependency complicates installation and them hosting model weights on Dropbox is not suitable for our needs. To address this, we plan to integrate these models directly into our project. Since the original repository lacks a license, we'll implement our own version, drawing inspiration from the existing work. This approach will deliver an easy-to-install product quickly, as we already know the requirements & model details. Additionally, it enhances our understanding of training models for both online and offline handwriting data. With our own models, we'll automate model retrieval and establish a model registry, likely using [Hugging Face](https://huggingface.co/), as part of adhering to MLOps best practices. Experimentation with new algorithms will benefit from the model registry and will occur subsequently, as it is more time-consuming.
66
+
67
+ - **Make installation easier:**
68
+ We aim to make the installation process seamless across platforms, including Linux and Windows, with future support for Mac if access becomes available to us. Implementing a model registry will streamline model management and deployment, aiding future model development and enhancing ease of use while aligning with best practices.
69
+
70
+ - **Introduce quality measures:**
71
+ To identify the best model, we need to quantify performance. Ideally, one metric will suffice, but two may be necessary if recognition and transcription remain separate tasks.
72
+
73
+ - **Classic algos w [OnlineHTR](https://github.com/PellelNitram/OnlineHTR):**
74
+ The plan is to use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR) for transcription alongside classical (non-data-driven) algorithms for recognition.
75
+
76
+ - **Graph NN w [OnlineHTR](https://github.com/PellelNitram/OnlineHTR):**
77
+ We aim to use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR) for transcription and a graph neural network for recognition. This approach seeks to develop a high-performing model that operates on the native online representation of handwriting.
docs/user_guide.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Usage
2
+
3
+ The usage of the project is fairly simple. First, there is a Python script that performs the actual work & is useful for headless operations like batch processing. Second, and probably much more useful for the average user, the Lua plugin can be used from within Xournal++ and invokes the aforementioned Python script under the hood.
4
+
5
+ ## The Lua plugin
6
+
7
+ Details relevant for usage of the Lua plugin:
8
+
9
+ 1. Make sure to save your file in Xournal++ beforehand. The plugin will also let you know that you need to save your file first.
10
+ 2. After installation, navigate to `Plugin > Xournal++ HTR` to invoke the plugin. Then select a filename and press `Save`. Lastly, wait a wee bit until the process is finished; the Xournal++ UI will block while the plugin applies HTR to your file. If you opened Xournal++ through a command-line, you can see progress bars that show the HTR process in real-time.
11
+
12
+ Note: Currently, the Xournal++ HTR plugin requires you to use a nightly build of Xournal++ because it uses upstream Lua API features that are not yet part of the stable build. Using the officially provided nightly AppImage, see [here](https://xournalpp.github.io/installation/linux/), is very convenient. The plugin has been tested with the following nightly Linux build of Xournal++:
13
+
14
+ ```
15
+ xournalpp 1.2.3+dev (583a4e47)
16
+ └──libgtk: 3.24.20
17
+ ```
18
+
19
+ ## The Python script
20
+
21
+ It is located in `xournalpp_htr/run_htr.py` and it features a command line interface that documents the usage of the Python script.
experiments/2025-02-05_writing_test/index.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Handwritten Text App</title>
7
+ <link rel="stylesheet" href="styles.css">
8
+ </head>
9
+ <body>
10
+ <div class="container">
11
+ <h1>Handwritten Text App</h1>
12
+ <canvas id="canvas" width="600" height="400"></canvas>
13
+ <button id="exportButton">Export to JSON</button>
14
+ </div>
15
+ <script src="script.js"></script>
16
+ </body>
17
+ </html>
experiments/2025-02-05_writing_test/script.js ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ document.addEventListener('DOMContentLoaded', () => {
2
+ const canvas = document.getElementById('canvas');
3
+ const ctx = canvas.getContext('2d');
4
+ const exportButton = document.getElementById('exportButton');
5
+
6
+ let drawing = false;
7
+ const strokes = [];
8
+
9
+ canvas.addEventListener('mousedown', (e) => {
10
+ drawing = true;
11
+ const { offsetX, offsetY } = e;
12
+ const time = new Date().toISOString();
13
+ strokes.push({ x: offsetX, y: offsetY, time });
14
+ ctx.beginPath();
15
+ ctx.moveTo(offsetX, offsetY);
16
+ });
17
+
18
+ canvas.addEventListener('mousemove', (e) => {
19
+ if (!drawing) return;
20
+ const { offsetX, offsetY } = e;
21
+ const time = new Date().toISOString();
22
+ strokes.push({ x: offsetX, y: offsetY, time });
23
+ ctx.lineTo(offsetX, offsetY);
24
+ ctx.stroke();
25
+ });
26
+
27
+ canvas.addEventListener('mouseup', () => {
28
+ drawing = false;
29
+ ctx.closePath();
30
+ });
31
+
32
+ canvas.addEventListener('mouseleave', () => {
33
+ drawing = false;
34
+ ctx.closePath();
35
+ });
36
+
37
+ exportButton.addEventListener('click', () => {
38
+ const json = JSON.stringify(strokes, null, 2);
39
+ const blob = new Blob([json], { type: 'application/json' });
40
+ const url = URL.createObjectURL(blob);
41
+ const a = document.createElement('a');
42
+ a.href = url;
43
+ a.download = 'strokes.json';
44
+ a.click();
45
+ URL.revokeObjectURL(url);
46
+ });
47
+ });
experiments/2025-02-05_writing_test/styles.css ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ display: flex;
3
+ justify-content: center;
4
+ align-items: center;
5
+ height: 100vh;
6
+ margin: 0;
7
+ font-family: Arial, sans-serif;
8
+ background-color: #f0f0f0;
9
+ }
10
+
11
+ .container {
12
+ text-align: center;
13
+ }
14
+
15
+ canvas {
16
+ border: 1px solid #000;
17
+ background-color: #fff;
18
+ cursor: crosshair;
19
+ }
mkdocs.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ site_name: Xournal++ HTR
2
+ site_description: Developing handwritten text recognition for Xournal++
3
+
4
+ repo_name: PellelNitram/xournalpp_htr
5
+ repo_url: https://github.com/PellelNitram/xournalpp_htr
6
+ edit_uri: edit/master/docs/
7
+
8
+ strict: true
9
+
10
+ theme:
11
+ name: material
12
+
13
+ plugins:
14
+ - search # necessary for search to work
15
+ - git-revision-date-localized:
16
+ timezone: Europe/London
17
+ locale: en
18
+ fallback_to_build_date: false
19
+ enable_creation_date: true
20
+
21
+ nav:
22
+ - Introduction: 'index.md'
23
+ - Getting Started as User:
24
+ - Installation: 'installation_user.md'
25
+ - User Guide: 'user_guide.md'
26
+ - Getting Started as Developer:
27
+ - Installation: 'installation_developer.md'
28
+ - Developer Guide: 'developer_guide.md'
29
+ # - Data Collection: 'data_collection.md' # Unclear if even needed
30
+ # - Developing New Models: 'developing_new_models.md' # Very unclear what to write as I haven't built anything yet
31
+ - Contributing: 'contributing.md'
32
+ - Roadmap: 'roadmap.md'
33
+ - Funding: 'funding.md'
34
+
35
+ markdown_extensions:
36
+ - pymdownx.superfences: # To enable mermaid.js charts, see https://squidfunk.github.io/mkdocs-material/reference/diagrams/.
37
+ custom_fences:
38
+ - name: mermaid
39
+ class: mermaid
40
+ format: !!python/name:pymdownx.superfences.fence_code_format
notebooks/experiment_with_IAM_OnDo_dataset.ipynb ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "1d03e361-cb11-49aa-9cf7-5e0a590186c5",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Experiment w IAM OnDo dataset\n",
9
+ "\n",
10
+ "That is b/c it potentially comes with segmented word information, which is useful for a revised WordDetectorNN network.\n",
11
+ "\n",
12
+ "- [great for viewing XML files in formatted way](https://jsonformatter.org/xml-viewer/475e9e).\n",
13
+ "- [interesting package](https://github.com/RobinXL/inkml2img/blob/master/inkml2img.py)"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "id": "d1e6e7ca-882c-46a2-a4c0-79ae770b0b3a",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "from pathlib import Path\n",
24
+ "\n",
25
+ "import matplotlib.pyplot as plt\n",
26
+ "import pandas as pd"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "id": "3bda86c1-cb9c-45af-a7f0-4a010c6a8a1e",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "BASE_PATH = Path(\"/home/martin/Development/xournalpp_htr/data/datasets/IAMonDo-db-1.0/\")"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "id": "99d9e548-c8cf-43ec-9e1d-eb2327cdb828",
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "inkml_path = BASE_PATH / \"001e.inkml\""
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "id": "f30ff098-9723-408c-97cd-1bfcbb672c7c",
52
+ "metadata": {},
53
+ "source": [
54
+ "*side idea: build InkML class! it'd be cool to make package from that and maybe publish it.*"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "id": "1cc46913-7eeb-4dc8-a19a-874ab6b5d6a5",
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "import xml.etree.ElementTree as ET\n",
65
+ "\n",
66
+ "tree = ET.parse(inkml_path)\n",
67
+ "root = tree.getroot()"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "id": "27998475-db52-4e4b-9b18-63d5e5e64f56",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "root"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "id": "6ac612f7-3187-4851-bcd8-6c022380d2a5",
83
+ "metadata": {},
84
+ "source": [
85
+ "Explore `root` w [this](https://docs.python.org/3/library/xml.etree.elementtree.html):"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "id": "b1cbbb3e-1bcc-4360-9114-65f791b5b413",
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "root.tag, root.attrib"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": null,
101
+ "id": "43cb98e1-b146-4bdf-89b3-d23089434570",
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "for child in root:\n",
106
+ " print(child.tag, child.attrib)"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "markdown",
111
+ "id": "83de1f4d-e142-4b48-ba64-b7d623015754",
112
+ "metadata": {},
113
+ "source": [
114
+ "indeed, the above is the content of the file."
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "id": "4c17d724-3916-4c62-9ea0-868d891f396d",
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "# todo: cont exploration"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "markdown",
129
+ "id": "7ef3f07e-dc5e-423b-a415-174696d5d5ca",
130
+ "metadata": {},
131
+ "source": [
132
+ "## experiment w/ loading both stroke and corresponding text"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": null,
138
+ "id": "b089ed32-f24b-4f89-94af-5f7cbe5c56ec",
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "traceView = root[-1] # to access `traceView`\n",
143
+ "traceView"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": null,
149
+ "id": "03fd3ed6-695b-4b4f-b70a-d9ba1d6fd4e1",
150
+ "metadata": {},
151
+ "outputs": [],
152
+ "source": [
153
+ "traceView"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "markdown",
158
+ "id": "a8b3635f-269d-4dd8-981a-36b7553e5576",
159
+ "metadata": {},
160
+ "source": [
161
+ "`textblock` and `marking` seems interesting!"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "id": "5642f657-f317-448c-9339-486cab2c6063",
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "marking = traceView[-1]\n",
172
+ "marking"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "id": "8a4e5d6b-b278-48eb-b4dc-54f22c38fb8a",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "marking[0].text"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": null,
188
+ "id": "45f82ba1-5328-4227-b366-12a781fbd27f",
189
+ "metadata": {},
190
+ "outputs": [],
191
+ "source": [
192
+ "marking[2][0].text, marking[2][1].text"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": null,
198
+ "id": "5bb7f46c-d682-4bc2-9486-fa7b6038f32c",
199
+ "metadata": {},
200
+ "outputs": [],
201
+ "source": [
202
+ "tmp = marking[2][2]"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": null,
208
+ "id": "6ba6226b-44bb-4932-82da-92773c4faeb6",
209
+ "metadata": {},
210
+ "outputs": [],
211
+ "source": [
212
+ "ids_to_use = []\n",
213
+ "\n",
214
+ "for x in tmp:\n",
215
+ " if x.tag == \"traceView\":\n",
216
+ " ids_to_use.append(x.attrib[\"traceDataRef\"])"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "id": "0184aded-1d9f-4bbd-b48b-11735d2b60a2",
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "ids_to_use"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "id": "52358cf2-585d-4130-b90b-f5e0ab5d8015",
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "traces_to_use = []\n",
237
+ "\n",
238
+ "for x in root.findall(\"trace\"):\n",
239
+ " id_to_check = x.attrib[\"{http://www.w3.org/XML/1998/namespace}id\"]\n",
240
+ " for y in ids_to_use:\n",
241
+ " if y[1:] == id_to_check:\n",
242
+ " traces_to_use.append([id_to_check, x.text])\n",
243
+ "\n",
244
+ "traces_to_use.sort(key=lambda x: x[0])"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "markdown",
249
+ "id": "997d1c64-a7e1-474e-9bd1-6567b3da317b",
250
+ "metadata": {},
251
+ "source": [
252
+ "get dfs of traces:"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": null,
258
+ "id": "0f35219c-1cb2-4c5f-98e6-2eecb51b16d2",
259
+ "metadata": {},
260
+ "outputs": [],
261
+ "source": [
262
+ "dfs = []\n",
263
+ "\n",
264
+ "for name, trace in traces_to_use:\n",
265
+ " print(name)\n",
266
+ " trace = [\n",
267
+ " [float(yy) for yy in xx.replace(\"-\", \" -\").split()]\n",
268
+ " for xx in trace.split(\",\")\n",
269
+ " if xx[0] not in [\"'\", '\"']\n",
270
+ " ]\n",
271
+ " df = pd.DataFrame(data=trace, columns=[\"x\", \"y\", \"t\", \"f\"])\n",
272
+ "\n",
273
+ " dfs.append(df)"
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": null,
279
+ "id": "c4aa0a02-3089-4cdc-9a9f-db749e515573",
280
+ "metadata": {},
281
+ "outputs": [],
282
+ "source": [
283
+ "plt.figure()\n",
284
+ "for df in dfs:\n",
285
+ " plt.scatter(df.cumsum()[\"x\"], df.cumsum()[\"y\"])\n",
286
+ "plt.show()"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "markdown",
291
+ "id": "d304a1f8-dbee-4d25-b264-ecb9b74d2838",
292
+ "metadata": {},
293
+ "source": [
294
+ "ok, apparently i have no idea what I am plotting :-D"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "markdown",
299
+ "id": "143fbc12-f553-4bba-b61c-269000872a3e",
300
+ "metadata": {},
301
+ "source": [
302
+ "next steps:\n",
303
+ "- read spec of IAM On Do to learn what is stored.\n",
304
+ "- Read [this spec](https://www.w3.org/TR/InkML/#trace) to understand the above cryptic string and then plot it to see if it suits my needs of segmented word data."
305
+ ]
306
+ }
307
+ ],
308
+ "metadata": {
309
+ "kernelspec": {
310
+ "display_name": "Python 3 (ipykernel)",
311
+ "language": "python",
312
+ "name": "python3"
313
+ },
314
+ "language_info": {
315
+ "codemirror_mode": {
316
+ "name": "ipython",
317
+ "version": 3
318
+ },
319
+ "file_extension": ".py",
320
+ "mimetype": "text/x-python",
321
+ "name": "python",
322
+ "nbconvert_exporter": "python",
323
+ "pygments_lexer": "ipython3",
324
+ "version": "3.10.11"
325
+ }
326
+ },
327
+ "nbformat": 4,
328
+ "nbformat_minor": 5
329
+ }
notebooks/experiment_with_clustering_for_online_word_detection.ipynb ADDED
@@ -0,0 +1,526 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Experiment w clustering for online word detection"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "%load_ext autoreload\n",
17
+ "%autoreload 2"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 17,
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "from pathlib import Path\n",
27
+ "\n",
28
+ "import matplotlib.patches as patches\n",
29
+ "import matplotlib.pyplot as plt\n",
30
+ "import numpy as np\n",
31
+ "import pandas as pd\n",
32
+ "from sklearn.cluster import (\n",
33
+ " DBSCAN,\n",
34
+ " HDBSCAN,\n",
35
+ " AffinityPropagation,\n",
36
+ " AgglomerativeClustering,\n",
37
+ " MeanShift,\n",
38
+ " SpectralClustering,\n",
39
+ ")\n",
40
+ "from sklearn.metrics import adjusted_rand_score\n",
41
+ "\n",
42
+ "from xournalpp_htr.training.io import load_list_of_bboxes\n",
43
+ "from xournalpp_htr.training.visualise import plot_clustered_document"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "metadata": {},
49
+ "source": [
50
+ "## Experiment structure\n",
51
+ "\n",
52
+ "### Hypothesis\n",
53
+ "\n",
54
+ "One can find an algorithm that segments strokes into words using my handwriting.\n",
55
+ "\n",
56
+ "Side note: This is useful b/c I can then use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR) to transcribe the words.\n",
57
+ "\n",
58
+ "### Notebook structure\n",
59
+ "\n",
60
+ "1. Load data, incl ground truth.\n",
61
+ "2. Pre-compute a set of features. Later, feature engineering might be added.\n",
62
+ "3. Iterate over a few algorithms and measure their performance using the ground truth.\n",
63
+ "\n",
64
+ "Alternative addition later on: Manually remove strokes that're too long (in distribution sense) or too straight. That is another step because it will require a dataset with such strokes that don't belong to words."
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "## Settings\n",
74
+ "\n",
75
+ "OUTPUT_PATH = Path(\"experiment_results\")\n",
76
+ "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n",
77
+ "\n",
78
+ "PLOT_RESULTS = True\n",
79
+ "PLOT_RESULTS = False"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "markdown",
84
+ "metadata": {},
85
+ "source": [
86
+ "## Helper functions"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 4,
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "# TODO: Add here if necessary."
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "markdown",
100
+ "metadata": {},
101
+ "source": [
102
+ "## Load annotations"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "markdown",
107
+ "metadata": {},
108
+ "source": [
109
+ "Previously, I loaded the data as `XournalppDocument` but that approach lacked ground truth data. Instead, I now load the annotated data, which comes with ground truth data."
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 5,
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "annotated_bboxes = load_list_of_bboxes(\n",
119
+ " \"../tests/data/2024-10-13_minimal.annotations.json\"\n",
120
+ ")\n",
121
+ "\n",
122
+ "DPI = 72 # TODO: Add this to annotations!\n",
123
+ "\n",
124
+ "# TODO: Maybe integrate `/DPI` into the x and y values? Maybe convert to cm?\n",
125
+ "# TODO: Add page dimensions, i.e.:\n",
126
+ "# - float(page.meta_data[\"width\"]) / DPI,\n",
127
+ "# - float(page.meta_data[\"height\"]) / DPI,"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": null,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "# ========\n",
137
+ "# Figure 1\n",
138
+ "# ========\n",
139
+ "\n",
140
+ "length = len(annotated_bboxes[\"bboxes\"])\n",
141
+ "nr_2 = 4\n",
142
+ "nr_1 = length // nr_2 + 1\n",
143
+ "\n",
144
+ "fig, axes = plt.subplots(nrows=nr_1, ncols=nr_2, figsize=(10, 8))\n",
145
+ "\n",
146
+ "for i_bbox in range(length):\n",
147
+ " bbox = annotated_bboxes[\"bboxes\"][i_bbox]\n",
148
+ "\n",
149
+ " a = axes.flatten()[i_bbox]\n",
150
+ "\n",
151
+ " a.set_aspect(\"equal\")\n",
152
+ " a.set_title(bbox[\"text\"])\n",
153
+ " a.set_xlabel(\"x\")\n",
154
+ " a.set_ylabel(\"-y\")\n",
155
+ "\n",
156
+ " for bbox_stroke in bbox[\"bbox_strokes\"]:\n",
157
+ " x = bbox_stroke[\"x\"] / DPI\n",
158
+ " y = bbox_stroke[\"y\"] / DPI\n",
159
+ " a.scatter(x, -y, c=\"black\", s=1)\n",
160
+ "\n",
161
+ "plt.tight_layout()\n",
162
+ "plt.show()\n",
163
+ "\n",
164
+ "# ========\n",
165
+ "# Figure 2\n",
166
+ "# ========\n",
167
+ "\n",
168
+ "plt.figure(figsize=(10, 8))\n",
169
+ "\n",
170
+ "a = plt.gca()\n",
171
+ "a.set_aspect(\"equal\")\n",
172
+ "a.set_xlabel(\"x\")\n",
173
+ "a.set_ylabel(\"-y\")\n",
174
+ "\n",
175
+ "for i_bbox in range(length):\n",
176
+ " bbox = annotated_bboxes[\"bboxes\"][i_bbox]\n",
177
+ "\n",
178
+ " # Draw bbox\n",
179
+ " xy = (\n",
180
+ " min([bbox[\"point_1_x\"], bbox[\"point_2_x\"]]) / DPI,\n",
181
+ " min([-bbox[\"point_1_y\"], -bbox[\"point_2_y\"]])\n",
182
+ " / DPI, # TODO: This messing around w/ y coord sign is annoying\n",
183
+ " )\n",
184
+ " dx = np.abs(bbox[\"point_1_x\"] - bbox[\"point_2_x\"]) / DPI\n",
185
+ " dy = np.abs(bbox[\"point_1_y\"] - bbox[\"point_2_y\"]) / DPI\n",
186
+ " a.add_patch(\n",
187
+ " patches.Rectangle(xy, dx, dy, linewidth=1, edgecolor=\"r\", facecolor=\"none\")\n",
188
+ " )\n",
189
+ "\n",
190
+ " # Draw label\n",
191
+ " a.text(x=xy[0], y=xy[1] + dy, s=bbox[\"text\"], c=\"red\")\n",
192
+ "\n",
193
+ " for bbox_stroke in bbox[\"bbox_strokes\"]:\n",
194
+ " x = bbox_stroke[\"x\"] / DPI\n",
195
+ " y = bbox_stroke[\"y\"] / DPI\n",
196
+ " a.scatter(x, -y, c=\"black\", s=1)\n",
197
+ "\n",
198
+ "plt.tight_layout()\n",
199
+ "plt.show()"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "markdown",
204
+ "metadata": {},
205
+ "source": [
206
+ "Prepare list of all strokes w/ relevant meta information as ground truth. This variable serves as training data:"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": 7,
212
+ "metadata": {},
213
+ "outputs": [],
214
+ "source": [
215
+ "df_strokes_data = {\n",
216
+ " \"x\": [],\n",
217
+ " \"y\": [],\n",
218
+ " \"x_mean\": [],\n",
219
+ " \"y_mean\": [],\n",
220
+ " \"i_bbox\": [],\n",
221
+ " \"text\": [],\n",
222
+ "}\n",
223
+ "\n",
224
+ "for i_bbox in range(len(annotated_bboxes[\"bboxes\"])):\n",
225
+ " bbox = annotated_bboxes[\"bboxes\"][i_bbox]\n",
226
+ "\n",
227
+ " for bbox_stroke in bbox[\"bbox_strokes\"]:\n",
228
+ " x = +bbox_stroke[\"x\"] / DPI\n",
229
+ " y = -bbox_stroke[\"y\"] / DPI\n",
230
+ "\n",
231
+ " df_strokes_data[\"x\"].append(x)\n",
232
+ " df_strokes_data[\"y\"].append(y)\n",
233
+ " df_strokes_data[\"x_mean\"].append(np.mean(x))\n",
234
+ " df_strokes_data[\"y_mean\"].append(np.mean(y))\n",
235
+ " df_strokes_data[\"i_bbox\"].append(i_bbox)\n",
236
+ " df_strokes_data[\"text\"].append(bbox[\"text\"])\n",
237
+ "\n",
238
+ "df_train = pd.DataFrame.from_dict(df_strokes_data)"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "markdown",
243
+ "metadata": {},
244
+ "source": [
245
+ "Plot the training data:"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": null,
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "plt.figure(figsize=(10, 8))\n",
255
+ "\n",
256
+ "a = plt.gca()\n",
257
+ "a.set_aspect(\"equal\")\n",
258
+ "a.set_xlabel(\"x\")\n",
259
+ "a.set_ylabel(\"y\")\n",
260
+ "\n",
261
+ "for (i_bbox, text), df_grouped in df_train.groupby(\n",
262
+ " [\"i_bbox\", \"text\"],\n",
263
+ "):\n",
264
+ " a.scatter(df_grouped[\"x_mean\"], df_grouped[\"y_mean\"], c=\"red\", s=2, zorder=999)\n",
265
+ "\n",
266
+ " bottom_left_x = np.inf\n",
267
+ " bottom_left_y = np.inf\n",
268
+ " top_right_x = -np.inf\n",
269
+ " top_right_y = -np.inf\n",
270
+ " for _, row in df_grouped.iterrows():\n",
271
+ " a.plot(row.x, row.y) # , c=cmap(i_row/N))\n",
272
+ " if row.x.min() < bottom_left_x:\n",
273
+ " bottom_left_x = row.x.min()\n",
274
+ " if row.y.min() < bottom_left_y:\n",
275
+ " bottom_left_y = row.y.min()\n",
276
+ " if row.x.max() > top_right_x:\n",
277
+ " top_right_x = row.x.max()\n",
278
+ " if row.y.max() > top_right_y:\n",
279
+ " top_right_y = row.y.max()\n",
280
+ "\n",
281
+ " # Plot bounding box\n",
282
+ " xy = (bottom_left_x, bottom_left_y)\n",
283
+ " dx = top_right_x - bottom_left_x\n",
284
+ " dy = top_right_y - bottom_left_y\n",
285
+ " a.add_patch(\n",
286
+ " patches.Rectangle(xy, dx, dy, linewidth=1, edgecolor=\"r\", facecolor=\"none\")\n",
287
+ " )\n",
288
+ "\n",
289
+ " # Plot text\n",
290
+ " a.text(x=bottom_left_x, y=top_right_y, s=f'\"{text}\" ({i_bbox})', c=\"red\")\n",
291
+ "\n",
292
+ "\n",
293
+ "plt.tight_layout()\n",
294
+ "plt.show()"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "markdown",
299
+ "metadata": {},
300
+ "source": [
301
+ "## Iterate over clustering algorithms"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": null,
307
+ "metadata": {},
308
+ "outputs": [],
309
+ "source": [
310
+ "%%time\n",
311
+ "\n",
312
+ "all_clusterings = [\n",
313
+ " AgglomerativeClustering(\n",
314
+ " n_clusters=22, distance_threshold=None\n",
315
+ " ), # I hard-code 22 b/c I counted that there're 22 clusters\n",
316
+ " AgglomerativeClustering(n_clusters=10, distance_threshold=None),\n",
317
+ " AgglomerativeClustering(\n",
318
+ " n_clusters=None, distance_threshold=1e0\n",
319
+ " ), # One could maybe tune it by investigating nr of clusters over distance threshold; TODO: Distance threshold using distribution?!\n",
320
+ " SpectralClustering(\n",
321
+ " n_clusters=15, # 21,\n",
322
+ " affinity=\"nearest_neighbors\",\n",
323
+ " ),\n",
324
+ " SpectralClustering(\n",
325
+ " n_clusters=21, # 21,\n",
326
+ " affinity=\"nearest_neighbors\",\n",
327
+ " ),\n",
328
+ " SpectralClustering(\n",
329
+ " n_clusters=6, # 21,\n",
330
+ " affinity=\"nearest_neighbors\",\n",
331
+ " ),\n",
332
+ " MeanShift(\n",
333
+ " bandwidth=None,\n",
334
+ " ),\n",
335
+ " MeanShift(\n",
336
+ " bandwidth=0.1,\n",
337
+ " ),\n",
338
+ " MeanShift(\n",
339
+ " bandwidth=1.0,\n",
340
+ " ),\n",
341
+ " MeanShift(\n",
342
+ " bandwidth=10.0,\n",
343
+ " ),\n",
344
+ " AffinityPropagation(),\n",
345
+ " HDBSCAN(min_cluster_size=2),\n",
346
+ " # FeatureAgglomeration(\n",
347
+ " # n_clusters=None,\n",
348
+ " # distance_threshold=0.1,\n",
349
+ " # ),\n",
350
+ " # FeatureAgglomeration(\n",
351
+ " # n_clusters=None,\n",
352
+ " # distance_threshold=1.0,\n",
353
+ " # ),\n",
354
+ " # FeatureAgglomeration(\n",
355
+ " # n_clusters=None,\n",
356
+ " # distance_threshold=10.0,\n",
357
+ " # ),\n",
358
+ "]\n",
359
+ "\n",
360
+ "all_clusterings += [DBSCAN(eps) for eps in np.logspace(-4, 1, 1000)]\n",
361
+ "all_clusterings += [\n",
362
+ " AgglomerativeClustering(n_clusters=None, distance_threshold=DISTANCE_THRESHOLD)\n",
363
+ " for DISTANCE_THRESHOLD in np.logspace(-4, 1, 1000)\n",
364
+ "]\n",
365
+ "\n",
366
+ "results = {\n",
367
+ " \"index\": [],\n",
368
+ " \"score\": [],\n",
369
+ "}\n",
370
+ "for i_clustering, clustering in enumerate(all_clusterings):\n",
371
+ " print(i_clustering, clustering)\n",
372
+ " clustering.fit(df_train[[\"x_mean\", \"y_mean\"]])\n",
373
+ "\n",
374
+ " score = adjusted_rand_score(df_train[\"i_bbox\"], clustering.labels_)\n",
375
+ "\n",
376
+ " results[\"index\"].append(i_clustering)\n",
377
+ " results[\"score\"].append(score)\n",
378
+ "\n",
379
+ " # Plotting\n",
380
+ " if PLOT_RESULTS:\n",
381
+ " fig, [a_ground_truth, a_predicted] = plt.subplots(1, 2, figsize=(10, 8))\n",
382
+ " plot_clustered_document(\n",
383
+ " a_ground_truth,\n",
384
+ " a_predicted,\n",
385
+ " clustering,\n",
386
+ " annotated_bboxes,\n",
387
+ " DPI,\n",
388
+ " df_train,\n",
389
+ " a_predicted_title=f\"A-RAND={score}\",\n",
390
+ " )\n",
391
+ " plt.savefig(OUTPUT_PATH / f\"iClustering{i_clustering}.png\")\n",
392
+ " plt.close()\n",
393
+ "\n",
394
+ "results = pd.DataFrame.from_dict(results)"
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "code",
399
+ "execution_count": null,
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": [
403
+ "plt.figure()\n",
404
+ "\n",
405
+ "plt.scatter(results[\"index\"], results[\"score\"], c=\"red\")\n",
406
+ "\n",
407
+ "plt.xlabel(\"Index of clustering settings\")\n",
408
+ "plt.ylabel(\"Adjusted Rand Score (larger is better)\")\n",
409
+ "plt.savefig(\"2024-10-18_clustering_experiments.png\", dpi=200)"
410
+ ]
411
+ },
412
+ {
413
+ "cell_type": "markdown",
414
+ "metadata": {},
415
+ "source": [
416
+ "Next, check if the clusters make sense by plotting the clusters on the page of a set of pre-selected settings to test out:"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": 20,
422
+ "metadata": {},
423
+ "outputs": [],
424
+ "source": [
425
+ "# TODO!!!\n",
426
+ "\n",
427
+ "# CONTINUE TO WORK HERE!!!"
428
+ ]
429
+ },
430
+ {
431
+ "cell_type": "markdown",
432
+ "metadata": {},
433
+ "source": [
434
+ "TODO: Learning: The peak at ~800 seems to classify rows of text. This should be fine w/ OnlineHTR!"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": 1,
440
+ "metadata": {},
441
+ "outputs": [],
442
+ "source": [
443
+ "# TODO: Add more stroke features. Then run large screen. Also add feature selection.\n",
444
+ "# TODO: Maybe add k fold?"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "markdown",
449
+ "metadata": {},
450
+ "source": [
451
+ "Next, plot the dendrogram, see [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html#sphx-glr-auto-examples-cluster-plot-agglomerative-dendrogram-py)."
452
+ ]
453
+ },
454
+ {
455
+ "cell_type": "markdown",
456
+ "metadata": {},
457
+ "source": [
458
+ "Next, try out DBSCAN! Also see [here](https://scikit-learn.org/stable/modules/clustering.html)."
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "markdown",
463
+ "metadata": {},
464
+ "source": [
465
+ "Also next, try out another document to play around with."
466
+ ]
467
+ },
468
+ {
469
+ "cell_type": "markdown",
470
+ "metadata": {},
471
+ "source": [
472
+ "Question: Is my OnlineHTR model robust against rotated text?! Maybe one should rotate the text first?"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "markdown",
477
+ "metadata": {},
478
+ "source": [
479
+ "Note: It is probably worth it to write a bit of infrastructure code to experiment more (and easier and easier to compare) with these clustering approaches.\n",
480
+ "\n",
481
+ "Next: Feed these sequences to `OnlineHTR` or retrained `SimpleHTR` nmodel."
482
+ ]
483
+ },
484
+ {
485
+ "cell_type": "markdown",
486
+ "metadata": {},
487
+ "source": [
488
+ "## TODOs:\n",
489
+ "\n",
490
+ "- I think next cool thing to try out is to do proper feature engineering to try to enhance the features. Using the raw strokes could be regarded as last resort but IMHO doesn't make sense b/c a stroke always has a single word attached as strokes cannot be split, which they could be if one allows clusterings on the raw datapoints instead of strokes.\n",
491
+ "\n",
492
+ "- Good source for rand score: [see here](https://stats.stackexchange.com/questions/260229/comparing-a-clustering-algorithm-partition-to-a-ground-truth-one).\n",
493
+ "- After finding the best clustering, do apply OnlineHTR to check how it performs!\n",
494
+ "- To overcome the scale issue (i.e. everyone's handwriting scale is a wee bit different), one would need to use an approach that is based on 'nearest neighbours'. This works b/c one does not write on top of existing words.\n",
495
+ " - also, one could weight the x direction more in definition of closeness/distance\n",
496
+ "- Hook up OnlineHTR to here!\n",
497
+ "- I think the biggest problem for the OnlineHTR model would be the different line positions based on the way it was trained. Hence, one could maybe put extra emphasis on clusters being on similar y values.\n",
498
+ "- I have to say that I am unclear if a heuristic (i.e. a clustering algo w/ smartly chosen parameters) is really enough. Certainly for now, but a fully data-driven way would be better to accommodate different writers. This is probably relevant for a next iteration of the model.\n",
499
+ " - E.g., is this approach robust against larger handwriting?\n",
500
+ "- Hyper parameters like distance threshold are probably a function of the content of the page (e.g. diagrams, written text height, etc).\n",
501
+ "- It would be cool to try graph NN. Also, I'd love to add more features than the mean. That might help in learning."
502
+ ]
503
+ }
504
+ ],
505
+ "metadata": {
506
+ "kernelspec": {
507
+ "display_name": "xournalpp_htr",
508
+ "language": "python",
509
+ "name": "python3"
510
+ },
511
+ "language_info": {
512
+ "codemirror_mode": {
513
+ "name": "ipython",
514
+ "version": 3
515
+ },
516
+ "file_extension": ".py",
517
+ "mimetype": "text/x-python",
518
+ "name": "python",
519
+ "nbconvert_exporter": "python",
520
+ "pygments_lexer": "ipython3",
521
+ "version": "3.10.11"
522
+ }
523
+ },
524
+ "nbformat": 4,
525
+ "nbformat_minor": 2
526
+ }
plugin/config.lua ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ local _M = {}
2
+
3
+ -- user settings
4
+ _M.python_executable = "/home/martin/anaconda3/envs/xournalpp_htr/bin/python"
5
+ _M.xournalpp_htr_path = "/home/martin/Development/xournalpp_htr/xournalpp_htr/run_htr.py"
6
+ _M.model = "dummy"
7
+ _M.output_file = "/home/martin/Development/xournalpp_htr/tests/test_1_from_Xpp.pdf"
8
+ _M.debug_HTR_command = false
9
+ -- TODO: allow UI to set other parameters as well of `xournalpp_htr`.
10
+
11
+ -- TODO replace later w/ temp exported file - filename will be derived automatically
12
+ _M.filename = "/home/martin/Development/xournalpp_htr/tests/test_1.xoj"
13
+
14
+ return _M
plugin/copy_to_plugin_folder.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========
2
+ # SETTINGS
3
+ # ========
4
+
5
+ TARGET_FOLDER=~/.config/xournalpp/plugins/xournalpp_htr/
6
+ # TARGET_FOLDER=/usr/share/xournalpp/plugins/xournalpp_htr # requires `sudo`
7
+
8
+ # ============
9
+ # COPY PROCESS
10
+ # ============
11
+
12
+ mkdir -p ${TARGET_FOLDER}
13
+
14
+ cp plugin.ini ${TARGET_FOLDER}
15
+ cp main.lua ${TARGET_FOLDER}
16
+ cp config.lua ${TARGET_FOLDER}
plugin/demo_config.lua ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ local _M = {}
2
+
3
+ -- user settings
4
+ _M.python_executable = "/home/martin/anaconda3/envs/xournalpp_htr/bin/python"
5
+ _M.xournalpp_htr_path = "/home/martin/Development/xournalpp_htr/xournalpp_htr/run_htr.py"
6
+ _M.model = "dummy"
7
+ _M.output_file = "/home/martin/Development/xournalpp_htr/tests/test_1_from_Xpp.pdf"
8
+ _M.debug_HTR_command = false
9
+ -- TODO: allow UI to set other parameters as well of `xournalpp_htr`.
10
+
11
+ -- TODO replace later w/ temp exported file - filename will be derived automatically
12
+ _M.filename = "/home/martin/Development/xournalpp_htr/tests/test_1.xoj"
13
+
14
+ return _M
plugin/main.lua ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ function initUi()
2
+ app.registerUi({["menu"] = "Xournal++ HTR", ["callback"] = "run", ["accelerator"] = "<Control>F1"});
3
+ end
4
+
5
+ function save_file(path)
6
+ if path:len() > 0 then
7
+
8
+ -- Read settings: I use this (https://stackoverflow.com/a/41176958). An
9
+ -- alternative could have been https://stackoverflow.com/a/41176826. Both
10
+ -- found using G"lua read settings file".
11
+ local config = require "config"
12
+
13
+ config.filename = '"' .. app.getDocumentStructure()['xoppFilename'] .. '"'
14
+ config.output_file = '"' .. path .. '"'
15
+
16
+ command = config.python_executable .. " " .. config.xournalpp_htr_path
17
+ .. " -if " .. config.filename
18
+ .. " -of " .. config.output_file
19
+ if config.debug_HTR_command then
20
+ print(command)
21
+ else
22
+ os.execute(command)
23
+ end
24
+
25
+ end
26
+ end
27
+
28
+ function run()
29
+
30
+ document_structure = app.getDocumentStructure()
31
+
32
+ if document_structure['xoppFilename']:len() == 0 then
33
+ app.openDialog('Please save document prior to exporting it as searchable PDF!', {"Ok"}, "", true)
34
+ else
35
+ app.fileDialogSave("save_file", "untitled.pdf")
36
+ end
37
+
38
+ end
39
+
40
+ -- TODO: Think of workflow to maximise usability for user
41
+ -- TODO: How to store settings? Ideally permanently?
42
+ -- TODO: Interesting code from example plugins:
43
+ -- - Get filename: https://github.com/xournalpp/xournalpp/blob/master/plugins/Export/main.lua#L29
44
+ -- - Toggle logic: https://github.com/xournalpp/xournalpp/blob/master/plugins/HighlightPosition/main.lua#L5
45
+ -- - UI: https://github.com/xournalpp/xournalpp/blob/master/plugins/MigrateFontSizes/main.lua
46
+ -- - OS interaction: https://github.com/xournalpp/xournalpp/blob/master/plugins/QuickScreenshot/main.lua
plugin/plugin.ini ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Based on this explanation: https://xournalpp.github.io/guide/plugins/plugins/
2
+
3
+ [about]
4
+ ## Author / Copyright notice
5
+ author=Martin Lellep
6
+
7
+ description=Developing handwritten text recognition for Xournal++
8
+
9
+ ## If the plugin is packed with Xournal++, use
10
+ ## <xournalpp> then it gets the same version number
11
+ version=0.1
12
+
13
+ [default]
14
+ enabled=false
15
+
16
+ [plugin]
17
+ mainfile=main.lua
pyproject.toml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.ruff]
2
+ fix = true
3
+ show-fixes = true
4
+ line-length = 88
5
+ lint.select = [
6
+ "C", # mccabe rules
7
+ "F", # pyflakes rules
8
+ "E", # pycodestyle error rules
9
+ "W", # pycodestyle warning rules
10
+ "B", # flake8-bugbear rules
11
+ "I", # isort rules
12
+ ]
13
+ lint.ignore = [
14
+ "C901", # max-complexity-10
15
+ "E501", # line-too-long
16
+ ]
17
+
18
+ [tool.ruff.format]
19
+ indent-style = "space"
20
+ quote-style = "double"
pytest.ini ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [pytest]
2
+ minversion = 6.0
3
+ testpaths =
4
+ tests
5
+ markers =
6
+ slow: Marks tests as slow (select with '-m slow' and deselect with '-m "not slow"')
7
+ technical: Marks tests as technical tests to ensure that code features work as expected
8
+ correctness: Marks tests that check physical behaviour to ensure physical correctness
9
+ installation: Marks tests that confirm this package was installed correctly.
10
+ data: Marks tests that verify the presence and location of test data.
11
+ visual_check: Marks tests that need visual checks.
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ beautifulsoup4
3
+ matplotlib
4
+ opencv-python
5
+ pytest
6
+ lxml
7
+ pymupdf
8
+ tqdm
9
+ pre-commit
requirements_training.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ jupyter
3
+ gradio
4
+ gitpython
scripts/demo.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import uuid
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+
7
+ import gradio as gr
8
+ from dotenv import load_dotenv
9
+ from pdf2image import convert_from_path
10
+ from supabase import Client, create_client
11
+
12
+ from xournalpp_htr.documents import get_document
13
+ from xournalpp_htr.models import compute_predictions
14
+ from xournalpp_htr.utils import export_to_pdf_with_xournalpp, get_env_variable
15
+ from xournalpp_htr.xio import write_predictions_to_PDF
16
+
17
+ load_dotenv()
18
+
19
+ DEMO = get_env_variable("DEMO") == "1"
20
+ SB_URL = get_env_variable("SB_URL")
21
+ SB_KEY = get_env_variable("SB_KEY")
22
+ SB_BUCKET_NAME = get_env_variable("SB_BUCKET_NAME")
23
+ SB_SCHEMA_NAME = get_env_variable("SB_SCHEMA_NAME")
24
+ SB_TABLE_NAME = get_env_variable("SB_TABLE_NAME")
25
+
26
+ # --- Image Processing Functions ---
27
+
28
+
29
def get_temporary_directory() -> Path:
    """Return the system-wide temporary directory as a :class:`Path`."""
    tmp_dir = tempfile.gettempdir()
    return Path(tmp_dir)
31
+
32
+
33
def get_path_of_exported_pdf(session_id: str) -> Path:
    """Temp-dir path of this session's plain (pre-HTR) PDF export."""
    filename = f"{session_id}_input_as_pdf.pdf"
    return get_temporary_directory() / filename
35
+
36
+
37
def get_path_of_pdf_with_htr(session_id: str) -> Path:
    """Temp-dir path of this session's HTR-annotated PDF."""
    filename = f"{session_id}_pdf_with_htr.pdf"
    return get_temporary_directory() / filename
39
+
40
+
41
def log_interaction(
    session_id: str,
    donate_data: bool,
    interaction: str,
    document_path: str | None,
):
    """Record one user interaction in Supabase.

    A metadata row is inserted for every call. When ``donate_data`` is set
    and a document path is given, the document itself is additionally
    uploaded to the storage bucket under the session id.
    """
    client: Client = create_client(SB_URL, SB_KEY)

    if donate_data and document_path:
        source = Path(document_path)
        # Donated files are keyed by session id; the original suffix is kept.
        target_name = f"{session_id}{source.suffix}"
        with open(source, "rb") as handle:
            client.storage.from_(SB_BUCKET_NAME).upload(
                target_name,
                handle,
                {"content-type": "application/octet-stream"},
            )

    # Insert metadata row
    metadata_row = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "demo": DEMO,
        "session_id": session_id,
        "donate_data": donate_data,
        "interaction": interaction,
    }

    client.schema(SB_SCHEMA_NAME).table(SB_TABLE_NAME).insert(metadata_row).execute()
69
+
70
+
71
def upload_document(document_path, session_id: str, donate_data: bool) -> str:
    """Gradio upload handler: log the event and pass the path through.

    Returns the uploaded file's path unchanged (``None`` when nothing was
    uploaded); Gradio stores it in the session state for later steps.
    """
    log_interaction(
        session_id=session_id,
        donate_data=donate_data,
        interaction="upload_document",
        document_path=document_path,
    )
    # Passing None through and returning the path are the same statement:
    # the stored state is simply whatever was uploaded.
    return document_path
81
+
82
+
83
def document_to_image_of_first_page(document_path, session_id):
    """Export the document to PDF via Xournal++ and render its first page.

    The uploaded .xoj/.xopp document is exported to a per-session temporary
    PDF and the first page is rendered to an image for preview. Returns
    ``None`` when no document has been uploaded yet.

    (Previous docstring — "Flips the input image horizontally." — was a
    copy-paste leftover and did not describe this function.)
    """
    log_interaction(
        session_id=session_id,
        donate_data=False,
        interaction="document_to_image_of_first_page",
        document_path=None,
    )
    if document_path is None:
        return None
    output_path = get_path_of_exported_pdf(session_id)
    export_to_pdf_with_xournalpp(
        Path(document_path),
        output_path,
    )
    # Only the first page is rendered; previewing every page would be slow.
    images = convert_from_path(output_path, first_page=1, last_page=1)
    first_page = images[0]
    return first_page
101
+
102
+
103
def document_to_HTR_document_and_image_of_first_page(document_path, session_id):
    """Run HTR on the document and render the first page of the result.

    Computes handwriting predictions for the uploaded document, writes them
    into the previously exported per-session PDF, and renders the first page
    of the resulting searchable PDF. Returns ``None`` when no document has
    been uploaded yet.

    NOTE(review): assumes the plain PDF export (step 2) already ran for this
    session, since predictions are written onto that exported PDF — confirm
    in the UI flow. (Previous docstring — "Rotates the input image 90
    degrees counter-clockwise." — was a copy-paste leftover.)
    """
    log_interaction(
        session_id=session_id,
        donate_data=False,
        interaction="document_to_HTR_document_and_image_of_first_page",
        document_path=None,
    )
    if document_path is None:
        return None
    document_path = Path(document_path)
    input_as_pdf_path = get_path_of_exported_pdf(session_id)
    pdf_with_htr = get_path_of_pdf_with_htr(session_id)
    document = get_document(document_path)
    predictions = compute_predictions(
        model_name="2024-07-18_htr_pipeline", document=document
    )
    write_predictions_to_PDF(
        input_as_pdf_path,
        pdf_with_htr,
        predictions,
        debug_htr=True,
    )  # TODO: make it a generator to track progress externally like here.
    # Render only page 1 for the preview pane.
    images = convert_from_path(pdf_with_htr, first_page=1, last_page=1)
    first_page = images[0]
    return first_page
129
+
130
+
131
def save_HTR_document_for_download(session_id):
    """Return this session's HTR PDF path for download, or ``None`` if absent.

    The path is returned as ``str`` because that is what the Gradio File
    component consumes.
    """
    log_interaction(
        session_id=session_id,
        donate_data=False,
        interaction="save_HTR_document_for_download",
        document_path=None,
    )
    result_pdf = get_path_of_pdf_with_htr(session_id)
    return str(result_pdf) if result_pdf.exists() else None
142
+
143
+
144
# --- Gradio UI Layout ---

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Intro text: project description, privacy note and donation link.
    gr.Markdown(
        """
    # [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr) Demo

    This is an online demo of the [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr) project, which strives to bring modern handwritten
    text recognition to open-source handwritten note softwares like [Xournal++](https://xournalpp.github.io/).

    While [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr) is natively built to be running locally, this demo deploys it online so you
    can try it out without any installation. We do not collect any personal data (see [source code of this demo](https://github.com/PellelNitram/xournalpp_htr/blob/master/scripts/demo.py))
    but allow you to donate your data if you want so that we can build better underlying machine learning models for all of us (all open-source, of course!).

    Note that the HTR results are not yet perfect. This is an ongoing project and we are actively working on improving the models.
    Currently, we are constrained by the limited amount of publicly available training data and by our working time (this is a hobby project next to our day jobs).

    The "we" in the paragraphs above is currently really only me, [Martin Lellep](https://lellep.xyz/?utm_campaign=xppGradioDemo), the main developer of Xournal++ HTR. I really love to work on
    [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr)! If you think this project is valuable and want to express your gratitute, then please feel free to [buy me a virtual coffee here](https://ko-fi.com/martin_l)
    so that I can buy more GPU power for training models and continue to let the GPUs go brrr :-).
    """
    )

    # Per-browser-session id (timestamp + UUID). Used to name per-session
    # temp files and to key donated uploads; the lambda makes it fresh for
    # each session rather than fixed at app start-up.
    session_id = gr.State(
        value=lambda: datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        + "_"
        + str(uuid.uuid4())
    )

    # Holds the uploaded document's file path (despite the name, not an image).
    original_image_state = gr.State()

    # Opt-in data donation; read by upload_document -> log_interaction.
    donate_data_checkbox = gr.Checkbox(
        label="Donate Data: Help us to improve our open-source models by donating your uploaded document. Everything will be released as open-source!",
        value=False,
    )

    # Step 1: upload a Xournal/Xournal++ document.
    upload_button = gr.UploadButton(
        "1. Click to Upload an XOJ File",
        file_types=[".xoj", ".xopp"],
        file_count="single",
    )

    # Side-by-side previews: plain export (left) vs HTR result (right).
    with gr.Row():
        image_viewer_1 = gr.Image(
            label="Original document", interactive=False, height=350
        )
        image_viewer_2 = gr.Image(
            label="Document with HTR", interactive=False, height=350
        )

    with gr.Row():
        button_1 = gr.Button("2. Export to PDF and Show First Page")
        button_2 = gr.Button("3. Compute PDF with HTR and Show First Page")

    button_download = gr.Button("4. Download PDF with HTR")
    file_output = gr.File(label="Download PDF with HTR")

    # --- Event Handlers ---

    # Store the uploaded path in state (and log / optionally donate it).
    upload_button.upload(
        fn=upload_document,
        inputs=[upload_button, session_id, donate_data_checkbox],
        outputs=original_image_state,
    )

    # Step 2: export to PDF and preview page 1.
    button_1.click(
        fn=document_to_image_of_first_page,
        inputs=[original_image_state, session_id],
        outputs=image_viewer_1,
    )

    # Step 3: run HTR and preview page 1 of the result.
    button_2.click(
        fn=document_to_HTR_document_and_image_of_first_page,
        inputs=[original_image_state, session_id],
        outputs=image_viewer_2,
    )

    # Step 4: expose the HTR PDF through the File component.
    button_download.click(
        fn=save_HTR_document_for_download,
        inputs=session_id,
        outputs=file_output,
    )
+ )
226
+
227
+
228
if __name__ == "__main__":
    # Bind to all interfaces so the hosting container can reach the server.
    port = int(os.environ.get("PORT", 7860))  # Use HF-provided port or fallback
    demo.launch(server_name="0.0.0.0", server_port=port)
scripts/demo_concept_1.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Run the demo-concept script on a test document.
# Alternative dataset location (kept for reference; the first assignment was
# dead code because it was immediately overwritten by the line below):
# FILE=~/data/xournalpp_htr/datasets/tests/test_1.xoj
FILE=../tests/test_1.xoj

python ../xournalpp_htr/demo_concept_1.py --input-file ${FILE}
setup.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Package setup. As a side effect, rewrites plugin/config.lua so the
Xournal++ plugin points at this checkout's Python executable and run_htr.py."""

import os
import sys

import setuptools

## Modifies config.lua to use the appropriate paths
# Get the path of this file
htr_dir = os.path.dirname(os.path.abspath(__file__))

# Path to the config.lua file
config_file = os.path.join(htr_dir, "plugin", "config.lua")

# Fix direction of slashes, needed on Windows
htr_dir = htr_dir.replace("\\", "/")

# Get the path of the Python executable
python_executable = sys.executable.replace("\\", "/")

# Read the config.lua file. Explicit UTF-8 so the rewrite does not depend on
# the platform default encoding (this script explicitly supports Windows).
with open(config_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Modify the necessary lines in the config.lua file
modified_lines = []
for line in lines:
    if line.startswith("_M.python_executable ="):
        modified_lines.append('_M.python_executable = "' + python_executable + '"\n')
    elif line.startswith("_M.xournalpp_htr_path ="):
        modified_lines.append(
            '_M.xournalpp_htr_path = "' + htr_dir + '/xournalpp_htr/run_htr.py"\n'
        )
    else:
        modified_lines.append(line)

# Write the modified lines back to the config.lua file
with open(config_file, "w", encoding="utf-8") as f:
    f.writelines(modified_lines)

# README is UTF-8; relying on the locale encoding can fail on Windows.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="xournalpp_htr",
    version="0.0.1",
    description="Developing handwritten text recognition for Xournal++.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=setuptools.find_packages(),
)
+ )
tests/.gitkeep ADDED
File without changes