Martin L (GitHub Actions) commited on
Commit
be53a00
·
1 Parent(s): 3c9bb11

Automated deployment from GitHub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +12 -0
  2. .gitignore +170 -0
  3. .pre-commit-config.yaml +9 -0
  4. .vscode/settings.json +15 -0
  5. Dockerfile +58 -0
  6. INSTALL_HF_DOCKER_SPACE.sh +52 -0
  7. INSTALL_LINUX.sh +62 -0
  8. LICENSE +339 -0
  9. Makefile +13 -0
  10. README.md +6 -8
  11. data/.gitkeep +1 -0
  12. docs/2024-08-27-22-52_unit_calculations.xoj +0 -0
  13. docs/ADRs/2025-10-04_design_of_huggingface_space_dockerfile.md +32 -0
  14. docs/annotate_tool_UI_design.svg +198 -0
  15. docs/contributing.md +74 -0
  16. docs/data_collection.md +13 -0
  17. docs/datasets_literature_review.md +25 -0
  18. docs/developer_guide.md +59 -0
  19. docs/developing_new_models.md +21 -0
  20. docs/funding.md +9 -0
  21. docs/huggingface_docker_space_deployment.md +52 -0
  22. docs/images/.gitkeep +1 -0
  23. docs/images/TODO.md +1 -0
  24. docs/images/system_design.jpg +0 -0
  25. docs/index.md +53 -0
  26. docs/installation_developer.md +7 -0
  27. docs/installation_user.md +29 -0
  28. docs/pyinstaller_experiment.md +23 -0
  29. docs/requirements.txt +3 -0
  30. docs/roadmap.md +77 -0
  31. docs/user_guide.md +21 -0
  32. experiments/2025-02-05_writing_test/index.html +17 -0
  33. experiments/2025-02-05_writing_test/script.js +47 -0
  34. experiments/2025-02-05_writing_test/styles.css +19 -0
  35. mkdocs.yml +40 -0
  36. notebooks/experiment_with_IAM_OnDo_dataset.ipynb +329 -0
  37. notebooks/experiment_with_clustering_for_online_word_detection.ipynb +526 -0
  38. plugin/config.lua +14 -0
  39. plugin/copy_to_plugin_folder.sh +16 -0
  40. plugin/demo_config.lua +14 -0
  41. plugin/main.lua +46 -0
  42. plugin/plugin.ini +17 -0
  43. pyproject.toml +20 -0
  44. pytest.ini +11 -0
  45. requirements.txt +9 -0
  46. requirements_training.txt +4 -0
  47. scripts/demo.py +230 -0
  48. scripts/demo_concept_1.sh +4 -0
  49. setup.py +49 -0
  50. tests/.gitkeep +0 -0
.dockerignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore local data and virtual environments
2
+ data/
3
+ .venv/
4
+
5
+ # Common extras you probably don't want in the image
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+ .env
11
+ .git
12
+ .gitignore
.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ external/
2
+ tests/data/
3
+ data/datasets/
4
+ .ipynb_checkpoints/
5
+
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ .pybuilder/
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ # For a library or package, you might want to ignore these files since the code is
92
+ # intended to run in multiple environments; otherwise, check them in:
93
+ # .python-version
94
+
95
+ # pipenv
96
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
98
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
99
+ # install all needed dependencies.
100
+ #Pipfile.lock
101
+
102
+ # poetry
103
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107
+ #poetry.lock
108
+
109
+ # pdm
110
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
111
+ #pdm.lock
112
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
113
+ # in version control.
114
+ # https://pdm.fming.dev/#use-with-ide
115
+ .pdm.toml
116
+
117
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118
+ __pypackages__/
119
+
120
+ # Celery stuff
121
+ celerybeat-schedule
122
+ celerybeat.pid
123
+
124
+ # SageMath parsed files
125
+ *.sage.py
126
+
127
+ # Environments
128
+ .env
129
+ .venv
130
+ env/
131
+ venv/
132
+ ENV/
133
+ env.bak/
134
+ venv.bak/
135
+
136
+ # Spyder project settings
137
+ .spyderproject
138
+ .spyproject
139
+
140
+ # Rope project settings
141
+ .ropeproject
142
+
143
+ # mkdocs documentation
144
+ /site
145
+
146
+ # mypy
147
+ .mypy_cache/
148
+ .dmypy.json
149
+ dmypy.json
150
+
151
+ # Pyre type checker
152
+ .pyre/
153
+
154
+ # pytype static type analyzer
155
+ .pytype/
156
+
157
+ # Cython debug symbols
158
+ cython_debug/
159
+
160
+ # PyCharm
161
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
164
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165
+ #.idea/
166
+
167
+ .gradio/
168
+ best_model.pth
169
+
170
+ .DS_Store
.pre-commit-config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.4.4
4
+ hooks:
5
+ - id: ruff # linter
6
+ types_or: [python, pyi, jupyter]
7
+ args: [--exit-non-zero-on-fix]
8
+ - id: ruff-format # formatter
9
+ types_or: [python, pyi, jupyter]
.vscode/settings.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.formatOnSave": true,
4
+ "editor.codeActionsOnSave": {
5
+ "source.fixAll": "explicit",
6
+ "source.organizeImports": "explicit"
7
+ },
8
+ "editor.defaultFormatter": "charliermarsh.ruff"
9
+ },
10
+ "notebook.formatOnSave.enabled": true,
11
+ "notebook.codeActionsOnSave": {
12
+ "notebook.source.fixAll": "explicit",
13
+ "notebook.source.organizeImports": "explicit"
14
+ },
15
+ }
Dockerfile ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Documentation: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+
3
+ # Start from an official lightweight Python image
4
+ FROM python:3.10-slim
5
+
6
+ # Prevents Python from writing .pyc files and buffering stdout/stderr
7
+ # ENV PYTHONDONTWRITEBYTECODE=1
8
+ # ENV PYTHONUNBUFFERED=1
9
+
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ git \
13
+ wget \
14
+ unzip \
15
+ vim-tiny \
16
+ curl \
17
+ libgl1 \
18
+ libglib2.0-0 \
19
+ xournalpp \
20
+ poppler-utils \
21
+ && rm -rf /var/lib/apt/lists/*
22
+
23
+ # Create and set working directory
24
+ WORKDIR /app
25
+
26
+ # Create temp_code_mount folder
27
+ RUN mkdir -p /temp_code_mount
28
+
29
+ # Install Python dependencies early for caching
30
+ # COPY requirements.txt .
31
+ # RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # Copy application code
34
+ COPY . .
35
+
36
+ # Run the INSTALL_HF_DOCKER_SPACE.sh script
37
+ RUN bash INSTALL_HF_DOCKER_SPACE.sh
38
+ RUN pip install matplotlib bs4 pdf2image supabase python-dotenv
39
+ # ^- that should not be necessary!! TODO!!
40
+
41
+ # Expose the port Gradio will run on inside Hugging Face Spaces
42
+ EXPOSE 7860
43
+
44
+ # Command to run Gradio app
45
+ # Hugging Face Spaces will set PORT env var, so we use it
46
+ CMD ["python", "scripts/demo.py"]
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+ # https://huggingface.co/docs/hub/spaces-sdks-docker
56
+
57
+ # https://huggingface.co/spaces/SpacesExamples/secret-example/tree/main
58
+ # - https://huggingface.co/spaces/SpacesExamples/secret-example/blob/main/Dockerfile
INSTALL_HF_DOCKER_SPACE.sh ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Based on `INSTALL_LINUX.sh` file.
2
+
3
+ # ========
4
+ # SETTINGS
5
+ # ========
6
+
7
+ HTR_PIPELINE_PATH="external/htr_pipeline"
8
+
9
+ # ================
10
+ # Helper functions
11
+ # ================
12
+
13
+ install_htr_pipeline () {
14
+
15
+ mkdir -p ${HTR_PIPELINE_PATH}
16
+ cd ${HTR_PIPELINE_PATH}
17
+ git clone https://github.com/githubharald/HTRPipeline.git
18
+ cd HTRPipeline
19
+ cd htr_pipeline/models
20
+ wget https://www.dropbox.com/s/j1hl6bppecug0sz/models.zip
21
+ unzip -o models.zip
22
+ cd ../../
23
+ pip install .
24
+ # 3. Install [HTRPipelines](https://github.com/githubharald/HTRPipeline) package using [its installation guide](https://github.com/githubharald/HTRPipeline/tree/master#installation).
25
+
26
+ }
27
+
28
+ CURRENT_DIR=$(pwd)
29
+
30
+ # ====================
31
+ # Installation process
32
+ # ====================
33
+
34
+ rm -rf ${HTR_PIPELINE_PATH}
35
+
36
+ install_htr_pipeline
37
+ cd ${CURRENT_DIR}
38
+ pip install -r requirements.txt
39
+ pip install gradio # TODO: Move to optional package in `pyproject.toml` once I use this setup.
40
+ pip install -e .
41
+
42
+ # ========
43
+ # Feedback
44
+ # ========
45
+
46
+ echo
47
+ echo "==========================================="
48
+ echo "==========================================="
49
+ echo "==========================================="
50
+ echo
51
+ echo "Installation complete"
52
+ echo
INSTALL_LINUX.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========
2
+ # SETTINGS
3
+ # ========
4
+
5
+ ENVIRONMENT_NAME="xournalpp_htr"
6
+ HTR_PIPELINE_PATH="external/htr_pipeline"
7
+
8
+ # ================
9
+ # Helper functions
10
+ # ================
11
+
12
+ install_htr_pipeline () {
13
+
14
+ mkdir -p ${HTR_PIPELINE_PATH}
15
+ cd ${HTR_PIPELINE_PATH}
16
+ git clone https://github.com/githubharald/HTRPipeline.git
17
+ cd HTRPipeline
18
+ cd htr_pipeline/models
19
+ wget https://www.dropbox.com/s/j1hl6bppecug0sz/models.zip
20
+ unzip -o models.zip
21
+ cd ../../
22
+ pip install .
23
+ # 3. Install [HTRPipelines](https://github.com/githubharald/HTRPipeline) package using [its installation guide](https://github.com/githubharald/HTRPipeline/tree/master#installation).
24
+
25
+ }
26
+
27
+ CURRENT_DIR=$(pwd)
28
+
29
+ # ====================
30
+ # Installation process
31
+ # ====================
32
+
33
+ rm -rf ${HTR_PIPELINE_PATH}
34
+
35
+ eval "$(conda shell.bash hook)" # enable `conda activate`, see
36
+ # https://stackoverflow.com/a/56155771
37
+
38
+ conda create --name ${ENVIRONMENT_NAME} python=3.10.11 -y
39
+ conda activate ${ENVIRONMENT_NAME}
40
+ install_htr_pipeline
41
+ cd ${CURRENT_DIR}
42
+ pip install -r requirements.txt
43
+ pip install -e .
44
+ pre-commit install
45
+
46
+ cd plugin
47
+ bash copy_to_plugin_folder.sh
48
+
49
+ # ========
50
+ # Feedback
51
+ # ========
52
+
53
+ echo
54
+ echo "==========================================="
55
+ echo "==========================================="
56
+ echo "==========================================="
57
+ echo
58
+ echo "Installation complete"
59
+ echo
60
+ echo "Activate environment with:"
61
+ echo "\"conda activate ${ENVIRONMENT_NAME}\""
62
+ echo
LICENSE ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Lesser General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
281
+
282
+ How to Apply These Terms to Your New Programs
283
+
284
+ If you develop a new program, and you want it to be of the greatest
285
+ possible use to the public, the best way to achieve this is to make it
286
+ free software which everyone can redistribute and change under these terms.
287
+
288
+ To do so, attach the following notices to the program. It is safest
289
+ to attach them to the start of each source file to most effectively
290
+ convey the exclusion of warranty; and each file should have at least
291
+ the "copyright" line and a pointer to where the full notice is found.
292
+
293
+ <one line to give the program's name and a brief idea of what it does.>
294
+ Copyright (C) <year> <name of author>
295
+
296
+ This program is free software; you can redistribute it and/or modify
297
+ it under the terms of the GNU General Public License as published by
298
+ the Free Software Foundation; either version 2 of the License, or
299
+ (at your option) any later version.
300
+
301
+ This program is distributed in the hope that it will be useful,
302
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
303
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304
+ GNU General Public License for more details.
305
+
306
+ You should have received a copy of the GNU General Public License along
307
+ with this program; if not, write to the Free Software Foundation, Inc.,
308
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
+
310
+ Also add information on how to contact you by electronic and paper mail.
311
+
312
+ If the program is interactive, make it output a short notice like this
313
+ when it starts in an interactive mode:
314
+
315
+ Gnomovision version 69, Copyright (C) year name of author
316
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317
+ This is free software, and you are welcome to redistribute it
318
+ under certain conditions; type `show c' for details.
319
+
320
+ The hypothetical commands `show w' and `show c' should show the appropriate
321
+ parts of the General Public License. Of course, the commands you use may
322
+ be called something other than `show w' and `show c'; they could even be
323
+ mouse-clicks or menu items--whatever suits your program.
324
+
325
+ You should also get your employer (if you work as a programmer) or your
326
+ school, if any, to sign a "copyright disclaimer" for the program, if
327
+ necessary. Here is a sample; alter the names:
328
+
329
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
+
332
+ <signature of Ty Coon>, 1 April 1989
333
+ Ty Coon, President of Vice
334
+
335
+ This General Public License does not permit incorporating your program into
336
+ proprietary programs. If your program is a subroutine library, you may
337
+ consider it more useful to permit linking proprietary applications with the
338
+ library. If this is what you want to do, use the GNU Lesser General
339
+ Public License instead of this License.
Makefile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TODO: Fill it.
2
+
3
+ docs:
4
+ mkdocs build --clean
5
+ # TODO: Sth like https://numpy.org/doc/stable/reference/generated/numpy.mean.html#numpy.mean
6
+
7
+ tests-installation:
8
+ pytest -v -k "installation"
9
+
10
+ run-pre-commit-hooks:
11
+ pre-commit run --all-files
12
+
13
+ .PHONY: docs tests-installation run-pre-commit-hooks
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- title: Xournalpp Htr
3
- emoji: 🚀
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Xournal++ HTR
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
+ ---
 
 
data/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+
docs/2024-08-27-22-52_unit_calculations.xoj ADDED
Binary file (6.62 kB). View file
 
docs/ADRs/2025-10-04_design_of_huggingface_space_dockerfile.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Design of HuggingFace Space Dockerfile
2
+
3
+ - Status: Ongoing
4
+ - Deciders: Martin Lellep (@PellelNitram)
5
+ - Drivers: Martin Lellep (@PellelNitram)
6
+ - PRD: None
7
+ - Date: 2025-10-04
8
+
9
+ ## Context
10
+
11
+ *Explain the background and the context in which the decision is being made. Include any relevant information about the problem, constraints, or goals.*
12
+
13
+ ## Decisions
14
+
15
+ *State the decision that has been made. Be clear and concise.*
16
+
17
+ - In the future, download models at build time into the Docker image from Github release page. In the
18
+ very far future, pull them from HuggingFace at run-time.
19
+ - Add `xournalpp` binary to Docker image so that the `xopp` file can be exported as PDF prior to
20
+ execution of the HTR pipeline.
21
+
22
+ ## Consequences
23
+
24
+ *Describe the consequences of the decision. Include both positive and negative outcomes, as well as any trade-offs.*
25
+
26
+ ## Alternatives Considered
27
+
28
+ *List and briefly describe other options that were considered and why they were not chosen.*
29
+
30
+ ## References
31
+
32
+ *Include links or references to any supporting documentation, discussions, or resources.*
docs/annotate_tool_UI_design.svg ADDED
docs/contributing.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing
2
+
3
+ There are multiple ways to contribute to this project. Below, those ways are explained alongside information on how to best contribute from a codebase point of view.
4
+
5
+ Really, we greatly appreciate any help!
6
+
7
+ ## Ways to contribute
8
+
9
+ ### Reach out
10
+
11
+ If you have questions about how to best contribute or the slightest
12
+ interest in contributing, then feel free to reach out to me at any time :-).
13
+
14
+ ### Issues on Github
15
+
16
+ A great way to help out with this project is to check [open issues on Github](https://github.com/PellelNitram/xournalpp_htr/issues)
17
+ and to try to work on them.
18
+
19
+ If you need support with those, then please reach out to us - we're very happy to help!
20
+
21
+ ## Things to consider when contributing
22
+
23
+ ### Branching strategy
24
+
25
+ The following branching strategy is used to keep the `master` branch stable and
26
+ allow for experimentation: `master` > `dev` > `feature branches`. This branching
27
+ strategy is shown in the following visualisation and then explained in more detail
28
+ in the next paragraph:
29
+
30
+ ```mermaid
31
+ %%{init:{ "gitGraph":{ "mainBranchName":"master" }}}%%
32
+ gitGraph
33
+ commit
34
+ commit
35
+ branch dev
36
+ commit
37
+ checkout dev
38
+ commit
39
+ commit
40
+ branch feature/awesome_new_feature
41
+ commit
42
+ checkout feature/awesome_new_feature
43
+ commit
44
+ commit
45
+ commit
46
+ checkout dev
47
+ merge feature/awesome_new_feature
48
+ commit
49
+ commit
50
+ checkout master
51
+ merge dev
52
+ commit
53
+ commit
54
+ ```
55
+
56
+ In more details, this repository adheres to the following git branching strategy: The
57
+ `master` branch remains stable and delivers a functioning product. The `dev` branch
58
+ consists of all code that will be merged to `master` eventually where the corresponding
59
+ features are developed in individual feature branches; the above visualisation shows an
60
+ example feature branch called `feature/awesome_new_feature` that works on a feature
61
+ called `awesome_new_feature`.
62
+
63
+ Given this structure, please implement new features as feature branches and
64
+ rebase them onto the `dev` branch prior to sending a pull request to `dev`.
65
+
66
+ Note: The Github Actions CI/CD pipeline runs on the branches `master` and `dev`.
67
+
68
+ ### Code quality
69
+
70
+ We try to keep up code quality as high as practically possible. For that reason, the following steps are implemented:
71
+
72
+ - Testing. Xournal++ HTR uses `pytest` for unit, regression and integration tests.
73
+ - Linting. Xournal++ HTR uses `ruff` for linting and code best practices. `ruff` is implemented as git pre-commit hook. Since `ruff` as pre-commit hook is configured externally with `pyproject.toml`, you can use the same settings in your IDE (e.g. VSCode) if you wish to speed up the process.
74
+ - Formatting. Xournal++ HTR uses `ruff-format` for consistent code formatting. `ruff-format` is implemented as git pre-commit hook. Since `ruff-format` as pre-commit hook is configured externally with `pyproject.toml`, you can use the same settings in your IDE if you wish to speed up the process.
docs/data_collection.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data collection and annotation
2
+
3
+ <div align="center">
4
+
5
+ <iframe width="560" height="315" src="https://www.youtube.com/embed/dQw4w9WgXcQ?si=3xMriRxJb8TdjVui" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
6
+
7
+ <br>
8
+
9
+ <i>(<a href="https://youtu.be/dQw4w9WgXcQ?utm_source=docs&utm_medium=docs&utm_campaign=docs">Click here to get to video on YouTube.</a>)</i>
10
+
11
+ </div>
12
+
13
+ TODO
docs/datasets_literature_review.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ THIS DOCUMENT IS WORK IN PROGRESS AND WILL BE COMPLETED LATER ON!
2
+
3
+ ## Draft content
4
+
5
+ In this document, I am checking lit rev for datasets to know what is around and what might need to be created for best performing models.
6
+
7
+ TODO - *Now it gets messy*:
8
+
9
+ - See https://chatgpt.com/c/68037a32-e49c-8009-9629-c9d38404e42b
10
+ - https://github.com/rafaeljcdarce/HWR
11
+ - https://martin-thoma.com/write-math/
12
+ - (ask him) Data: The data can be downloaded from write-math.com/data. I will try to keep a relatively recent version online. You can contact me if you want the latest version. However, I should note that currently (2015-04-12) this is about 3.7GB. This means sharing the data is not that easy.
13
+ - this seems to be constrained to single (latex) symbols; this conclusion is based on those presentations:
14
+ - https://raw.githubusercontent.com/MartinThoma/LaTeX-examples/refs/heads/master/presentations/Bachelor-Short/LaTeX/bachelor-short.pdf
15
+ - interesting ideas: https://raw.githubusercontent.com/MartinThoma/LaTeX-examples/refs/heads/master/presentations/Bachelor-Final-Presentation/LaTeX/Bachelor-Final-Presentation.pdf
16
+ - similar to: https://detexify.kirelabs.org/classify.html
17
+ - ask him about write-math.com; https://martin-thoma.com/write-math/#data
18
+ - https://arxiv.org/abs/1511.09030
19
+ - https://hwrt.readthedocs.io/
20
+ - https://github.com/MartinThoma/hwr-experiments
21
+ - https://hwrt.readthedocs.io/index.html
22
+ - ! https://www.reddit.com/r/selfhosted/comments/1doy32j/document_scanning_ocr_that_works_well_with/
23
+ - https://www.reddit.com/r/computervision/comments/15er2y7/2023_review_of_tools_for_handwritten_text/
24
+ - https://detexify.kirelabs.org/classify.html
25
+ - https://github.com/kirel/detexify-data
docs/developer_guide.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Developer Guide
2
+
3
+ ## Project design
4
+
5
+ The design of Xournal++ HTR tries to bridge the gap between delivering a production-ready product and allowing contributors to experiment with new algorithms.
6
+
7
+ The project design involves a Lua plugin and a Python backend, see the following figure. First, the production ready product is delivered by means of an Xournal++ plugin. The plugin is fully integrated in Xournal++ and calls a Python backend that performs the actual transcription. The Python backend allows selection of various recognition models and is thereby fully extendable with new models.
8
+
9
+ <!--
10
+ DOESN'T WORK SOMEHOW:
11
+ <div align="center">
12
+ <img src="images/system_design.jpg" width="50%">
13
+ <p><i>Design of xournalpp_htr.</i></p>
14
+ </div>
15
+ -->
16
+
17
+ <!-- An alternative figure is shown below: -->
18
+
19
+ ```mermaid
20
+ sequenceDiagram
21
+ User in Xpp-->>Xpp HTR Plugin: starts transcription process using currently open file
22
+ Xpp HTR Plugin -->> Xpp HTR Lua Plugin: calls
23
+ Xpp HTR Lua Plugin -->>Xpp HTR Python Backend: constructs command using CLI
24
+ Xpp HTR Python Backend -->> Xpp HTR Python Backend: Does OCR & stores PDF
25
+ Xpp HTR Python Backend-->>User in Xpp: Gives back control to UI
26
+ ```
27
+
28
+ Developing a usable HTR systems requires experimentation. The project structure is set up to accommodate this need. *Note that ideas on improved project structures are appreciated.*
29
+
30
+ The experimentation is carried out in terms of "concepts". Each concept explores a different approach to HTR and possibly - but not necessarily - improves over previous concepts, so as to allow for freedom in risky experiments. Concept 1 is already implemented and uses a computer vision approach that is explained below.
31
+
32
+ Future concepts might explore:
33
+
34
+ - Retrain computer vision models from concept 1 using native online data representation of [Xournal++](https://github.com/xournalpp/xournalpp)
35
+ - Use sequence-to-sequence models to take advantage of native online data representation of [Xournal++](https://github.com/xournalpp/xournalpp); e.g. use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR)
36
+ - Use data augmentation to increase effective size of training data
37
+ - Use of language models to correct for spelling mistakes
38
+
39
+ ### Concept 1
40
+
41
+ This concept uses computer vision based algorithms to first detect words on a page and then to read those words.
42
+
43
+ The following shows a video demo on YouTube using real-life handwriting data from a Xournal file:
44
+
45
+ [![Xournal++ HTR - Concept 1 - Demo](https://img.youtube.com/vi/FGD_O8brGNY/0.jpg)](https://www.youtube.com/watch?v=FGD_O8brGNY)
46
+
47
+ Despite not being perfect, the main take away is that the performance is surprisingly good given that the underlying algorithm has not been optimised for Xournal++ data at all.
48
+
49
+ **The performance is sufficiently good to be useful for the Xournal++ user base.**
50
+
51
+ Feel free to play around with the demo yourself using [this code](https://github.com/PellelNitram/xournalpp_htr/blob/master/scripts/demo_concept_1.sh) after [installing this project](installation_user.md). The "concept 1" is also what is currently used in the plugin and shown in the [90 seconds demo](https://www.youtube.com/watch?v=boXm7lPFSRQ).
52
+
53
+ Next steps to improve the performance of the handwritten text recognition even further could be:
54
+
55
+ - Re-train the algorithm on Xournal++ specific data, while potentially using data augmentation.
56
+ - Use language model to improve text encoding.
57
+ - Use sequence-to-sequence algorithm that makes use of [Xournal++](https://github.com/xournalpp/xournalpp)'s data format. This translates into using online HTR algorithms.
58
+
59
+ I would like to acknowledge [Harald Scheidl](https://github.com/githubharald) in this concept as he wrote the underlying algorithms and made them easily usable through [his HTRPipeline repository](https://github.com/githubharald/HTRPipeline) - after all I just feed his algorithm [Xournal++](https://github.com/xournalpp/xournalpp) data in concept 1. [Go check out his great content](https://githubharald.github.io/)!
docs/developing_new_models.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Developing new models
2
+
3
+
4
+ <div align="center">
5
+
6
+ <iframe width="560" height="315" src="https://www.youtube.com/embed/dQw4w9WgXcQ?si=3xMriRxJb8TdjVui" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
7
+
8
+ <br>
9
+
10
+ <i>(<a href="https://youtu.be/dQw4w9WgXcQ?utm_source=docs&utm_medium=docs&utm_campaign=docs">Click here to get to video on YouTube.</a>)</i>
11
+
12
+ </div>
13
+
14
+ - I provide dataset and code to experiment w/ new models
15
+ - train both your own bespoke and general models.
16
+
17
+ ## Training
18
+
19
+ ### Installation
20
+
21
+ Follow the above installation procedure and replace the step `pip install -r requirements.txt` by both `pip install -r requirements.txt` and `pip install -r requirements_training.txt` to install both the inference and training dependencies.
docs/funding.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Funding
2
+
3
+ This project is mostly a solo project and I love to work on it (*please [contribute](contributing.md), if you want to - happy to help along the way!*).
4
+
5
+ However, it is both a large time commitment and requires compute resources for training models.
6
+
7
+ If you think this project is valuable and want to express your gratitude, then please feel free to buy me a virtual coffee [here](https://ko-fi.com/martin_l) :-).
8
+
9
+ Thanks!!
docs/huggingface_docker_space_deployment.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Local Docker image building
2
+
3
+ 1. Build the Docker image: `docker build -t xournalpp_htr .`
4
+ 2. Run Docker image: `docker run -d -p 7860:7860 xournalpp_htr`
5
+ - Interactively for debugging: `docker run -it --entrypoint bash xournalpp_htr`
6
+ 3. Run Docker image for interactive development
7
+ - Start docker container: `docker run -it -p 7860:7860 -v $(pwd):/temp_code_mount --entrypoint bash xournalpp_htr`
8
+ - Call Python code inside the container: `python /temp_code_mount/scripts/demo.py`
9
+
10
+ Generally, tidy up Docker caches with `docker system prune` if your system is full.
11
+
12
+ ## Looking into adding xournalpp to the image, because it is needed for prediction (to convert xoj/xopp to PDF):
13
+
14
+ now cross compiled on M4
15
+ - build image: `docker buildx build --platform linux/amd64 -t xournalpp_htr .`
16
+ - interactively entering: `docker run -it --platform linux/amd64 -p 7860:7860 -v $(pwd):/temp_code_mount --entrypoint bash xournalpp_htr`
17
+ - dl deb file: `wget --no-check-certificate https://github.com/xournalpp/xournalpp/releases/download/v1.2.8/xournalpp-1.2.8-Debian-bookworm-x86_64.deb`
18
+ - there're issues!!
19
+ - alternative: use appimage:
20
+ - `wget --no-check-certificate https://github.com/xournalpp/xournalpp/releases/download/v1.2.8/xournalpp-1.2.8-x86_64.AppImage`
21
+
22
+ ## Commands to set up Supabase for event logging and data storage
23
+
24
+ Contents of `.env` file:
25
+
26
+ ```bash
27
+ DEMO=1
28
+ SB_URL="https://<add here>.supabase.co"
29
+ SB_KEY="<add here>"
30
+ SB_BUCKET_NAME="xournalpp_htr_hf_space"
31
+ SB_SCHEMA_NAME="public"
32
+ SB_TABLE_NAME="xournalpp_htr_hf_space_events"
33
+ ```
34
+
35
+ Create the events table:
36
+
37
+ ```sql
38
+ create table public.xournalpp_htr_hf_space_events (
39
+ id bigserial primary key,
40
+ timestamp timestamptz not null,
41
+ demo boolean not null,
42
+ session_id text not null,
43
+ donate_data bool not null,
44
+ interaction text not null
45
+ );
46
+ ```
47
+
48
+ Create bucket:
49
+
50
+ ```
51
+ xournalpp_htr_hf_space
52
+ ```
docs/images/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+ # Put all images here
docs/images/TODO.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Add 90-second quickstart video and document.
docs/images/system_design.jpg ADDED
docs/index.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Xournal++ HTR
2
+
3
+ Developing [handwritten text recognition](https://en.wikipedia.org/wiki/Handwriting_recognition) for [Xournal++](https://github.com/xournalpp/xournalpp).
4
+
5
+ *Your contributions are greatly appreciated!*
6
+
7
+ ## Xournal++ HTR in 90 seconds
8
+
9
+ <div align="center">
10
+
11
+ <iframe width="560" height="315" src="https://www.youtube.com/embed/boXm7lPFSRQ?si=Yg8tLBs-_1BtQKrU" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
12
+
13
+ <br>
14
+
15
+ <i>(<a href="https://www.youtube.com/watch?v=boXm7lPFSRQ?utm_source=docs&utm_medium=docs&utm_campaign=docs">Click here to get to video on YouTube.</a>)</i>
16
+
17
+ </div>
18
+
19
+ ## Why Handwritten Text Recognition for Xournal++?
20
+
21
+ A key benefit of digital note-taking is searchability, which digital handwritten notes lack
22
+ without [handwritten text recognition (HTR)](https://en.wikipedia.org/wiki/Handwriting_recognition).
23
+ While many commercial apps offer this feature, no open-source, privacy-focused handwriting
24
+ app does - until now.
25
+
26
+ The **Xournal++ HTR** project aims to bring on-device handwriting recognition to
27
+ [Xournal++](https://xournalpp.github.io/), a leading open-source note-taking platform.
28
+ This will make handwritten notes searchable while ensuring user privacy through local data
29
+ processing.
30
+
31
+ ## Content of this website
32
+
33
+ This website documents Xournal++ HTR. In the navigation bar, you can find instructions on
34
+ how to install the project, use the project and more advanced topics like how you can contribute
35
+ code and your own models. In the future, many of the documents will come with small videos to get you going quicker.
36
+
37
+ <!-- To assist you in training your own models, Xournal++ HTR comes with many helper functions and -->
38
+ <!-- convenience code infrastructure. -->
39
+
40
+ ## Cite
41
+
42
+ If you are using Xournal++ HTR for your research, I'd appreciate if you could cite it. Use:
43
+
44
+ ```
45
+ @software{Lellep_Xournalpp_HTR,
46
+ author = {Lellep, Martin},
47
+ title = {xournalpp_htr},
48
+ url = {https://github.com/PellelNitram/xournalpp_htr},
49
+ license = {GPL-2.0},
50
+ }
51
+ ```
52
+
53
+ *(Also please consider starring the project on GitHub.)*
docs/installation_developer.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Development installation
2
+
3
+ 1. Perform the same installation steps as described in the [user installation manual](installation_user.md).
4
+ 2. Then, install developer dependencies: `pip install -r requirements_training.txt`.
5
+
6
+ Depending on your needs, it is probably worth creating a dedicated Python environment for development. To do
7
+ so, simply change `xournalpp_htr` from [user installation manual](installation_user.md) to another name like `xournalpp_htr_dev` when you follow the above development installation steps.
docs/installation_user.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Installation
2
+
3
+ This project consists of both the inference and training code. Most users will only be interested in the inference part, so the instructions below only cover the inference part that you need to execute the plugin from within Xournal++.
4
+
5
+ The training part is optional and allows you to help train our own models, which improve over time. This installation process is optional and detailed in [the developer guide](developer_guide.md#Installation).
6
+
7
+ ## Linux
8
+
9
+ Run `bash INSTALL_LINUX.sh` from repository root directory.
10
+
11
+ This script also installs the plugin as explained in the last point of the cross-platform installation procedure. The installation of the plugin is performed with `plugin/copy_to_plugin_folder.sh`, which can also be invoked independently of `INSTALL_LINUX.sh` for updating the plugin installation.
12
+
13
+ ## Cross-platform
14
+
15
+ If you want to install the plugin manually, then execute the following commands:
16
+
17
+ 1. Create an environment: ``conda create --name xournalpp_htr python=3.10.11``.
18
+ 2. Use this environment: ``conda activate xournalpp_htr``.
19
+ 3. Install [HTRPipelines](https://github.com/githubharald/HTRPipeline) package using [its installation guide](https://github.com/githubharald/HTRPipeline/tree/master#installation).
20
+ 4. Install all dependencies of this package ``pip install -r requirements.txt``.
21
+ 5. Install the package in development mode with ``pip install -e .`` (do not forget the dot, '.').
22
+ 6. Install pre-commit hooks with: `pre-commit install`.
23
+ 7. Copy `plugin/` folder content to `${XOURNAL_CONFIG_PATH}/plugins/xournalpp_htr/` with `${XOURNAL_CONFIG_PATH}` being the configuration path of Xournal++, see Xournal++ manual [here](https://xournalpp.github.io/guide/file-locations/).
24
+ 8. Edit `config.lua`, setting `_M.python_executable` to your python executable **in the conda environment** and `_M.xournalpp_htr_path` to the absolute path of this repo. See the example config for details in `plugin/config.lua`.
25
+ 9. Ensure Xournal++ is on your `PATH`. See [here](https://xournalpp.github.io/guide/file-locations/) for the binary location.
26
+
27
+ ## After installation
28
+
29
+ Confirm that the installation worked by running `make tests-installation` from repository root directory.
docs/pyinstaller_experiment.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TODO!
2
+
3
+ # PyInstaller Experiment
4
+
5
+ For easier installation.
6
+
7
+ Scope: On Linux.
8
+
9
+ Commands I experimented with:
10
+
11
+ ```bash
12
+ cd xournalpp_htr
13
+ pyinstaller --onefile --add-data "../external/htr_pipeline/HTRPipeline/htr_pipeline/models:htr_pipeline/models" --hidden-import "PIL._tkinter_finder" run_htr.py
14
+ dist/run_htr --input-file /home/martin/data/xournalpp_htr/test_1.xoj --output-file /home/martin/Development/xournalpp_htr/tests/test_1_from_Xpp-3.pdf
15
+ ```
16
+
17
+ This seems to work on my Ubuntu PC.
18
+
19
+ Open questions:
20
+ - Does it work on other linux computers?
21
+ - Idea: check w/ EC2/GCP-VM instances.
22
+ - How to include the `xournalpp` binary in order to export the `xopp` file to a PDF?
23
+ - Idea: Let the user select the `xournalpp` path?
docs/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ mkdocs
2
+ mkdocs-material
3
+ mkdocs-git-revision-date-localized-plugin
docs/roadmap.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ On this page, we outline the project's intended roadmap. This plan helps us strategically manage our time and resources.
2
+
3
+ Below, we present our roadmap. It may evolve over time, so we will preserve previous versions to maintain transparency.
4
+
5
+ ## Roadmap as of *2025-05-03*
6
+
7
+ ### Visual Overview
8
+
9
+ ```mermaid
10
+ flowchart LR
11
+ A0(
12
+ Conduct
13
+ dataset
14
+ research
15
+ )
16
+ A(
17
+ Reimplement
18
+ <a href="https://github.com/githubharald/HTRPipeline">htr_pipeline</a>
19
+ )
20
+ B(
21
+ Classic algos w
22
+ <a href="https://github.com/PellelNitram/OnlineHTR">OnlineHTR</a>
23
+ )
24
+ C(
25
+ Start own
26
+ modeling
27
+ )
28
+ D(
29
+ Introduce
30
+ quality
31
+ measures
32
+ )
33
+ E(
34
+ Graph NN w
35
+ <a href="https://github.com/PellelNitram/OnlineHTR">OnlineHTR</a>
36
+ )
37
+ F(
38
+ Make
39
+ installation
40
+ easier
41
+ )
42
+ G(
43
+ Explore offline
44
+ recognition models
45
+ like <a href="https://arxiv.org/abs/1904.01941">CRAFT</a>
46
+ )
47
+ A --> F
48
+ F --> D
49
+ D --> A0
50
+ A0 --> C
51
+ C --> B
52
+ C --> E
53
+ C --> G
54
+ ```
55
+
56
+ ### Explanation
57
+
58
+ This project has many potential directions, with the primary goal of delivering optimal value to users. While we are eager to implement advanced machine learning algorithms, we must first focus on usability improvements.
59
+
60
+ Our main mid-term objective is to simplify the installation process, as users have reported it is too complex.
61
+
62
+ Explanation of the steps:
63
+
64
+ - **Reimplement [htr_pipeline](https://github.com/githubharald/HTRPipeline):**
65
+ We currently use the excellent [htr_pipeline](https://github.com/githubharald/HTRPipeline) by [Harald Scheidl](https://github.com/githubharald) for machine learning, but it being an external dependency complicates installation and them hosting model weights on Dropbox is not suitable for our needs. To address this, we plan to integrate these models directly into our project. Since the original repository lacks a license, we'll implement our own version, drawing inspiration from the existing work. This approach will deliver an easy-to-install product quickly, as we already know the requirements & model details. Additionally, it enhances our understanding of training models for both online and offline handwriting data. With our own models, we'll automate model retrieval and establish a model registry, likely using [Hugging Face](https://huggingface.co/), as part of adhering to MLOps best practices. Experimentation with new algorithms will benefit from the model registry and will occur subsequently, as it is more time-consuming.
66
+
67
+ - **Make installation easier:**
68
+ We aim to make the installation process seamless across platforms, including Linux and Windows, with future support for Mac if access becomes available to us. Implementing a model registry will streamline model management and deployment, aiding future model development and enhancing ease of use while aligning with best practices.
69
+
70
+ - **Introduce quality measures:**
71
+ To identify the best model, we need to quantify performance. Ideally, one metric will suffice, but two may be necessary if recognition and transcription remain separate tasks.
72
+
73
+ - **Classic algos w [OnlineHTR](https://github.com/PellelNitram/OnlineHTR):**
74
+ The plan is to use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR) for transcription alongside classical (non-data-driven) algorithms for recognition.
75
+
76
+ - **Graph NN w [OnlineHTR](https://github.com/PellelNitram/OnlineHTR):**
77
+ We aim to use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR) for transcription and a graph neural network for recognition. This approach seeks to develop a high-performing model that operates on the native online representation of handwriting.
docs/user_guide.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Usage
2
+
3
+ The usage of the project is fairly simple. First, there is a Python script that performs the actual work & is useful for headless operations like batch processing. Second, and probably much more useful for the average user, the Lua plugin can be used from within Xournal++ and invokes the aforementioned Python script under the hood.
4
+
5
+ ## The Lua plugin
6
+
7
+ Details relevant for usage of the Lua plugin:
8
+
9
+ 1. Make sure to save your file in Xournal++ beforehand. The plugin will also let you know that you need to save your file first.
10
+ 2. After installation, navigate to `Plugin > Xournal++ HTR` to invoke the plugin. Then select a filename and press `Save`. Lastly, wait a wee bit until the process is finished; the Xournal++ UI will block while the plugin applies HTR to your file. If you opened Xournal++ through a command-line, you can see progress bars that show the HTR process in real-time.
11
+
12
+ Note: Currently, the Xournal++ HTR plugin requires you to use a nightly build of Xournal++ because it uses upstream Lua API features that are not yet part of the stable build. Using the officially provided nightly AppImage, see [here](https://xournalpp.github.io/installation/linux/), is very convenient. The plugin has been tested with the following nightly Linux build of Xournal++:
13
+
14
+ ```
15
+ xournalpp 1.2.3+dev (583a4e47)
16
+ └──libgtk: 3.24.20
17
+ ```
18
+
19
+ ## The Python script
20
+
21
+ It is located in `xournalpp_htr/run_htr.py` and it features a command line interface that documents the usage of the Python script.
experiments/2025-02-05_writing_test/index.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Handwritten Text App</title>
7
+ <link rel="stylesheet" href="styles.css">
8
+ </head>
9
+ <body>
10
+ <div class="container">
11
+ <h1>Handwritten Text App</h1>
12
+ <canvas id="canvas" width="600" height="400"></canvas>
13
+ <button id="exportButton">Export to JSON</button>
14
+ </div>
15
+ <script src="script.js"></script>
16
+ </body>
17
+ </html>
experiments/2025-02-05_writing_test/script.js ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ document.addEventListener('DOMContentLoaded', () => {
2
+ const canvas = document.getElementById('canvas');
3
+ const ctx = canvas.getContext('2d');
4
+ const exportButton = document.getElementById('exportButton');
5
+
6
+ let drawing = false;
7
+ const strokes = [];
8
+
9
+ canvas.addEventListener('mousedown', (e) => {
10
+ drawing = true;
11
+ const { offsetX, offsetY } = e;
12
+ const time = new Date().toISOString();
13
+ strokes.push({ x: offsetX, y: offsetY, time });
14
+ ctx.beginPath();
15
+ ctx.moveTo(offsetX, offsetY);
16
+ });
17
+
18
+ canvas.addEventListener('mousemove', (e) => {
19
+ if (!drawing) return;
20
+ const { offsetX, offsetY } = e;
21
+ const time = new Date().toISOString();
22
+ strokes.push({ x: offsetX, y: offsetY, time });
23
+ ctx.lineTo(offsetX, offsetY);
24
+ ctx.stroke();
25
+ });
26
+
27
+ canvas.addEventListener('mouseup', () => {
28
+ drawing = false;
29
+ ctx.closePath();
30
+ });
31
+
32
+ canvas.addEventListener('mouseleave', () => {
33
+ drawing = false;
34
+ ctx.closePath();
35
+ });
36
+
37
+ exportButton.addEventListener('click', () => {
38
+ const json = JSON.stringify(strokes, null, 2);
39
+ const blob = new Blob([json], { type: 'application/json' });
40
+ const url = URL.createObjectURL(blob);
41
+ const a = document.createElement('a');
42
+ a.href = url;
43
+ a.download = 'strokes.json';
44
+ a.click();
45
+ URL.revokeObjectURL(url);
46
+ });
47
+ });
experiments/2025-02-05_writing_test/styles.css ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ display: flex;
3
+ justify-content: center;
4
+ align-items: center;
5
+ height: 100vh;
6
+ margin: 0;
7
+ font-family: Arial, sans-serif;
8
+ background-color: #f0f0f0;
9
+ }
10
+
11
+ .container {
12
+ text-align: center;
13
+ }
14
+
15
+ canvas {
16
+ border: 1px solid #000;
17
+ background-color: #fff;
18
+ cursor: crosshair;
19
+ }
mkdocs.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ site_name: Xournal++ HTR
2
+ site_description: Developing handwritten text recognition for Xournal++
3
+
4
+ repo_name: PellelNitram/xournalpp_htr
5
+ repo_url: https://github.com/PellelNitram/xournalpp_htr
6
+ edit_uri: edit/master/docs/
7
+
8
+ strict: true
9
+
10
+ theme:
11
+ name: material
12
+
13
+ plugins:
14
+ - search # necessary for search to work
15
+ - git-revision-date-localized:
16
+ timezone: Europe/London
17
+ locale: en
18
+ fallback_to_build_date: false
19
+ enable_creation_date: true
20
+
21
+ nav:
22
+ - Introduction: 'index.md'
23
+ - Getting Started as User:
24
+ - Installation: 'installation_user.md'
25
+ - User Guide: 'user_guide.md'
26
+ - Getting Started as Developer:
27
+ - Installation: 'installation_developer.md'
28
+ - Developer Guide: 'developer_guide.md'
29
+ # - Data Collection: 'data_collection.md' # Unclear if even needed
30
+ # - Developing New Models: 'developing_new_models.md' # Very unclear what to write as I haven't built anything yet
31
+ - Contributing: 'contributing.md'
32
+ - Roadmap: 'roadmap.md'
33
+ - Funding: 'funding.md'
34
+
35
+ markdown_extensions:
36
+ - pymdownx.superfences: # To enable mermaid.js charts, see https://squidfunk.github.io/mkdocs-material/reference/diagrams/.
37
+ custom_fences:
38
+ - name: mermaid
39
+ class: mermaid
40
+ format: !!python/name:pymdownx.superfences.fence_code_format
notebooks/experiment_with_IAM_OnDo_dataset.ipynb ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "1d03e361-cb11-49aa-9cf7-5e0a590186c5",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Experiment w IAM OnDo dataset\n",
9
+ "\n",
10
+ "That is b/c it potentially comes with segmented word information, which is useful for a revised WordDetectorNN network.\n",
11
+ "\n",
12
+ "- [great for viewing XML files in formatted way](https://jsonformatter.org/xml-viewer/475e9e).\n",
13
+ "- [interesting package](https://github.com/RobinXL/inkml2img/blob/master/inkml2img.py)"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "id": "d1e6e7ca-882c-46a2-a4c0-79ae770b0b3a",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "from pathlib import Path\n",
24
+ "\n",
25
+ "import matplotlib.pyplot as plt\n",
26
+ "import pandas as pd"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "id": "3bda86c1-cb9c-45af-a7f0-4a010c6a8a1e",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "BASE_PATH = Path(\"/home/martin/Development/xournalpp_htr/data/datasets/IAMonDo-db-1.0/\")"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "id": "99d9e548-c8cf-43ec-9e1d-eb2327cdb828",
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "inkml_path = BASE_PATH / \"001e.inkml\""
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "id": "f30ff098-9723-408c-97cd-1bfcbb672c7c",
52
+ "metadata": {},
53
+ "source": [
54
+ "*side idea: build InkML class! it'd be cool to make package from that and maybe publish it.*"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "id": "1cc46913-7eeb-4dc8-a19a-874ab6b5d6a5",
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "import xml.etree.ElementTree as ET\n",
65
+ "\n",
66
+ "tree = ET.parse(inkml_path)\n",
67
+ "root = tree.getroot()"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "id": "27998475-db52-4e4b-9b18-63d5e5e64f56",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "root"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "id": "6ac612f7-3187-4851-bcd8-6c022380d2a5",
83
+ "metadata": {},
84
+ "source": [
85
+ "Explore `root` w [this](https://docs.python.org/3/library/xml.etree.elementtree.html):"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "id": "b1cbbb3e-1bcc-4360-9114-65f791b5b413",
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "root.tag, root.attrib"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": null,
101
+ "id": "43cb98e1-b146-4bdf-89b3-d23089434570",
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "for child in root:\n",
106
+ " print(child.tag, child.attrib)"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "markdown",
111
+ "id": "83de1f4d-e142-4b48-ba64-b7d623015754",
112
+ "metadata": {},
113
+ "source": [
114
+ "indeed, the above is the content of the file."
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "id": "4c17d724-3916-4c62-9ea0-868d891f396d",
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "# todo: cont exploration"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "markdown",
129
+ "id": "7ef3f07e-dc5e-423b-a415-174696d5d5ca",
130
+ "metadata": {},
131
+ "source": [
132
+ "## experiment w/ loading both stroke and corresponding text"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": null,
138
+ "id": "b089ed32-f24b-4f89-94af-5f7cbe5c56ec",
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "traceView = root[-1] # to access `traceView`\n",
143
+ "traceView"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": null,
149
+ "id": "03fd3ed6-695b-4b4f-b70a-d9ba1d6fd4e1",
150
+ "metadata": {},
151
+ "outputs": [],
152
+ "source": [
153
+ "traceView"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "markdown",
158
+ "id": "a8b3635f-269d-4dd8-981a-36b7553e5576",
159
+ "metadata": {},
160
+ "source": [
161
+ "`textblock` and `marking` seems interesting!"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "id": "5642f657-f317-448c-9339-486cab2c6063",
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "marking = traceView[-1]\n",
172
+ "marking"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "id": "8a4e5d6b-b278-48eb-b4dc-54f22c38fb8a",
179
+ "metadata": {},
180
+ "outputs": [],
181
+ "source": [
182
+ "marking[0].text"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": null,
188
+ "id": "45f82ba1-5328-4227-b366-12a781fbd27f",
189
+ "metadata": {},
190
+ "outputs": [],
191
+ "source": [
192
+ "marking[2][0].text, marking[2][1].text"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": null,
198
+ "id": "5bb7f46c-d682-4bc2-9486-fa7b6038f32c",
199
+ "metadata": {},
200
+ "outputs": [],
201
+ "source": [
202
+ "tmp = marking[2][2]"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": null,
208
+ "id": "6ba6226b-44bb-4932-82da-92773c4faeb6",
209
+ "metadata": {},
210
+ "outputs": [],
211
+ "source": [
212
+ "ids_to_use = []\n",
213
+ "\n",
214
+ "for x in tmp:\n",
215
+ " if x.tag == \"traceView\":\n",
216
+ " ids_to_use.append(x.attrib[\"traceDataRef\"])"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ "id": "0184aded-1d9f-4bbd-b48b-11735d2b60a2",
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "ids_to_use"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "id": "52358cf2-585d-4130-b90b-f5e0ab5d8015",
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "traces_to_use = []\n",
237
+ "\n",
238
+ "for x in root.findall(\"trace\"):\n",
239
+ " id_to_check = x.attrib[\"{http://www.w3.org/XML/1998/namespace}id\"]\n",
240
+ " for y in ids_to_use:\n",
241
+ " if y[1:] == id_to_check:\n",
242
+ " traces_to_use.append([id_to_check, x.text])\n",
243
+ "\n",
244
+ "traces_to_use.sort(key=lambda x: x[0])"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "markdown",
249
+ "id": "997d1c64-a7e1-474e-9bd1-6567b3da317b",
250
+ "metadata": {},
251
+ "source": [
252
+ "get dfs of traces:"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": null,
258
+ "id": "0f35219c-1cb2-4c5f-98e6-2eecb51b16d2",
259
+ "metadata": {},
260
+ "outputs": [],
261
+ "source": [
262
+ "dfs = []\n",
263
+ "\n",
264
+ "for name, trace in traces_to_use:\n",
265
+ " print(name)\n",
266
+ " trace = [\n",
267
+ " [float(yy) for yy in xx.replace(\"-\", \" -\").split()]\n",
268
+ " for xx in trace.split(\",\")\n",
269
+ " if xx[0] not in [\"'\", '\"']\n",
270
+ " ]\n",
271
+ " df = pd.DataFrame(data=trace, columns=[\"x\", \"y\", \"t\", \"f\"])\n",
272
+ "\n",
273
+ " dfs.append(df)"
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": null,
279
+ "id": "c4aa0a02-3089-4cdc-9a9f-db749e515573",
280
+ "metadata": {},
281
+ "outputs": [],
282
+ "source": [
283
+ "plt.figure()\n",
284
+ "for df in dfs:\n",
285
+ " plt.scatter(df.cumsum()[\"x\"], df.cumsum()[\"y\"])\n",
286
+ "plt.show()"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "markdown",
291
+ "id": "d304a1f8-dbee-4d25-b264-ecb9b74d2838",
292
+ "metadata": {},
293
+ "source": [
294
+ "ok, apparently i have no idea what I am plotting :-D"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "markdown",
299
+ "id": "143fbc12-f553-4bba-b61c-269000872a3e",
300
+ "metadata": {},
301
+ "source": [
302
+ "next steps:\n",
303
+ "- read spec of IAM On Do to learn what is stored.\n",
304
+ "- Read [this spec](https://www.w3.org/TR/InkML/#trace) to understand the above cryptic string and then plot it to see if it suits my needs of segmented word data."
305
+ ]
306
+ }
307
+ ],
308
+ "metadata": {
309
+ "kernelspec": {
310
+ "display_name": "Python 3 (ipykernel)",
311
+ "language": "python",
312
+ "name": "python3"
313
+ },
314
+ "language_info": {
315
+ "codemirror_mode": {
316
+ "name": "ipython",
317
+ "version": 3
318
+ },
319
+ "file_extension": ".py",
320
+ "mimetype": "text/x-python",
321
+ "name": "python",
322
+ "nbconvert_exporter": "python",
323
+ "pygments_lexer": "ipython3",
324
+ "version": "3.10.11"
325
+ }
326
+ },
327
+ "nbformat": 4,
328
+ "nbformat_minor": 5
329
+ }
notebooks/experiment_with_clustering_for_online_word_detection.ipynb ADDED
@@ -0,0 +1,526 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Experiment w clustering for online word detection"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "%load_ext autoreload\n",
17
+ "%autoreload 2"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 17,
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "from pathlib import Path\n",
27
+ "\n",
28
+ "import matplotlib.patches as patches\n",
29
+ "import matplotlib.pyplot as plt\n",
30
+ "import numpy as np\n",
31
+ "import pandas as pd\n",
32
+ "from sklearn.cluster import (\n",
33
+ " DBSCAN,\n",
34
+ " HDBSCAN,\n",
35
+ " AffinityPropagation,\n",
36
+ " AgglomerativeClustering,\n",
37
+ " MeanShift,\n",
38
+ " SpectralClustering,\n",
39
+ ")\n",
40
+ "from sklearn.metrics import adjusted_rand_score\n",
41
+ "\n",
42
+ "from xournalpp_htr.training.io import load_list_of_bboxes\n",
43
+ "from xournalpp_htr.training.visualise import plot_clustered_document"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "metadata": {},
49
+ "source": [
50
+ "## Experiment structure\n",
51
+ "\n",
52
+ "### Hypothesis\n",
53
+ "\n",
54
+ "One can find an algorithm that segments strokes into words using my handwriting.\n",
55
+ "\n",
56
+ "Side note: This is useful b/c I can then use [OnlineHTR](https://github.com/PellelNitram/OnlineHTR) to transcribe the words.\n",
57
+ "\n",
58
+ "### Notebook structure\n",
59
+ "\n",
60
+ "1. Load data, incl ground truth.\n",
61
+ "2. Pre-compute a set of features. Later, feature engineering might be added.\n",
62
+ "3. Iterate over a few algorithms and measure their performance using the ground truth.\n",
63
+ "\n",
64
+ "Alternative addition later on: Manually remove strokes that're too long (in distribution sense) or too straight. That is another step because it will require a dataset with such strokes that don't belong to words."
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "## Settings\n",
74
+ "\n",
75
+ "OUTPUT_PATH = Path(\"experiment_results\")\n",
76
+ "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n",
77
+ "\n",
78
+ "PLOT_RESULTS = True\n",
79
+ "PLOT_RESULTS = False"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "markdown",
84
+ "metadata": {},
85
+ "source": [
86
+ "## Helper functions"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 4,
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "# TODO: Add here if necessary."
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "markdown",
100
+ "metadata": {},
101
+ "source": [
102
+ "## Load annotations"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "markdown",
107
+ "metadata": {},
108
+ "source": [
109
+ "Previously, I loaded the data as `XournalppDocument` but that approach lacked ground truth data. Instead, I now load the annotated data, which comes with ground truth data."
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 5,
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "annotated_bboxes = load_list_of_bboxes(\n",
119
+ " \"../tests/data/2024-10-13_minimal.annotations.json\"\n",
120
+ ")\n",
121
+ "\n",
122
+ "DPI = 72 # TODO: Add this to annotations!\n",
123
+ "\n",
124
+ "# TODO: Maybe integrate `/DPI` into the x and y values? Maybe convert to cm?\n",
125
+ "# TODO: Add page dimensions, i.e.:\n",
126
+ "# - float(page.meta_data[\"width\"]) / DPI,\n",
127
+ "# - float(page.meta_data[\"height\"]) / DPI,"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": null,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "# ========\n",
137
+ "# Figure 1\n",
138
+ "# ========\n",
139
+ "\n",
140
+ "length = len(annotated_bboxes[\"bboxes\"])\n",
141
+ "nr_2 = 4\n",
142
+ "nr_1 = length // nr_2 + 1\n",
143
+ "\n",
144
+ "fig, axes = plt.subplots(nrows=nr_1, ncols=nr_2, figsize=(10, 8))\n",
145
+ "\n",
146
+ "for i_bbox in range(length):\n",
147
+ " bbox = annotated_bboxes[\"bboxes\"][i_bbox]\n",
148
+ "\n",
149
+ " a = axes.flatten()[i_bbox]\n",
150
+ "\n",
151
+ " a.set_aspect(\"equal\")\n",
152
+ " a.set_title(bbox[\"text\"])\n",
153
+ " a.set_xlabel(\"x\")\n",
154
+ " a.set_ylabel(\"-y\")\n",
155
+ "\n",
156
+ " for bbox_stroke in bbox[\"bbox_strokes\"]:\n",
157
+ " x = bbox_stroke[\"x\"] / DPI\n",
158
+ " y = bbox_stroke[\"y\"] / DPI\n",
159
+ " a.scatter(x, -y, c=\"black\", s=1)\n",
160
+ "\n",
161
+ "plt.tight_layout()\n",
162
+ "plt.show()\n",
163
+ "\n",
164
+ "# ========\n",
165
+ "# Figure 2\n",
166
+ "# ========\n",
167
+ "\n",
168
+ "plt.figure(figsize=(10, 8))\n",
169
+ "\n",
170
+ "a = plt.gca()\n",
171
+ "a.set_aspect(\"equal\")\n",
172
+ "a.set_xlabel(\"x\")\n",
173
+ "a.set_ylabel(\"-y\")\n",
174
+ "\n",
175
+ "for i_bbox in range(length):\n",
176
+ " bbox = annotated_bboxes[\"bboxes\"][i_bbox]\n",
177
+ "\n",
178
+ " # Draw bbox\n",
179
+ " xy = (\n",
180
+ " min([bbox[\"point_1_x\"], bbox[\"point_2_x\"]]) / DPI,\n",
181
+ " min([-bbox[\"point_1_y\"], -bbox[\"point_2_y\"]])\n",
182
+ " / DPI, # TODO: This messing around w/ y coord sign is annoying\n",
183
+ " )\n",
184
+ " dx = np.abs(bbox[\"point_1_x\"] - bbox[\"point_2_x\"]) / DPI\n",
185
+ " dy = np.abs(bbox[\"point_1_y\"] - bbox[\"point_2_y\"]) / DPI\n",
186
+ " a.add_patch(\n",
187
+ " patches.Rectangle(xy, dx, dy, linewidth=1, edgecolor=\"r\", facecolor=\"none\")\n",
188
+ " )\n",
189
+ "\n",
190
+ " # Draw label\n",
191
+ " a.text(x=xy[0], y=xy[1] + dy, s=bbox[\"text\"], c=\"red\")\n",
192
+ "\n",
193
+ " for bbox_stroke in bbox[\"bbox_strokes\"]:\n",
194
+ " x = bbox_stroke[\"x\"] / DPI\n",
195
+ " y = bbox_stroke[\"y\"] / DPI\n",
196
+ " a.scatter(x, -y, c=\"black\", s=1)\n",
197
+ "\n",
198
+ "plt.tight_layout()\n",
199
+ "plt.show()"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "markdown",
204
+ "metadata": {},
205
+ "source": [
206
+ "Prepare list of all strokes w/ relevant meta information as ground truth. This variable serves as training data:"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": 7,
212
+ "metadata": {},
213
+ "outputs": [],
214
+ "source": [
215
+ "df_strokes_data = {\n",
216
+ " \"x\": [],\n",
217
+ " \"y\": [],\n",
218
+ " \"x_mean\": [],\n",
219
+ " \"y_mean\": [],\n",
220
+ " \"i_bbox\": [],\n",
221
+ " \"text\": [],\n",
222
+ "}\n",
223
+ "\n",
224
+ "for i_bbox in range(len(annotated_bboxes[\"bboxes\"])):\n",
225
+ " bbox = annotated_bboxes[\"bboxes\"][i_bbox]\n",
226
+ "\n",
227
+ " for bbox_stroke in bbox[\"bbox_strokes\"]:\n",
228
+ " x = +bbox_stroke[\"x\"] / DPI\n",
229
+ " y = -bbox_stroke[\"y\"] / DPI\n",
230
+ "\n",
231
+ " df_strokes_data[\"x\"].append(x)\n",
232
+ " df_strokes_data[\"y\"].append(y)\n",
233
+ " df_strokes_data[\"x_mean\"].append(np.mean(x))\n",
234
+ " df_strokes_data[\"y_mean\"].append(np.mean(y))\n",
235
+ " df_strokes_data[\"i_bbox\"].append(i_bbox)\n",
236
+ " df_strokes_data[\"text\"].append(bbox[\"text\"])\n",
237
+ "\n",
238
+ "df_train = pd.DataFrame.from_dict(df_strokes_data)"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "markdown",
243
+ "metadata": {},
244
+ "source": [
245
+ "Plot the training data:"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": null,
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "plt.figure(figsize=(10, 8))\n",
255
+ "\n",
256
+ "a = plt.gca()\n",
257
+ "a.set_aspect(\"equal\")\n",
258
+ "a.set_xlabel(\"x\")\n",
259
+ "a.set_ylabel(\"y\")\n",
260
+ "\n",
261
+ "for (i_bbox, text), df_grouped in df_train.groupby(\n",
262
+ " [\"i_bbox\", \"text\"],\n",
263
+ "):\n",
264
+ " a.scatter(df_grouped[\"x_mean\"], df_grouped[\"y_mean\"], c=\"red\", s=2, zorder=999)\n",
265
+ "\n",
266
+ " bottom_left_x = np.inf\n",
267
+ " bottom_left_y = np.inf\n",
268
+ " top_right_x = -np.inf\n",
269
+ " top_right_y = -np.inf\n",
270
+ " for _, row in df_grouped.iterrows():\n",
271
+ " a.plot(row.x, row.y) # , c=cmap(i_row/N))\n",
272
+ " if row.x.min() < bottom_left_x:\n",
273
+ " bottom_left_x = row.x.min()\n",
274
+ " if row.y.min() < bottom_left_y:\n",
275
+ " bottom_left_y = row.y.min()\n",
276
+ " if row.x.max() > top_right_x:\n",
277
+ " top_right_x = row.x.max()\n",
278
+ " if row.y.max() > top_right_y:\n",
279
+ " top_right_y = row.y.max()\n",
280
+ "\n",
281
+ " # Plot bounding box\n",
282
+ " xy = (bottom_left_x, bottom_left_y)\n",
283
+ " dx = top_right_x - bottom_left_x\n",
284
+ " dy = top_right_y - bottom_left_y\n",
285
+ " a.add_patch(\n",
286
+ " patches.Rectangle(xy, dx, dy, linewidth=1, edgecolor=\"r\", facecolor=\"none\")\n",
287
+ " )\n",
288
+ "\n",
289
+ " # Plot text\n",
290
+ " a.text(x=bottom_left_x, y=top_right_y, s=f'\"{text}\" ({i_bbox})', c=\"red\")\n",
291
+ "\n",
292
+ "\n",
293
+ "plt.tight_layout()\n",
294
+ "plt.show()"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "markdown",
299
+ "metadata": {},
300
+ "source": [
301
+ "## Iterate over clustering algorithms"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": null,
307
+ "metadata": {},
308
+ "outputs": [],
309
+ "source": [
310
+ "%%time\n",
311
+ "\n",
312
+ "all_clusterings = [\n",
313
+ " AgglomerativeClustering(\n",
314
+ " n_clusters=22, distance_threshold=None\n",
315
+ " ), # I hard-code 22 b/c I counted that there're 22 clusters\n",
316
+ " AgglomerativeClustering(n_clusters=10, distance_threshold=None),\n",
317
+ " AgglomerativeClustering(\n",
318
+ " n_clusters=None, distance_threshold=1e0\n",
319
+ " ), # One could maybe tune it by investigating nr of clusters over distance threshold; TODO: Distance threshold using distribution?!\n",
320
+ " SpectralClustering(\n",
321
+ " n_clusters=15, # 21,\n",
322
+ " affinity=\"nearest_neighbors\",\n",
323
+ " ),\n",
324
+ " SpectralClustering(\n",
325
+ " n_clusters=21, # 21,\n",
326
+ " affinity=\"nearest_neighbors\",\n",
327
+ " ),\n",
328
+ " SpectralClustering(\n",
329
+ " n_clusters=6, # 21,\n",
330
+ " affinity=\"nearest_neighbors\",\n",
331
+ " ),\n",
332
+ " MeanShift(\n",
333
+ " bandwidth=None,\n",
334
+ " ),\n",
335
+ " MeanShift(\n",
336
+ " bandwidth=0.1,\n",
337
+ " ),\n",
338
+ " MeanShift(\n",
339
+ " bandwidth=1.0,\n",
340
+ " ),\n",
341
+ " MeanShift(\n",
342
+ " bandwidth=10.0,\n",
343
+ " ),\n",
344
+ " AffinityPropagation(),\n",
345
+ " HDBSCAN(min_cluster_size=2),\n",
346
+ " # FeatureAgglomeration(\n",
347
+ " # n_clusters=None,\n",
348
+ " # distance_threshold=0.1,\n",
349
+ " # ),\n",
350
+ " # FeatureAgglomeration(\n",
351
+ " # n_clusters=None,\n",
352
+ " # distance_threshold=1.0,\n",
353
+ " # ),\n",
354
+ " # FeatureAgglomeration(\n",
355
+ " # n_clusters=None,\n",
356
+ " # distance_threshold=10.0,\n",
357
+ " # ),\n",
358
+ "]\n",
359
+ "\n",
360
+ "all_clusterings += [DBSCAN(eps) for eps in np.logspace(-4, 1, 1000)]\n",
361
+ "all_clusterings += [\n",
362
+ " AgglomerativeClustering(n_clusters=None, distance_threshold=DISTANCE_THRESHOLD)\n",
363
+ " for DISTANCE_THRESHOLD in np.logspace(-4, 1, 1000)\n",
364
+ "]\n",
365
+ "\n",
366
+ "results = {\n",
367
+ " \"index\": [],\n",
368
+ " \"score\": [],\n",
369
+ "}\n",
370
+ "for i_clustering, clustering in enumerate(all_clusterings):\n",
371
+ " print(i_clustering, clustering)\n",
372
+ " clustering.fit(df_train[[\"x_mean\", \"y_mean\"]])\n",
373
+ "\n",
374
+ " score = adjusted_rand_score(df_train[\"i_bbox\"], clustering.labels_)\n",
375
+ "\n",
376
+ " results[\"index\"].append(i_clustering)\n",
377
+ " results[\"score\"].append(score)\n",
378
+ "\n",
379
+ " # Plotting\n",
380
+ " if PLOT_RESULTS:\n",
381
+ " fig, [a_ground_truth, a_predicted] = plt.subplots(1, 2, figsize=(10, 8))\n",
382
+ " plot_clustered_document(\n",
383
+ " a_ground_truth,\n",
384
+ " a_predicted,\n",
385
+ " clustering,\n",
386
+ " annotated_bboxes,\n",
387
+ " DPI,\n",
388
+ " df_train,\n",
389
+ " a_predicted_title=f\"A-RAND={score}\",\n",
390
+ " )\n",
391
+ " plt.savefig(OUTPUT_PATH / f\"iClustering{i_clustering}.png\")\n",
392
+ " plt.close()\n",
393
+ "\n",
394
+ "results = pd.DataFrame.from_dict(results)"
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "code",
399
+ "execution_count": null,
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": [
403
+ "plt.figure()\n",
404
+ "\n",
405
+ "plt.scatter(results[\"index\"], results[\"score\"], c=\"red\")\n",
406
+ "\n",
407
+ "plt.xlabel(\"Index of clustering settings\")\n",
408
+ "plt.ylabel(\"Adjusted Rand Score (larger is better)\")\n",
409
+ "plt.savefig(\"2024-10-18_clustering_experiments.png\", dpi=200)"
410
+ ]
411
+ },
412
+ {
413
+ "cell_type": "markdown",
414
+ "metadata": {},
415
+ "source": [
416
+ "Next, check if the clusters make sense by plotting the clusters on the page of a set of pre-selected settings to test out:"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": 20,
422
+ "metadata": {},
423
+ "outputs": [],
424
+ "source": [
425
+ "# TODO!!!\n",
426
+ "\n",
427
+ "# CONTINUE TO WORK HERE!!!"
428
+ ]
429
+ },
430
+ {
431
+ "cell_type": "markdown",
432
+ "metadata": {},
433
+ "source": [
434
+ "TODO: Learning: The peak at ~800 seems to classify rows of text. This should be fine w/ OnlineHTR!"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": 1,
440
+ "metadata": {},
441
+ "outputs": [],
442
+ "source": [
443
+ "# TODO: Add more stroke features. Then run large screen. Also add feature selection.\n",
444
+ "# TODO: Maybe add k fold?"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "markdown",
449
+ "metadata": {},
450
+ "source": [
451
+ "Next, plot the dendrogram, see [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html#sphx-glr-auto-examples-cluster-plot-agglomerative-dendrogram-py)."
452
+ ]
453
+ },
454
+ {
455
+ "cell_type": "markdown",
456
+ "metadata": {},
457
+ "source": [
458
+ "Next, try out DBSCAN! Also see [here](https://scikit-learn.org/stable/modules/clustering.html)."
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "markdown",
463
+ "metadata": {},
464
+ "source": [
465
+ "Also next, try out another document to play around with."
466
+ ]
467
+ },
468
+ {
469
+ "cell_type": "markdown",
470
+ "metadata": {},
471
+ "source": [
472
+ "Question: Is my OnlineHTR model robust against rotated text?! Maybe one should rotate the text first?"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "markdown",
477
+ "metadata": {},
478
+ "source": [
479
+ "Note: It is probably worth it to write a bit of infrastructure code to experiment more (and easier and easier to compare) with these clustering approaches.\n",
480
+ "\n",
481
+ "Next: Feed these sequences to `OnlineHTR` or retrained `SimpleHTR` nmodel."
482
+ ]
483
+ },
484
+ {
485
+ "cell_type": "markdown",
486
+ "metadata": {},
487
+ "source": [
488
+ "## TODOs:\n",
489
+ "\n",
490
+ "- I think next cool thing to try out is to do proper feature engineering to try to enhance the features. Using the raw strokes could be regarded as last resort but IMHO doesn't make sense b/c a stroke always has a single word attached as strokes cannot be split, which they could be if one allows clusterings on the raw datapoints instead of strokes.\n",
491
+ "\n",
492
+ "- Good source for rand score: [see here](https://stats.stackexchange.com/questions/260229/comparing-a-clustering-algorithm-partition-to-a-ground-truth-one).\n",
493
+ "- After finding the best clustering, do apply OnlineHTR to check how it performs!\n",
494
+ "- To overcome the scale issue (i.e. everyone's handwriting scale is a wee bit different), one would need to use an approach that is based on 'nearest neighbours'. This works b/c one does not write on top of existing words.\n",
495
+ " - also, one could weight the x direction more in definition of closeness/distance\n",
496
+ "- Hook up OnlineHTR to here!\n",
497
+ "- I think the biggest problem for the OnlineHTR model would be the different line positions based on the way it was trained. Hence, one could maybe put extra emphasis on clusters being on similar y values.\n",
498
+ "- I have to say that I am unclear if a heuristic (i.e. a clustering algo w/ smartly chosen parameters) is really enough. Certainly for now, but a fully data-driven way would be better to accommodate different writers. This is probably relevant for a next iteration of the model.\n",
499
+ " - E.g., is this approach robust against larger handwriting?\n",
500
+ "- Hyper parameters like distance threshold are probably a function of the content of the page (e.g. diagrams, written text height, etc).\n",
501
+ "- It would be cool to try graph NN. Also, I'd love to add more features than the mean. That might help in learning."
502
+ ]
503
+ }
504
+ ],
505
+ "metadata": {
506
+ "kernelspec": {
507
+ "display_name": "xournalpp_htr",
508
+ "language": "python",
509
+ "name": "python3"
510
+ },
511
+ "language_info": {
512
+ "codemirror_mode": {
513
+ "name": "ipython",
514
+ "version": 3
515
+ },
516
+ "file_extension": ".py",
517
+ "mimetype": "text/x-python",
518
+ "name": "python",
519
+ "nbconvert_exporter": "python",
520
+ "pygments_lexer": "ipython3",
521
+ "version": "3.10.11"
522
+ }
523
+ },
524
+ "nbformat": 4,
525
+ "nbformat_minor": 2
526
+ }
plugin/config.lua ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ local _M = {}
2
+
3
+ -- user settings
4
+ _M.python_executable = "/home/martin/anaconda3/envs/xournalpp_htr/bin/python"
5
+ _M.xournalpp_htr_path = "/home/martin/Development/xournalpp_htr/xournalpp_htr/run_htr.py"
6
+ _M.model = "dummy"
7
+ _M.output_file = "/home/martin/Development/xournalpp_htr/tests/test_1_from_Xpp.pdf"
8
+ _M.debug_HTR_command = false
9
+ -- TODO: allow UI to set other parameters as well of `xournalpp_htr`.
10
+
11
+ -- TODO replace later w/ temp exported file - filename will be derived automatically
12
+ _M.filename = "/home/martin/Development/xournalpp_htr/tests/test_1.xoj"
13
+
14
+ return _M
plugin/copy_to_plugin_folder.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========
2
+ # SETTINGS
3
+ # ========
4
+
5
+ TARGET_FOLDER=~/.config/xournalpp/plugins/xournalpp_htr/
6
+ # TARGET_FOLDER=/usr/share/xournalpp/plugins/xournalpp_htr # requires `sudo`
7
+
8
+ # ============
9
+ # COPY PROCESS
10
+ # ============
11
+
12
+ mkdir -p ${TARGET_FOLDER}
13
+
14
+ cp plugin.ini ${TARGET_FOLDER}
15
+ cp main.lua ${TARGET_FOLDER}
16
+ cp config.lua ${TARGET_FOLDER}
plugin/demo_config.lua ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ local _M = {}
2
+
3
+ -- user settings
4
+ _M.python_executable = "/home/martin/anaconda3/envs/xournalpp_htr/bin/python"
5
+ _M.xournalpp_htr_path = "/home/martin/Development/xournalpp_htr/xournalpp_htr/run_htr.py"
6
+ _M.model = "dummy"
7
+ _M.output_file = "/home/martin/Development/xournalpp_htr/tests/test_1_from_Xpp.pdf"
8
+ _M.debug_HTR_command = false
9
+ -- TODO: allow UI to set other parameters as well of `xournalpp_htr`.
10
+
11
+ -- TODO replace later w/ temp exported file - filename will be derived automatically
12
+ _M.filename = "/home/martin/Development/xournalpp_htr/tests/test_1.xoj"
13
+
14
+ return _M
plugin/main.lua ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ function initUi()
2
+ app.registerUi({["menu"] = "Xournal++ HTR", ["callback"] = "run", ["accelerator"] = "<Control>F1"});
3
+ end
4
+
5
+ function save_file(path)
6
+ if path:len() > 0 then
7
+
8
+ -- Read settings: I use this (https://stackoverflow.com/a/41176958). An
9
+ -- alternative could have been https://stackoverflow.com/a/41176826. Both
10
+ -- found using G"lua read settings file".
11
+ local config = require "config"
12
+
13
+ config.filename = '"' .. app.getDocumentStructure()['xoppFilename'] .. '"'
14
+ config.output_file = '"' .. path .. '"'
15
+
16
+ command = config.python_executable .. " " .. config.xournalpp_htr_path
17
+ .. " -if " .. config.filename
18
+ .. " -of " .. config.output_file
19
+ if config.debug_HTR_command then
20
+ print(command)
21
+ else
22
+ os.execute(command)
23
+ end
24
+
25
+ end
26
+ end
27
+
28
+ function run()
29
+
30
+ document_structure = app.getDocumentStructure()
31
+
32
+ if document_structure['xoppFilename']:len() == 0 then
33
+ app.openDialog('Please save document prior to exporting it as searchable PDF!', {"Ok"}, "", true)
34
+ else
35
+ app.fileDialogSave("save_file", "untitled.pdf")
36
+ end
37
+
38
+ end
39
+
40
+ -- TODO: Think of workflow to maximise usability for user
41
+ -- TODO: How to store settings? Ideally permanently?
42
+ -- TODO: Interesting code from example plugins:
43
+ -- - Get filename: https://github.com/xournalpp/xournalpp/blob/master/plugins/Export/main.lua#L29
44
+ -- - Toggle logic: https://github.com/xournalpp/xournalpp/blob/master/plugins/HighlightPosition/main.lua#L5
45
+ -- - UI: https://github.com/xournalpp/xournalpp/blob/master/plugins/MigrateFontSizes/main.lua
46
+ -- - OS interaction: https://github.com/xournalpp/xournalpp/blob/master/plugins/QuickScreenshot/main.lua
plugin/plugin.ini ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Based on this explanation: https://xournalpp.github.io/guide/plugins/plugins/
2
+
3
+ [about]
4
+ ## Author / Copyright notice
5
+ author=Martin Lellep
6
+
7
+ description=Developing handwritten text recognition for Xournal++
8
+
9
+ ## If the plugin is packed with Xournal++, use
10
+ ## <xournalpp> then it gets the same version number
11
+ version=0.1
12
+
13
+ [default]
14
+ enabled=false
15
+
16
+ [plugin]
17
+ mainfile=main.lua
pyproject.toml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.ruff]
2
+ fix = true
3
+ show-fixes = true
4
+ line-length = 88
5
+ lint.select = [
6
+ "C", # mccabe rules
7
+ "F", # pyflakes rules
8
+ "E", # pycodestyle error rules
9
+ "W", # pycodestyle warning rules
10
+ "B", # flake8-bugbear rules
11
+ "I", # isort rules
12
+ ]
13
+ lint.ignore = [
14
+ "C901", # max-complexity-10
15
+ "E501", # line-too-long
16
+ ]
17
+
18
+ [tool.ruff.format]
19
+ indent-style = "space"
20
+ quote-style = "double"
pytest.ini ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [pytest]
2
+ minversion = 6.0
3
+ testpaths =
4
+ tests
5
+ markers =
6
+ slow: Marks tests as slow (select with '-m slow' and deselect with '-m "not slow"')
7
+ technical: Marks tests as technical tests to ensure that code features work as expected
8
+ correctness: Marks tests that check physical behaviour to ensure physical correctness
9
+ installation: Marks tests that confirm this package was installed correctly.
10
+ data: Marks tests that verify the presence and location of test data.
11
+ visual_check: Marks tests that need visual checks.
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ beautifulsoup4
3
+ matplotlib
4
+ opencv-python
5
+ pytest
6
+ lxml
7
+ pymupdf
8
+ tqdm
9
+ pre-commit
requirements_training.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ jupyter
3
+ gradio
4
+ gitpython
scripts/demo.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import uuid
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+
7
+ import gradio as gr
8
+ from dotenv import load_dotenv
9
+ from pdf2image import convert_from_path
10
+ from supabase import Client, create_client
11
+
12
+ from xournalpp_htr.documents import get_document
13
+ from xournalpp_htr.models import compute_predictions
14
+ from xournalpp_htr.utils import export_to_pdf_with_xournalpp, get_env_variable
15
+ from xournalpp_htr.xio import write_predictions_to_PDF
16
+
17
+ load_dotenv()
18
+
19
+ DEMO = get_env_variable("DEMO") == "1"
20
+ SB_URL = get_env_variable("SB_URL")
21
+ SB_KEY = get_env_variable("SB_KEY")
22
+ SB_BUCKET_NAME = get_env_variable("SB_BUCKET_NAME")
23
+ SB_SCHEMA_NAME = get_env_variable("SB_SCHEMA_NAME")
24
+ SB_TABLE_NAME = get_env_variable("SB_TABLE_NAME")
25
+
26
+ # --- Image Processing Functions ---
27
+
28
+
29
def get_temporary_directory() -> Path:
    """Return the system-wide temporary directory as a :class:`Path`."""
    tmp_dir = tempfile.gettempdir()
    return Path(tmp_dir)
31
+
32
+
33
def get_path_of_exported_pdf(session_id: str) -> Path:
    """Temp-dir path of this session's plain (pre-HTR) PDF export."""
    filename = f"{session_id}_input_as_pdf.pdf"
    return get_temporary_directory() / filename
35
+
36
+
37
def get_path_of_pdf_with_htr(session_id: str) -> Path:
    """Temp-dir path of this session's HTR-annotated PDF."""
    filename = f"{session_id}_pdf_with_htr.pdf"
    return get_temporary_directory() / filename
39
+
40
+
41
def log_interaction(
    session_id: str,
    donate_data: bool,
    interaction: str,
    document_path: str | None,
):
    """Record one user interaction in Supabase.

    A metadata row is inserted for every call. When ``donate_data`` is set
    and a document path is given, the document itself is additionally
    uploaded to the storage bucket under the session id.
    """
    client: Client = create_client(SB_URL, SB_KEY)

    if donate_data and document_path:
        source = Path(document_path)
        # Donated files are keyed by session id; the original suffix is kept.
        target_name = f"{session_id}{source.suffix}"
        with open(source, "rb") as handle:
            client.storage.from_(SB_BUCKET_NAME).upload(
                target_name,
                handle,
                {"content-type": "application/octet-stream"},
            )

    # Insert metadata row
    metadata_row = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "demo": DEMO,
        "session_id": session_id,
        "donate_data": donate_data,
        "interaction": interaction,
    }

    client.schema(SB_SCHEMA_NAME).table(SB_TABLE_NAME).insert(metadata_row).execute()
69
+
70
+
71
def upload_document(document_path, session_id: str, donate_data: bool) -> str:
    """Gradio upload handler: log the event and pass the path through.

    Returns the uploaded file's path unchanged (``None`` when nothing was
    uploaded); Gradio stores it in the session state for later steps.
    """
    log_interaction(
        session_id=session_id,
        donate_data=donate_data,
        interaction="upload_document",
        document_path=document_path,
    )
    # Passing None through and returning the path are the same statement:
    # the stored state is simply whatever was uploaded.
    return document_path
81
+
82
+
83
def document_to_image_of_first_page(document_path, session_id):
    """Export the document to PDF via Xournal++ and render its first page.

    The uploaded .xoj/.xopp document is exported to a per-session temporary
    PDF and the first page is rendered to an image for preview. Returns
    ``None`` when no document has been uploaded yet.

    (Previous docstring — "Flips the input image horizontally." — was a
    copy-paste leftover and did not describe this function.)
    """
    log_interaction(
        session_id=session_id,
        donate_data=False,
        interaction="document_to_image_of_first_page",
        document_path=None,
    )
    if document_path is None:
        return None
    output_path = get_path_of_exported_pdf(session_id)
    export_to_pdf_with_xournalpp(
        Path(document_path),
        output_path,
    )
    # Only the first page is rendered; previewing every page would be slow.
    images = convert_from_path(output_path, first_page=1, last_page=1)
    first_page = images[0]
    return first_page
101
+
102
+
103
def document_to_HTR_document_and_image_of_first_page(document_path, session_id):
    """Run HTR on the document and render the first page of the result.

    Computes handwriting predictions for the uploaded document, writes them
    into the previously exported per-session PDF, and renders the first page
    of the resulting searchable PDF. Returns ``None`` when no document has
    been uploaded yet.

    NOTE(review): assumes the plain PDF export (step 2) already ran for this
    session, since predictions are written onto that exported PDF — confirm
    in the UI flow. (Previous docstring — "Rotates the input image 90
    degrees counter-clockwise." — was a copy-paste leftover.)
    """
    log_interaction(
        session_id=session_id,
        donate_data=False,
        interaction="document_to_HTR_document_and_image_of_first_page",
        document_path=None,
    )
    if document_path is None:
        return None
    document_path = Path(document_path)
    input_as_pdf_path = get_path_of_exported_pdf(session_id)
    pdf_with_htr = get_path_of_pdf_with_htr(session_id)
    document = get_document(document_path)
    predictions = compute_predictions(
        model_name="2024-07-18_htr_pipeline", document=document
    )
    write_predictions_to_PDF(
        input_as_pdf_path,
        pdf_with_htr,
        predictions,
        debug_htr=True,
    )  # TODO: make it a generator to track progress externally like here.
    # Render only page 1 for the preview pane.
    images = convert_from_path(pdf_with_htr, first_page=1, last_page=1)
    first_page = images[0]
    return first_page
129
+
130
+
131
def save_HTR_document_for_download(session_id):
    """Return this session's HTR PDF path for download, or ``None`` if absent.

    The path is returned as ``str`` because that is what the Gradio File
    component consumes.
    """
    log_interaction(
        session_id=session_id,
        donate_data=False,
        interaction="save_HTR_document_for_download",
        document_path=None,
    )
    result_pdf = get_path_of_pdf_with_htr(session_id)
    return str(result_pdf) if result_pdf.exists() else None
142
+
143
+
144
# --- Gradio UI Layout ---

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Intro text: project description, privacy note and donation link.
    gr.Markdown(
        """
    # [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr) Demo

    This is an online demo of the [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr) project, which strives to bring modern handwritten
    text recognition to open-source handwritten note softwares like [Xournal++](https://xournalpp.github.io/).

    While [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr) is natively built to be running locally, this demo deploys it online so you
    can try it out without any installation. We do not collect any personal data (see [source code of this demo](https://github.com/PellelNitram/xournalpp_htr/blob/master/scripts/demo.py))
    but allow you to donate your data if you want so that we can build better underlying machine learning models for all of us (all open-source, of course!).

    Note that the HTR results are not yet perfect. This is an ongoing project and we are actively working on improving the models.
    Currently, we are constrained by the limited amount of publicly available training data and by our working time (this is a hobby project next to our day jobs).

    The "we" in the paragraphs above is currently really only me, [Martin Lellep](https://lellep.xyz/?utm_campaign=xppGradioDemo), the main developer of Xournal++ HTR. I really love to work on
    [Xournal++ HTR](https://github.com/PellelNitram/xournalpp_htr)! If you think this project is valuable and want to express your gratitute, then please feel free to [buy me a virtual coffee here](https://ko-fi.com/martin_l)
    so that I can buy more GPU power for training models and continue to let the GPUs go brrr :-).
    """
    )

    # Per-browser-session id (timestamp + UUID). Used to name per-session
    # temp files and to key donated uploads; the lambda makes it fresh for
    # each session rather than fixed at app start-up.
    session_id = gr.State(
        value=lambda: datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        + "_"
        + str(uuid.uuid4())
    )

    # Holds the uploaded document's file path (despite the name, not an image).
    original_image_state = gr.State()

    # Opt-in data donation; read by upload_document -> log_interaction.
    donate_data_checkbox = gr.Checkbox(
        label="Donate Data: Help us to improve our open-source models by donating your uploaded document. Everything will be released as open-source!",
        value=False,
    )

    # Step 1: upload a Xournal/Xournal++ document.
    upload_button = gr.UploadButton(
        "1. Click to Upload an XOJ File",
        file_types=[".xoj", ".xopp"],
        file_count="single",
    )

    # Side-by-side previews: plain export (left) vs HTR result (right).
    with gr.Row():
        image_viewer_1 = gr.Image(
            label="Original document", interactive=False, height=350
        )
        image_viewer_2 = gr.Image(
            label="Document with HTR", interactive=False, height=350
        )

    with gr.Row():
        button_1 = gr.Button("2. Export to PDF and Show First Page")
        button_2 = gr.Button("3. Compute PDF with HTR and Show First Page")

    button_download = gr.Button("4. Download PDF with HTR")
    file_output = gr.File(label="Download PDF with HTR")

    # --- Event Handlers ---

    # Store the uploaded path in state (and log / optionally donate it).
    upload_button.upload(
        fn=upload_document,
        inputs=[upload_button, session_id, donate_data_checkbox],
        outputs=original_image_state,
    )

    # Step 2: export to PDF and preview page 1.
    button_1.click(
        fn=document_to_image_of_first_page,
        inputs=[original_image_state, session_id],
        outputs=image_viewer_1,
    )

    # Step 3: run HTR and preview page 1 of the result.
    button_2.click(
        fn=document_to_HTR_document_and_image_of_first_page,
        inputs=[original_image_state, session_id],
        outputs=image_viewer_2,
    )

    # Step 4: expose the HTR PDF through the File component.
    button_download.click(
        fn=save_HTR_document_for_download,
        inputs=session_id,
        outputs=file_output,
    )
+ )
226
+
227
+
228
if __name__ == "__main__":
    # Bind to all interfaces so the hosting container can reach the server.
    port = int(os.environ.get("PORT", 7860))  # Use HF-provided port or fallback
    demo.launch(server_name="0.0.0.0", server_port=port)
scripts/demo_concept_1.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Run the demo-concept script on a test document.
# Alternative dataset location (kept for reference; the first assignment was
# dead code because it was immediately overwritten by the line below):
# FILE=~/data/xournalpp_htr/datasets/tests/test_1.xoj
FILE=../tests/test_1.xoj

python ../xournalpp_htr/demo_concept_1.py --input-file ${FILE}
setup.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Package setup. As a side effect, rewrites plugin/config.lua so the
Xournal++ plugin points at this checkout's Python executable and run_htr.py."""

import os
import sys

import setuptools

## Modifies config.lua to use the appropriate paths
# Get the path of this file
htr_dir = os.path.dirname(os.path.abspath(__file__))

# Path to the config.lua file
config_file = os.path.join(htr_dir, "plugin", "config.lua")

# Fix direction of slashes, needed on Windows
htr_dir = htr_dir.replace("\\", "/")

# Get the path of the Python executable
python_executable = sys.executable.replace("\\", "/")

# Read the config.lua file. Explicit UTF-8 so the rewrite does not depend on
# the platform default encoding (this script explicitly supports Windows).
with open(config_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Modify the necessary lines in the config.lua file
modified_lines = []
for line in lines:
    if line.startswith("_M.python_executable ="):
        modified_lines.append('_M.python_executable = "' + python_executable + '"\n')
    elif line.startswith("_M.xournalpp_htr_path ="):
        modified_lines.append(
            '_M.xournalpp_htr_path = "' + htr_dir + '/xournalpp_htr/run_htr.py"\n'
        )
    else:
        modified_lines.append(line)

# Write the modified lines back to the config.lua file
with open(config_file, "w", encoding="utf-8") as f:
    f.writelines(modified_lines)

# README is UTF-8; relying on the locale encoding can fail on Windows.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="xournalpp_htr",
    version="0.0.1",
    description="Developing handwritten text recognition for Xournal++.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=setuptools.find_packages(),
)
+ )
tests/.gitkeep ADDED
File without changes