minhpn commited on
Commit
046723b
·
1 Parent(s): 2ce52ef
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +63 -0
  2. .github/FUNDING.yml +3 -0
  3. .github/ISSUE_TEMPLATE/bug_report.md +62 -0
  4. .github/ISSUE_TEMPLATE/feature_request.md +23 -0
  5. .github/dependabot.yml +14 -0
  6. .github/test/Dockerfile-alpine +34 -0
  7. .github/workflows/codeql-analysis.yml +62 -0
  8. .github/workflows/containers.yml +144 -0
  9. .github/workflows/pypi-release.yml +80 -0
  10. .github/workflows/test-container-build.yml +80 -0
  11. .github/workflows/test-only.yml +44 -0
  12. .github/workflows/test-stack-reusable-workflow.yml +240 -0
  13. .gitignore +30 -0
  14. .pre-commit-config.yaml +9 -0
  15. .ruff.toml +48 -0
  16. COMMERCIAL_LICENCE.md +54 -0
  17. CONTRIBUTING.md +9 -0
  18. Dockerfile +87 -0
  19. LICENSE +201 -0
  20. MANIFEST.in +24 -0
  21. README-pip.md +91 -0
  22. changedetection.py +8 -0
  23. changedetectionio/.gitignore +2 -0
  24. changedetectionio/PLUGIN_README.md +98 -0
  25. changedetectionio/__init__.py +240 -0
  26. changedetectionio/api/Import.py +62 -0
  27. changedetectionio/api/Notifications.py +145 -0
  28. changedetectionio/api/Search.py +51 -0
  29. changedetectionio/api/SystemInfo.py +54 -0
  30. changedetectionio/api/Tags.py +156 -0
  31. changedetectionio/api/Watch.py +298 -0
  32. changedetectionio/api/__init__.py +33 -0
  33. changedetectionio/api/api_schema.py +146 -0
  34. changedetectionio/api/auth.py +25 -0
  35. changedetectionio/async_update_worker.py +449 -0
  36. changedetectionio/auth_decorator.py +33 -0
  37. changedetectionio/blueprint/__init__.py +0 -0
  38. changedetectionio/blueprint/backups/__init__.py +164 -0
  39. changedetectionio/blueprint/backups/templates/overview.html +36 -0
  40. changedetectionio/blueprint/browser_steps/TODO.txt +7 -0
  41. changedetectionio/blueprint/browser_steps/__init__.py +254 -0
  42. changedetectionio/blueprint/browser_steps/browser_steps.py +497 -0
  43. changedetectionio/blueprint/check_proxies/__init__.py +124 -0
  44. changedetectionio/blueprint/imports/__init__.py +75 -0
  45. changedetectionio/blueprint/imports/importer.py +302 -0
  46. changedetectionio/blueprint/imports/templates/import.html +123 -0
  47. changedetectionio/blueprint/price_data_follower/__init__.py +35 -0
  48. changedetectionio/blueprint/rss/__init__.py +1 -0
  49. changedetectionio/blueprint/rss/blueprint.py +147 -0
  50. changedetectionio/blueprint/settings/__init__.py +145 -0
.dockerignore ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git/
3
+ .gitignore
4
+
5
+ # GitHub
6
+ .github/
7
+
8
+ # Byte-compiled / optimized / DLL files
9
+ **/__pycache__
10
+ **/*.py[cod]
11
+
12
+ # Caches
13
+ .mypy_cache/
14
+ .pytest_cache/
15
+ .ruff_cache/
16
+
17
+ # Distribution / packaging
18
+ build/
19
+ dist/
20
+ *.egg-info*
21
+
22
+ # Virtual environment
23
+ .env
24
+ .venv/
25
+ venv/
26
+
27
+ # IntelliJ IDEA
28
+ .idea/
29
+
30
+ # Visual Studio
31
+ .vscode/
32
+
33
+ # Test and development files
34
+ test-datastore/
35
+ tests/
36
+ docs/
37
+ *.md
38
+ !README.md
39
+
40
+ # Temporary and log files
41
+ *.log
42
+ *.tmp
43
+ tmp/
44
+ temp/
45
+
46
+ # Training data and large files
47
+ train-data/
48
+ works-data/
49
+
50
+ # Container files
51
+ Dockerfile*
52
+ docker-compose*.yml
53
+ .dockerignore
54
+
55
+ # Development certificates and keys
56
+ *.pem
57
+ *.key
58
+ *.crt
59
+ profile_output.prof
60
+
61
+ # Large binary files that shouldn't be in container
62
+ *.pdf
63
+ chrome.json
.github/FUNDING.yml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # These are supported funding model platforms
2
+
3
+ github: dgtlmoon
.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug report
3
+ about: Create a bug report, if you don't follow this template, your report will be DELETED
4
+ title: ''
5
+ labels: 'triage'
6
+ assignees: 'dgtlmoon'
7
+
8
+ ---
9
+
10
+ **DO NOT USE THIS FORM TO REPORT THAT A PARTICULAR WEBSITE IS NOT SCRAPING/WATCHING AS EXPECTED**
11
+
12
+ This form is only for direct bugs and feature requests todo directly with the software.
13
+
14
+ Please report watched websites (full URL and _any_ settings) that do not work with changedetection.io as expected [**IN THE DISCUSSION FORUMS**](https://github.com/dgtlmoon/changedetection.io/discussions) or your report will be deleted
15
+
16
+ CONSIDER TAKING OUT A SUBSCRIPTION FOR A SMALL PRICE PER MONTH, YOU GET THE BENEFIT OF USING OUR PAID PROXIES AND FURTHERING THE DEVELOPMENT OF CHANGEDETECTION.IO
17
+
18
+ THANK YOU
19
+
20
+
21
+
22
+
23
+
24
+ **Describe the bug**
25
+ A clear and concise description of what the bug is.
26
+
27
+ **Version**
28
+ *Exact version* in the top right area: 0....
29
+
30
+ **How did you install?**
31
+
32
+ Docker, Pip, from source directly etc
33
+
34
+ **To Reproduce**
35
+
36
+ Steps to reproduce the behavior:
37
+ 1. Go to '...'
38
+ 2. Click on '....'
39
+ 3. Scroll down to '....'
40
+ 4. See error
41
+
42
+ ! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE - USE THE 'SHARE WATCH' FEATURE AND PASTE IN THE SHARE-LINK!
43
+
44
+ **Expected behavior**
45
+ A clear and concise description of what you expected to happen.
46
+
47
+ **Screenshots**
48
+ If applicable, add screenshots to help explain your problem.
49
+
50
+ **Desktop (please complete the following information):**
51
+ - OS: [e.g. iOS]
52
+ - Browser [e.g. chrome, safari]
53
+ - Version [e.g. 22]
54
+
55
+ **Smartphone (please complete the following information):**
56
+ - Device: [e.g. iPhone6]
57
+ - OS: [e.g. iOS8.1]
58
+ - Browser [e.g. stock browser, safari]
59
+ - Version [e.g. 22]
60
+
61
+ **Additional context**
62
+ Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: '[feature]'
5
+ labels: 'enhancement'
6
+ assignees: ''
7
+
8
+ ---
9
+ **Version and OS**
10
+ For example, 0.123 on linux/docker
11
+
12
+ **Is your feature request related to a problem? Please describe.**
13
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
14
+
15
+ **Describe the solution you'd like**
16
+ A clear and concise description of what you want to happen.
17
+
18
+ **Describe the use-case and give concrete real-world examples**
19
+ Attach any HTML/JSON, give links to sites, screenshots etc, we are not mind readers
20
+
21
+
22
+ **Additional context**
23
+ Add any other context or screenshots about the feature request here.
.github/dependabot.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: github-actions
4
+ directory: /
5
+ schedule:
6
+ interval: "weekly"
7
+ "caronc/apprise":
8
+ versioning-strategy: "increase"
9
+ schedule:
10
+ interval: "daily"
11
+ groups:
12
+ all:
13
+ patterns:
14
+ - "*"
.github/test/Dockerfile-alpine ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile
2
+ # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
3
+ # Some packages wont install via pypi because they dont have a wheel available under this architecture.
4
+
5
+ FROM ghcr.io/linuxserver/baseimage-alpine:3.21
6
+ ENV PYTHONUNBUFFERED=1
7
+
8
+ COPY requirements.txt /requirements.txt
9
+
10
+ RUN \
11
+ apk add --update --no-cache --virtual=build-dependencies \
12
+ build-base \
13
+ cargo \
14
+ git \
15
+ jpeg-dev \
16
+ libc-dev \
17
+ libffi-dev \
18
+ libxslt-dev \
19
+ openssl-dev \
20
+ python3-dev \
21
+ zip \
22
+ zlib-dev && \
23
+ apk add --update --no-cache \
24
+ libjpeg \
25
+ libxslt \
26
+ nodejs \
27
+ poppler-utils \
28
+ python3 && \
29
+ echo "**** pip3 install test of changedetection.io ****" && \
30
+ python3 -m venv /lsiopy && \
31
+ pip install -U pip wheel setuptools && \
32
+ pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
33
+ apk del --purge \
34
+ build-dependencies
.github/workflows/codeql-analysis.yml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # For most projects, this workflow file will not need changing; you simply need
2
+ # to commit it to your repository.
3
+ #
4
+ # You may wish to alter this file to override the set of languages analyzed,
5
+ # or to provide custom queries or build logic.
6
+ #
7
+ # ******** NOTE ********
8
+ # We have attempted to detect the languages in your repository. Please check
9
+ # the `language` matrix defined below to confirm you have the correct set of
10
+ # supported CodeQL languages.
11
+ #
12
+ name: "CodeQL"
13
+
14
+ on:
15
+ schedule:
16
+ - cron: '27 9 * * 4'
17
+
18
+ jobs:
19
+ analyze:
20
+ name: Analyze
21
+ runs-on: ubuntu-latest
22
+
23
+ strategy:
24
+ fail-fast: false
25
+ matrix:
26
+ language: [ 'javascript', 'python' ]
27
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
28
+ # Learn more:
29
+ # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
30
+
31
+ steps:
32
+ - name: Checkout repository
33
+ uses: actions/checkout@v4
34
+
35
+ # Initializes the CodeQL tools for scanning.
36
+ - name: Initialize CodeQL
37
+ uses: github/codeql-action/init@v3
38
+ with:
39
+ languages: ${{ matrix.language }}
40
+ # If you wish to specify custom queries, you can do so here or in a config file.
41
+ # By default, queries listed here will override any specified in a config file.
42
+ # Prefix the list here with "+" to use these queries and those in the config file.
43
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main
44
+
45
+ # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
46
+ # If this step fails, then you should remove it and run the build manually (see below)
47
+ - name: Autobuild
48
+ uses: github/codeql-action/autobuild@v3
49
+
50
+ # ℹ️ Command-line programs to run using the OS shell.
51
+ # 📚 https://git.io/JvXDl
52
+
53
+ # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
54
+ # and modify them (or add more) to build your code if your project
55
+ # uses a compiled language
56
+
57
+ #- run: |
58
+ # make bootstrap
59
+ # make release
60
+
61
+ - name: Perform CodeQL Analysis
62
+ uses: github/codeql-action/analyze@v3
.github/workflows/containers.yml ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and push containers
2
+
3
+ on:
4
+ # Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch
5
+ # workflow_run:
6
+ # workflows: ["ChangeDetection.io Test"]
7
+ # branches: [master]
8
+ # tags: ['0.*']
9
+ # types: [completed]
10
+
11
+ # Or a new tagged release
12
+ release:
13
+ types: [published, edited]
14
+
15
+ push:
16
+ branches:
17
+ - master
18
+
19
+ jobs:
20
+ metadata:
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - name: Show metadata
24
+ run: |
25
+ echo SHA ${{ github.sha }}
26
+ echo github.ref: ${{ github.ref }}
27
+ echo github_ref: $GITHUB_REF
28
+ echo Event name: ${{ github.event_name }}
29
+ echo Ref ${{ github.ref }}
30
+ echo c: ${{ github.event.workflow_run.conclusion }}
31
+ echo r: ${{ github.event.workflow_run }}
32
+ echo tname: "${{ github.event.release.tag_name }}"
33
+ echo headbranch: -${{ github.event.workflow_run.head_branch }}-
34
+ set
35
+
36
+ build-push-containers:
37
+ runs-on: ubuntu-latest
38
+ # If the testing workflow has a success, then we build to :latest
39
+ # Or if we are in a tagged release scenario.
40
+ if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+ - name: Set up Python 3.11
44
+ uses: actions/setup-python@v5
45
+ with:
46
+ python-version: 3.11
47
+
48
+ - name: Install dependencies
49
+ run: |
50
+ python -m pip install --upgrade pip
51
+ pip install flake8 pytest
52
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
53
+
54
+ - name: Create release metadata
55
+ run: |
56
+ # COPY'ed by Dockerfile into changedetectionio/ of the image, then read by the server in store.py
57
+ echo ${{ github.sha }} > changedetectionio/source.txt
58
+ echo ${{ github.ref }} > changedetectionio/tag.txt
59
+
60
+ - name: Set up QEMU
61
+ uses: docker/setup-qemu-action@v3
62
+ with:
63
+ image: tonistiigi/binfmt:latest
64
+ platforms: all
65
+
66
+ - name: Login to GitHub Container Registry
67
+ uses: docker/login-action@v3
68
+ with:
69
+ registry: ghcr.io
70
+ username: ${{ github.actor }}
71
+ password: ${{ secrets.GITHUB_TOKEN }}
72
+
73
+ - name: Login to Docker Hub Container Registry
74
+ uses: docker/login-action@v3
75
+ with:
76
+ username: ${{ secrets.DOCKER_HUB_USERNAME }}
77
+ password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
78
+
79
+ - name: Set up Docker Buildx
80
+ id: buildx
81
+ uses: docker/setup-buildx-action@v3
82
+ with:
83
+ install: true
84
+ version: latest
85
+ driver-opts: image=moby/buildkit:master
86
+
87
+ # master branch -> :dev container tag
88
+ - name: Build and push :dev
89
+ id: docker_build
90
+ if: ${{ github.ref }} == "refs/heads/master"
91
+ uses: docker/build-push-action@v6
92
+ with:
93
+ context: ./
94
+ file: ./Dockerfile
95
+ push: true
96
+ tags: |
97
+ ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
98
+ platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
99
+ cache-from: type=gha
100
+ cache-to: type=gha,mode=max
101
+
102
+ # Looks like this was disabled
103
+ # provenance: false
104
+
105
+ # A new tagged release is required, which builds :tag and :latest
106
+ - name: Debug release info
107
+ if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
108
+ run: |
109
+ echo "Release tag: ${{ github.event.release.tag_name }}"
110
+ echo "Github ref: ${{ github.ref }}"
111
+ echo "Github ref name: ${{ github.ref_name }}"
112
+
113
+ - name: Docker meta :tag
114
+ if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
115
+ uses: docker/metadata-action@v5
116
+ id: meta
117
+ with:
118
+ images: |
119
+ ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
120
+ ghcr.io/dgtlmoon/changedetection.io
121
+ tags: |
122
+ type=semver,pattern={{version}},value=${{ github.event.release.tag_name }}
123
+ type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
124
+ type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
125
+ type=raw,value=latest
126
+
127
+ - name: Build and push :tag
128
+ id: docker_build_tag_release
129
+ if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
130
+ uses: docker/build-push-action@v6
131
+ with:
132
+ context: ./
133
+ file: ./Dockerfile
134
+ push: true
135
+ tags: ${{ steps.meta.outputs.tags }}
136
+ platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
137
+ cache-from: type=gha
138
+ cache-to: type=gha,mode=max
139
+ # Looks like this was disabled
140
+ # provenance: false
141
+
142
+ - name: Image digest
143
+ run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}
144
+
.github/workflows/pypi-release.yml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish Python 🐍distribution 📦 to PyPI and TestPyPI
2
+
3
+ on: push
4
+ jobs:
5
+ build:
6
+ name: Build distribution 📦
7
+ runs-on: ubuntu-latest
8
+
9
+ steps:
10
+ - uses: actions/checkout@v4
11
+ - name: Set up Python
12
+ uses: actions/setup-python@v5
13
+ with:
14
+ python-version: "3.11"
15
+ - name: Install pypa/build
16
+ run: >-
17
+ python3 -m
18
+ pip install
19
+ build
20
+ --user
21
+ - name: Build a binary wheel and a source tarball
22
+ run: python3 -m build
23
+ - name: Store the distribution packages
24
+ uses: actions/upload-artifact@v4
25
+ with:
26
+ name: python-package-distributions
27
+ path: dist/
28
+
29
+
30
+ test-pypi-package:
31
+ name: Test the built 📦 package works basically.
32
+ runs-on: ubuntu-latest
33
+ needs:
34
+ - build
35
+ steps:
36
+ - name: Download all the dists
37
+ uses: actions/download-artifact@v4
38
+ with:
39
+ name: python-package-distributions
40
+ path: dist/
41
+ - name: Set up Python 3.11
42
+ uses: actions/setup-python@v5
43
+ with:
44
+ python-version: '3.11'
45
+ - name: Test that the basic pip built package runs without error
46
+ run: |
47
+ set -ex
48
+ ls -alR
49
+
50
+ # Find and install the first .whl file
51
+ find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
52
+ changedetection.io -d /tmp -p 10000 &
53
+
54
+ sleep 3
55
+ curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
56
+ curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
57
+ killall changedetection.io
58
+
59
+
60
+ publish-to-pypi:
61
+ name: >-
62
+ Publish Python 🐍 distribution 📦 to PyPI
63
+ if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
64
+ needs:
65
+ - test-pypi-package
66
+ runs-on: ubuntu-latest
67
+ environment:
68
+ name: release
69
+ url: https://pypi.org/p/changedetection.io
70
+ permissions:
71
+ id-token: write # IMPORTANT: mandatory for trusted publishing
72
+
73
+ steps:
74
+ - name: Download all the dists
75
+ uses: actions/download-artifact@v4
76
+ with:
77
+ name: python-package-distributions
78
+ path: dist/
79
+ - name: Publish distribution 📦 to PyPI
80
+ uses: pypa/gh-action-pypi-publish@release/v1
.github/workflows/test-container-build.yml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: ChangeDetection.io Container Build Test
2
+
3
+ # Triggers the workflow on push or pull request events
4
+
5
+ # This line doesnt work, even tho it is the documented one
6
+ #on: [push, pull_request]
7
+
8
+ on:
9
+ push:
10
+ paths:
11
+ - requirements.txt
12
+ - Dockerfile
13
+ - .github/workflows/*
14
+ - .github/test/Dockerfile*
15
+
16
+ pull_request:
17
+ paths:
18
+ - requirements.txt
19
+ - Dockerfile
20
+ - .github/workflows/*
21
+ - .github/test/Dockerfile*
22
+
23
+ # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
24
+ # @todo: some kind of path filter for requirements.txt and Dockerfile
25
+ jobs:
26
+ builder:
27
+ name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }})
28
+ runs-on: ubuntu-latest
29
+ strategy:
30
+ matrix:
31
+ include:
32
+ # Main Dockerfile platforms
33
+ - platform: linux/amd64
34
+ dockerfile: ./Dockerfile
35
+ - platform: linux/arm64
36
+ dockerfile: ./Dockerfile
37
+ - platform: linux/arm/v7
38
+ dockerfile: ./Dockerfile
39
+ - platform: linux/arm/v8
40
+ dockerfile: ./Dockerfile
41
+ - platform: linux/arm64/v8
42
+ dockerfile: ./Dockerfile
43
+ # Alpine Dockerfile platforms (musl via alpine check)
44
+ - platform: linux/amd64
45
+ dockerfile: ./.github/test/Dockerfile-alpine
46
+ - platform: linux/arm64
47
+ dockerfile: ./.github/test/Dockerfile-alpine
48
+ steps:
49
+ - uses: actions/checkout@v4
50
+ - name: Set up Python 3.11
51
+ uses: actions/setup-python@v5
52
+ with:
53
+ python-version: 3.11
54
+
55
+ # Just test that the build works, some libraries won't compile on ARM/rPi etc
56
+ - name: Set up QEMU
57
+ uses: docker/setup-qemu-action@v3
58
+ with:
59
+ image: tonistiigi/binfmt:latest
60
+ platforms: all
61
+
62
+ - name: Set up Docker Buildx
63
+ id: buildx
64
+ uses: docker/setup-buildx-action@v3
65
+ with:
66
+ install: true
67
+ version: latest
68
+ driver-opts: image=moby/buildkit:master
69
+
70
+ - name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
71
+ id: docker_build
72
+ uses: docker/build-push-action@v6
73
+ # https://github.com/docker/build-push-action#customizing
74
+ with:
75
+ context: ./
76
+ file: ${{ matrix.dockerfile }}
77
+ platforms: ${{ matrix.platform }}
78
+ cache-from: type=gha
79
+ cache-to: type=gha,mode=max
80
+
.github/workflows/test-only.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: ChangeDetection.io App Test
2
+
3
+ # Triggers the workflow on push or pull request events
4
+ on: [push, pull_request]
5
+
6
+ jobs:
7
+ lint-code:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - uses: actions/checkout@v4
11
+ - name: Lint with Ruff
12
+ run: |
13
+ pip install ruff
14
+ # Check for syntax errors and undefined names
15
+ ruff check . --select E9,F63,F7,F82
16
+ # Complete check with errors treated as warnings
17
+ ruff check . --exit-zero
18
+
19
+ test-application-3-10:
20
+ needs: lint-code
21
+ uses: ./.github/workflows/test-stack-reusable-workflow.yml
22
+ with:
23
+ python-version: '3.10'
24
+
25
+
26
+ test-application-3-11:
27
+ needs: lint-code
28
+ uses: ./.github/workflows/test-stack-reusable-workflow.yml
29
+ with:
30
+ python-version: '3.11'
31
+
32
+ test-application-3-12:
33
+ needs: lint-code
34
+ uses: ./.github/workflows/test-stack-reusable-workflow.yml
35
+ with:
36
+ python-version: '3.12'
37
+ skip-pypuppeteer: true
38
+
39
+ test-application-3-13:
40
+ needs: lint-code
41
+ uses: ./.github/workflows/test-stack-reusable-workflow.yml
42
+ with:
43
+ python-version: '3.13'
44
+ skip-pypuppeteer: true
.github/workflows/test-stack-reusable-workflow.yml ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: ChangeDetection.io App Test
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ python-version:
7
+ description: "Python version to use"
8
+ required: true
9
+ type: string
10
+ default: "3.11"
11
+ skip-pypuppeteer:
12
+ description: "Skip PyPuppeteer (not supported in 3.11/3.12)"
13
+ required: false
14
+ type: boolean
15
+ default: false
16
+
17
+ jobs:
18
+ test-application:
19
+ runs-on: ubuntu-latest
20
+ env:
21
+ PYTHON_VERSION: ${{ inputs.python-version }}
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ # Mainly just for link/flake8
26
+ - name: Set up Python ${{ env.PYTHON_VERSION }}
27
+ uses: actions/setup-python@v5
28
+ with:
29
+ python-version: ${{ env.PYTHON_VERSION }}
30
+
31
+ - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
32
+ run: |
33
+ echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
34
+ # Build a changedetection.io container and start testing inside
35
+ docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
36
+ # Debug info
37
+ docker run test-changedetectionio bash -c 'pip list'
38
+
39
+ - name: We should be Python ${{ env.PYTHON_VERSION }} ...
40
+ run: |
41
+ docker run test-changedetectionio bash -c 'python3 --version'
42
+
43
+ - name: Spin up ancillary testable services
44
+ run: |
45
+
46
+ docker network create changedet-network
47
+
48
+ # Selenium
49
+ docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
50
+
51
+ # SocketPuppetBrowser + Extra for custom browser test
52
+ docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
53
+ docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
54
+
55
+ - name: Spin up ancillary SMTP+Echo message test server
56
+ run: |
57
+ # Debug SMTP server/echo message back server
58
+ docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
59
+ docker ps
60
+
61
+ - name: Show docker container state and other debug info
62
+ run: |
63
+ set -x
64
+ echo "Running processes in docker..."
65
+ docker ps
66
+
67
+ - name: Run Unit Tests
68
+ run: |
69
+ # Unit tests
70
+ docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
71
+ docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
72
+ docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
73
+ docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
74
+
75
+ - name: Test built container with Pytest (generally as requests/plaintext fetching)
76
+ run: |
77
+ # All tests
78
+ echo "run test with pytest"
79
+ # The default pytest logger_level is TRACE
80
+ # To change logger_level for pytest(test/conftest.py),
81
+ # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
82
+ docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
83
+
84
+ # PLAYWRIGHT/NODE-> CDP
85
+ - name: Playwright and SocketPuppetBrowser - Specific tests in built container
86
+ run: |
87
+ # Playwright via Sockpuppetbrowser fetch
88
+ # tests/visualselector/test_fetch_data.py will do browser steps
89
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
90
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
91
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
92
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
93
+
94
+ - name: Playwright and SocketPuppetBrowser - Headers and requests
95
+ run: |
96
+ # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
97
+ docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
98
+
99
+ - name: Playwright and SocketPuppetBrowser - Restock detection
100
+ run: |
101
+ # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
102
+ docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
103
+
104
+ # STRAIGHT TO CDP
105
+ - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
106
+ if: ${{ inputs.skip-pypuppeteer == false }}
107
+ run: |
108
+ # Playwright via Sockpuppetbrowser fetch
109
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
110
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
111
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
112
+ docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
113
+
114
+ - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
115
+ if: ${{ inputs.skip-pypuppeteer == false }}
116
+ run: |
117
+ # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
118
+ docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
119
+
120
+ - name: Pyppeteer and SocketPuppetBrowser - Restock detection
121
+ if: ${{ inputs.skip-pypuppeteer == false }}
122
+ run: |
123
+ # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
124
+ docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
125
+
126
+ # SELENIUM
127
+ - name: Specific tests in built container for Selenium
128
+ run: |
129
+ # Selenium fetch
130
+ docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
131
+
132
+ - name: Specific tests in built container for headers and requests checks with Selenium
133
+ run: |
134
+ docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
135
+
136
+ # OTHER STUFF
137
+ - name: Test SMTP notification mime types
138
+ run: |
139
+ # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
140
+ # "mailserver" hostname defined above
141
+ docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
142
+
143
+ # @todo Add a test via playwright/puppeteer
144
+ # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
145
+ - name: Test proxy squid style interaction
146
+ run: |
147
+ cd changedetectionio
148
+ ./run_proxy_tests.sh
149
+ cd ..
150
+
151
+ - name: Test proxy SOCKS5 style interaction
152
+ run: |
153
+ cd changedetectionio
154
+ ./run_socks_proxy_tests.sh
155
+ cd ..
156
+
157
+ - name: Test custom browser URL
158
+ run: |
159
+ cd changedetectionio
160
+ ./run_custom_browser_url_tests.sh
161
+ cd ..
162
+
163
+ - name: Test changedetection.io container starts+runs basically without error
164
+ run: |
165
+ docker run --name test-changedetectionio -p 5556:7860 -d test-changedetectionio
166
+ sleep 3
167
+ # Should return 0 (no error) when grep finds it
168
+ curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
169
+
170
+ # and IPv6
171
+ curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
172
+
173
+ # Check whether TRACE log is enabled.
174
+ # Also, check whether TRACE came from STDOUT
175
+ docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
176
+ # Check whether DEBUG came from STDOUT
177
+ docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
178
+
179
+ docker kill test-changedetectionio
180
+
181
+ - name: Test changedetection.io SIGTERM and SIGINT signal shutdown
182
+ run: |
183
+
184
+ echo SIGINT Shutdown request test
185
+ docker run --name sig-test -d test-changedetectionio
186
+ sleep 3
187
+ echo ">>> Sending SIGINT to sig-test container"
188
+ docker kill --signal=SIGINT sig-test
189
+ sleep 3
190
+ # invert the check (it should be not 0/not running)
191
+ docker ps
192
+ # check signal catch(STDERR) log. Because of
193
+ # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
194
+ docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
195
+ test -z "`docker ps|grep sig-test`"
196
+ if [ $? -ne 0 ]
197
+ then
198
+ echo "Looks like container was running when it shouldnt be"
199
+ docker ps
200
+ exit 1
201
+ fi
202
+
203
+ # @todo - scan the container log to see the right "graceful shutdown" text exists
204
+ docker rm sig-test
205
+
206
+ echo SIGTERM Shutdown request test
207
+ docker run --name sig-test -d test-changedetectionio
208
+ sleep 3
209
+ echo ">>> Sending SIGTERM to sig-test container"
210
+ docker kill --signal=SIGTERM sig-test
211
+ sleep 3
212
+ # invert the check (it should be not 0/not running)
213
+ docker ps
214
+ # check signal catch(STDERR) log. Because of
215
+ # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
216
+ docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
217
+ test -z "`docker ps|grep sig-test`"
218
+ if [ $? -ne 0 ]
219
+ then
220
+ echo "Looks like container was running when it shouldnt be"
221
+ docker ps
222
+ exit 1
223
+ fi
224
+
225
+ # @todo - scan the container log to see the right "graceful shutdown" text exists
226
+ docker rm sig-test
227
+
228
+ - name: Dump container log
229
+ if: always()
230
+ run: |
231
+ mkdir output-logs
232
+ docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
233
+ docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
234
+
235
+ - name: Store everything including test-datastore
236
+ if: always()
237
+ uses: actions/upload-artifact@v4
238
+ with:
239
+ name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
240
+ path: .
.gitignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ **/__pycache__
3
+ **/*.py[cod]
4
+
5
+ # Caches
6
+ .mypy_cache/
7
+ .pytest_cache/
8
+ .ruff_cache/
9
+
10
+ # Distribution / packaging
11
+ build/
12
+ dist/
13
+ *.egg-info*
14
+
15
+ # Virtual environment
16
+ .env
17
+ .venv/
18
+ venv/
19
+ .python-version
20
+
21
+ # IDEs
22
+ .idea
23
+ .vscode/settings.json
24
+
25
+ # Datastore files
26
+ datastore/
27
+ test-datastore/
28
+
29
+ # Memory consumption log
30
+ test-memory.log
.pre-commit-config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.11.2
4
+ hooks:
5
+ # Lint (and apply safe fixes)
6
+ - id: ruff
7
+ args: [--fix]
8
+ # Format
9
+ - id: ruff-format
.ruff.toml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Minimum supported version
2
+ target-version = "py310"
3
+
4
+ # Formatting options
5
+ line-length = 100
6
+ indent-width = 4
7
+
8
+ exclude = [
9
+ "__pycache__",
10
+ ".eggs",
11
+ ".git",
12
+ ".tox",
13
+ ".venv",
14
+ "*.egg-info",
15
+ "*.pyc",
16
+ ]
17
+
18
+ [lint]
19
+ # https://docs.astral.sh/ruff/rules/
20
+ select = [
21
+ "B", # flake8-bugbear
22
+ "B9",
23
+ "C",
24
+ "E", # pycodestyle
25
+ "F", # Pyflakes
26
+ "I", # isort
27
+ "N", # pep8-naming
28
+ "UP", # pyupgrade
29
+ "W", # pycodestyle
30
+ ]
31
+ ignore = [
32
+ "B007", # unused-loop-control-variable
33
+ "B909", # loop-iterator-mutation
34
+ "E203", # whitespace-before-punctuation
35
+ "E266", # multiple-leading-hashes-for-block-comment
36
+ "E501", # line-too-long
37
+ "F403", # undefined-local-with-import-star
38
+ "N802", # invalid-function-name
39
+ "N806", # non-lowercase-variable-in-function
40
+ "N815", # mixed-case-variable-in-class-scope
41
+ ]
42
+
43
+ [lint.mccabe]
44
+ max-complexity = 12
45
+
46
+ [format]
47
+ indent-style = "space"
48
+ quote-style = "preserve"
COMMERCIAL_LICENCE.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generally
2
+
3
+ In any commercial activity involving 'Hosting' (as defined herein), whether in part or in full, this license must be executed and adhered to.
4
+
5
+ # Commercial License Agreement
6
+
7
+ This Commercial License Agreement ("Agreement") is entered into by and between Web Technologies s.r.o. herein ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party.
8
+
9
+ ### Definition of Hosting
10
+
11
+ For the purposes of this Agreement, "hosting" means making the functionality of the Program or modified version available to third parties as a service. This includes, without limitation:
12
+ - Enabling third parties to interact with the functionality of the Program or modified version remotely through a computer network.
13
+ - Offering a service the value of which entirely or primarily derives from the value of the Program or modified version.
14
+ - Offering a service that accomplishes for users the primary purpose of the Program or modified version.
15
+
16
+ ## 1. Grant of License
17
+ Subject to the terms and conditions of this Agreement, Licensor grants Licensee a non-exclusive, non-transferable license to install, use, and resell the Software. Licensee may:
18
+ - Resell the Software as part of a service offering or as a standalone product.
19
+ - Host the Software on a server and provide it as a hosted service (e.g., Software as a Service - SaaS).
20
+ - Integrate the Software into a larger product or service that is then sold or provided for commercial purposes, where the software is used either in part or full.
21
+
22
+ ## 2. License Fees
23
+ Licensee agrees to pay Licensor the license fees specified in the ordering document. License fees are due and payable as specified in the ordering document. The fees may include initial licensing costs and recurring fees based on the number of end users, instances of the Software resold, or revenue generated from the resale activities.
24
+
25
+ ## 3. Resale Conditions
26
+ Licensee must comply with the following conditions when reselling the Software, whether the software is resold in part or full:
27
+ - Provide end users with access to the source code under the same open-source license conditions as provided by Licensor.
28
+ - Clearly state in all marketing and sales materials that the Software is provided under a commercial license from Licensor, and provide a link back to https://changedetection.io.
29
+ - Ensure end users are aware of and agree to the terms of the commercial license prior to resale.
30
+ - Do not sublicense or transfer the Software to third parties except as part of an authorized resale activity.
31
+
32
+ ## 4. Hosting and Provision of Services
33
+ Licensee may host the Software (either in part or full) on its servers and provide it as a hosted service to end users. The following conditions apply:
34
+ - Licensee must ensure that all hosted versions of the Software comply with the terms of this Agreement.
35
+ - Licensee must provide Licensor with regular reports detailing the number of end users and instances of the hosted service.
36
+ - Any modifications to the Software made by Licensee for hosting purposes must be made available to end users under the same open-source license conditions, unless agreed otherwise.
37
+
38
+ ## 5. Services
39
+ Licensor will provide support and maintenance services as described in the support policy referenced in the ordering document should such an agreement be signed by all parties. Additional fees may apply for support services provided to end users resold by Licensee.
40
+
41
+ ## 6. Reporting and Audits
42
+ Licensee agrees to provide Licensor with regular reports detailing the number of instances, end users, and revenue generated from the resale of the Software. Licensor reserves the right to audit Licensee’s records to ensure compliance with this Agreement.
43
+
44
+ ## 7. Term and Termination
45
+ This Agreement shall commence on the effective date and continue for the period set forth in the ordering document unless terminated earlier in accordance with this Agreement. Either party may terminate this Agreement if the other party breaches any material term and fails to cure such breach within thirty (30) days after receipt of written notice.
46
+
47
+ ## 8. Limitation of Liability and Disclaimer of Warranty
48
+ Executing this commercial license does not waive the Limitation of Liability or Disclaimer of Warranty as stated in the open-source LICENSE provided with the Software. The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and noninfringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software.
49
+
50
+ ## 9. Governing Law
51
+ This Agreement shall be governed by and construed in accordance with the laws of the Czech Republic.
52
+
53
+ ## Contact Information
54
+ For commercial licensing inquiries, please contact contact@changedetection.io and dgtlmoon@gmail.com.
CONTRIBUTING.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Contributing is always welcome!
2
+
3
+ I am no professional flask developer, if you know a better way that something can be done, please let me know!
4
+
5
+ Otherwise, it's always best to PR into the `master` branch.
6
+
7
+ Please be sure that all new functionality has a matching test!
8
+
9
+ Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
Dockerfile ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip dependencies install stage
2
+
3
+ ARG PYTHON_VERSION=3.11
4
+
5
+ FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
6
+
7
+ # See `cryptography` pin comment in requirements.txt
8
+ ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
9
+
10
+ RUN apt-get update && apt-get install -y --no-install-recommends \
11
+ g++ \
12
+ gcc \
13
+ libc-dev \
14
+ libffi-dev \
15
+ libjpeg-dev \
16
+ libssl-dev \
17
+ libxslt-dev \
18
+ make \
19
+ zlib1g-dev
20
+
21
+ RUN mkdir /install
22
+ WORKDIR /install
23
+
24
+ COPY requirements.txt /requirements.txt
25
+
26
+ # Use cache mounts and multiple wheel sources for faster ARM builds
27
+ ENV PIP_CACHE_DIR=/tmp/pip-cache
28
+ RUN --mount=type=cache,target=/tmp/pip-cache \
29
+ pip install \
30
+ --extra-index-url https://www.piwheels.org/simple \
31
+ --extra-index-url https://pypi.anaconda.org/ARM-software/simple \
32
+ --cache-dir=/tmp/pip-cache \
33
+ --target=/dependencies \
34
+ -r /requirements.txt
35
+
36
+ # Playwright is an alternative to Selenium
37
+ # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
38
+ # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
39
+ RUN --mount=type=cache,target=/tmp/pip-cache \
40
+ pip install \
41
+ --cache-dir=/tmp/pip-cache \
42
+ --target=/dependencies \
43
+ playwright~=1.48.0 \
44
+ || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
45
+
46
+ # Final image stage
47
+ FROM python:${PYTHON_VERSION}-slim-bookworm
48
+ LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
49
+
50
+ RUN apt-get update && apt-get install -y --no-install-recommends \
51
+ libxslt1.1 \
52
+ # For presenting price amounts correctly in the restock/price detection overview
53
+ locales \
54
+ # For pdftohtml
55
+ poppler-utils \
56
+ zlib1g \
57
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
58
+
59
+
60
+ # https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
61
+ ENV PYTHONUNBUFFERED=1
62
+
63
+ RUN useradd -m appuser
64
+ RUN mkdir -p /datastore && chown -R appuser:appuser /datastore
65
+
66
+ # Re #80, sets SECLEVEL=1 in openssl.conf to allow monitoring sites with weak/old cipher suites
67
+ RUN sed -i 's/^CipherString = .*/CipherString = DEFAULT@SECLEVEL=1/' /etc/ssl/openssl.cnf
68
+
69
+ # Copy modules over to the final image and add their dir to PYTHONPATH
70
+ COPY --from=builder /dependencies /usr/local
71
+ ENV PYTHONPATH=/usr/local
72
+
73
+ COPY changedetectionio /app/changedetectionio
74
+ COPY changedetection.py /app/changedetection.py
75
+
76
+ # Github Action test purpose(test-only.yml).
77
+ # On production, it is effectively LOGGER_LEVEL=''.
78
+ ARG LOGGER_LEVEL=''
79
+ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
80
+
81
+ USER appuser
82
+ WORKDIR /app
83
+ EXPOSE 7860
84
+
85
+ CMD ["python", "./changedetection.py", "-d", "/datastore"]
86
+
87
+
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
MANIFEST.in ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ recursive-include changedetectionio/api *
2
+ recursive-include changedetectionio/blueprint *
3
+ recursive-include changedetectionio/content_fetchers *
4
+ recursive-include changedetectionio/conditions *
5
+ recursive-include changedetectionio/model *
6
+ recursive-include changedetectionio/notification *
7
+ recursive-include changedetectionio/processors *
8
+ recursive-include changedetectionio/realtime *
9
+ recursive-include changedetectionio/static *
10
+ recursive-include changedetectionio/templates *
11
+ recursive-include changedetectionio/tests *
12
+ prune changedetectionio/static/package-lock.json
13
+ prune changedetectionio/static/styles/node_modules
14
+ prune changedetectionio/static/styles/package-lock.json
15
+ include changedetection.py
16
+ include requirements.txt
17
+ include README-pip.md
18
+ global-exclude *.pyc
19
+ global-exclude node_modules
20
+ global-exclude venv
21
+
22
+ global-exclude test-datastore
23
+ global-exclude changedetection.io*dist-info
24
+ global-exclude changedetectionio/tests/proxy_socks5/test-datastore
README-pip.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Web Site Change Detection, Monitoring and Notification.
2
+
3
+ Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
4
+
5
+ [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
6
+
7
+ [**Don't have time? Let us host it for you! Try our extremely affordable subscription — use our proxies and support!**](https://changedetection.io)
8
+
9
+ ### Target specific parts of the webpage using the Visual Selector tool.
10
+
11
+ Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
12
+
13
+ [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Select parts and elements of a web page to monitor for changes" title="Select parts and elements of a web page to monitor for changes" />](https://changedetection.io?src=pip)
14
+
15
+ ### Easily see what changed, examine by word, line, or individual character.
16
+
17
+ [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://changedetection.io?src=pip)
18
+
19
+ ### Perform interactive browser steps
20
+
21
+ Fill in text boxes, click buttons and more, setup your changedetection scenario.
22
+
23
+ Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
24
+
25
+ [<img src="docs/browsersteps-anim.gif" style="max-width:100%;" alt="Website change detection with interactive browser steps, detect changes behind login and password, search queries and more" title="Website change detection with interactive browser steps, detect changes behind login and password, search queries and more" />](https://changedetection.io?src=pip)
26
+
27
+ After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in.
28
+ Requires Playwright to be enabled.
29
+
30
+ ### Example use cases
31
+
32
+ - Products and services have a change in pricing
33
+ - _Out of stock notification_ and _Back In stock notification_
34
+ - Monitor and track PDF file changes, know when a PDF file has text changes.
35
+ - Governmental department updates (changes are often only on their websites)
36
+ - New software releases, security advisories when you're not on their mailing list.
37
+ - Festivals with changes
38
+ - Discogs restock alerts and monitoring
39
+ - Real estate listing changes
40
+ - Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
41
+ - COVID related news from government websites
42
+ - University/organisation news from their website
43
+ - Detect and monitor changes in JSON API responses
44
+ - JSON API monitoring and alerting
45
+ - Changes in legal and other documents
46
+ - Trigger API calls via notifications when text appears on a website
47
+ - Glue together APIs using the JSON filter and JSON notifications
48
+ - Create RSS feeds based on changes in web content
49
+ - Monitor HTML source code for unexpected changes, strengthen your PCI compliance
50
+ - You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
51
+ - Get notified when certain keywords appear in Twitter search results
52
+ - Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
53
+ - Get alerts when new job positions are open on Bamboo HR and other job platforms
54
+ - Website defacement monitoring
55
+ - Pokémon Card Restock Tracker / Pokémon TCG Tracker
56
+ - RegTech - stay ahead of regulatory changes, regulatory compliance
57
+
58
+ _Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_
59
+
60
+ #### Key Features
61
+
62
+ - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
63
+ - Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
64
+ - Switch between fast non-JS and Chrome JS based "fetchers"
65
+ - Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
66
+ - Easily specify how often a site should be checked
67
+ - Execute JS before extracting text (Good for logging in, see examples in the UI!)
68
+ - Override Request Headers, Specify `POST` or `GET` and other methods
69
+ - Use the "Visual Selector" to help target specific elements
70
+ - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
71
+ - Send a screenshot with the notification when a change is detected in the web page
72
+
73
+ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
74
+
75
+ [Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
76
+
77
+ Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
78
+
79
+ ```bash
80
+ $ pip3 install changedetection.io
81
+ ```
82
+
83
+ Specify a target for the _datastore path_ with `-d` (required) and a _listening port_ with `-p` (defaults to `7860`)
84
+
85
+ ```bash
86
+ $ changedetection.io -d /path/to/empty/data/dir -p 7860
87
+ ```
88
+
89
+ Then visit http://127.0.0.1:7860 , You should now be able to access the UI.
90
+
91
+ See https://changedetection.io for more information.
changedetection.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ # Only exists for direct CLI usage
4
+
5
+ import changedetectionio
6
+
7
+ if __name__ == '__main__':
8
+ changedetectionio.main()
changedetectionio/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ test-datastore
2
+ package-lock.json
changedetectionio/PLUGIN_README.md ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Creating Plugins for changedetection.io
2
+
3
+ This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways.
4
+
5
+ ## Plugin Types
6
+
7
+ ### UI Stats Tab Plugins
8
+
9
+ These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch.
10
+
11
+ #### Creating a UI Stats Tab Plugin
12
+
13
+ 1. Create a Python file in a directory that will be loaded by the plugin system.
14
+
15
+ 2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook:
16
+
17
+ ```python
18
+ import pluggy
19
+ from loguru import logger
20
+
21
+ global_hookimpl = pluggy.HookimplMarker("changedetectionio")
22
+
23
+ @global_hookimpl
24
+ def ui_edit_stats_extras(watch):
25
+ """Add custom content to the stats tab"""
26
+ # Calculate or retrieve your stats
27
+ my_stat = calculate_something(watch)
28
+
29
+ # Return HTML content as a string
30
+ html = f"""
31
+ <div class="my-plugin-stats">
32
+ <h4>My Plugin Statistics</h4>
33
+ <p>My statistic: {my_stat}</p>
34
+ </div>
35
+ """
36
+ return html
37
+ ```
38
+
39
+ 3. The HTML you return will be included in the Stats tab.
40
+
41
+ ## Plugin Loading
42
+
43
+ Plugins can be loaded from:
44
+
45
+ 1. Built-in plugin directories in the codebase
46
+ 2. External packages using setuptools entry points
47
+
48
+ To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`.
49
+
50
+ ## Example Plugin
51
+
52
+ Here's a simple example of a plugin that adds a word count statistic to the Stats tab:
53
+
54
+ ```python
55
+ import pluggy
56
+ from loguru import logger
57
+
58
+ global_hookimpl = pluggy.HookimplMarker("changedetectionio")
59
+
60
+ def count_words_in_history(watch):
61
+ """Count words in the latest snapshot"""
62
+ try:
63
+ if not watch.history.keys():
64
+ return 0
65
+
66
+ latest_key = list(watch.history.keys())[-1]
67
+ latest_content = watch.get_history_snapshot(latest_key)
68
+ return len(latest_content.split())
69
+ except Exception as e:
70
+ logger.error(f"Error counting words: {str(e)}")
71
+ return 0
72
+
73
+ @global_hookimpl
74
+ def ui_edit_stats_extras(watch):
75
+ """Add word count to the Stats tab"""
76
+ word_count = count_words_in_history(watch)
77
+
78
+ html = f"""
79
+ <div class="word-count-stats">
80
+ <h4>Content Analysis</h4>
81
+ <table class="pure-table">
82
+ <tbody>
83
+ <tr>
84
+ <td>Word count (latest snapshot)</td>
85
+ <td>{word_count}</td>
86
+ </tr>
87
+ </tbody>
88
+ </table>
89
+ </div>
90
+ """
91
+ return html
92
+ ```
93
+
94
+ ## Testing Your Plugin
95
+
96
+ 1. Place your plugin in one of the directories scanned by the plugin system
97
+ 2. Restart changedetection.io
98
+ 3. Go to the Edit page of a watch and check the Stats tab to see your content
changedetectionio/__init__.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ # Read more https://github.com/dgtlmoon/changedetection.io/wiki
4
+
5
+ __version__ = '0.50.2'
6
+
7
+ from changedetectionio.strtobool import strtobool
8
+ from json.decoder import JSONDecodeError
9
+ import os
10
+ import getopt
11
+ import platform
12
+ import signal
13
+
14
+ import sys
15
+
16
+ # Eventlet completely removed - using threading mode for SocketIO
17
+ # This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
18
+ from changedetectionio import store
19
+ from changedetectionio.flask_app import changedetection_app
20
+ from loguru import logger
21
+
22
+ # Only global so we can access it in the signal handler
23
+ app = None
24
+ datastore = None
25
+
26
+ def get_version():
27
+ return __version__
28
+
29
+ # Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
30
+ def sigshutdown_handler(_signo, _stack_frame):
31
+ name = signal.Signals(_signo).name
32
+ logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
33
+
34
+ # Set exit flag immediately to stop all loops
35
+ app.config.exit.set()
36
+ datastore.stop_thread = True
37
+
38
+ # Shutdown workers immediately
39
+ try:
40
+ from changedetectionio import worker_handler
41
+ worker_handler.shutdown_workers()
42
+ except Exception as e:
43
+ logger.error(f"Error shutting down workers: {str(e)}")
44
+
45
+ # Shutdown socketio server fast
46
+ from changedetectionio.flask_app import socketio_server
47
+ if socketio_server and hasattr(socketio_server, 'shutdown'):
48
+ try:
49
+ socketio_server.shutdown()
50
+ except Exception as e:
51
+ logger.error(f"Error shutting down Socket.IO server: {str(e)}")
52
+
53
+ # Save data quickly
54
+ try:
55
+ datastore.sync_to_json()
56
+ logger.success('Fast sync to disk complete.')
57
+ except Exception as e:
58
+ logger.error(f"Error syncing to disk: {str(e)}")
59
+
60
+ sys.exit()
61
+
62
+ def main():
63
+ global datastore
64
+ global app
65
+
66
+ datastore_path = None
67
+ do_cleanup = False
68
+ host = "0.0.0.0"
69
+ ipv6_enabled = False
70
+ port = int(os.environ.get('PORT', 7860))
71
+ ssl_mode = False
72
+
73
+ # On Windows, create and use a default path.
74
+ if os.name == 'nt':
75
+ datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
76
+ os.makedirs(datastore_path, exist_ok=True)
77
+ else:
78
+ # Must be absolute so that send_from_directory doesnt try to make it relative to backend/
79
+ datastore_path = os.path.join(os.getcwd(), "../datastore")
80
+
81
+ try:
82
+ opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
83
+ except getopt.GetoptError:
84
+ print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
85
+ sys.exit(2)
86
+
87
+ create_datastore_dir = False
88
+
89
+ # Set a default logger level
90
+ logger_level = 'DEBUG'
91
+ # Set a logger level via shell env variable
92
+ # Used: Dockerfile for CICD
93
+ # To set logger level for pytest, see the app function in tests/conftest.py
94
+ if os.getenv("LOGGER_LEVEL"):
95
+ level = os.getenv("LOGGER_LEVEL")
96
+ logger_level = int(level) if level.isdigit() else level.upper()
97
+
98
+ for opt, arg in opts:
99
+ if opt == '-s':
100
+ ssl_mode = True
101
+
102
+ if opt == '-h':
103
+ host = arg
104
+
105
+ if opt == '-p':
106
+ port = int(arg)
107
+
108
+ if opt == '-d':
109
+ datastore_path = arg
110
+
111
+ if opt == '-6':
112
+ logger.success("Enabling IPv6 listen support")
113
+ ipv6_enabled = True
114
+
115
+ # Cleanup (remove text files that arent in the index)
116
+ if opt == '-c':
117
+ do_cleanup = True
118
+
119
+ # Create the datadir if it doesnt exist
120
+ if opt == '-C':
121
+ create_datastore_dir = True
122
+
123
+ if opt == '-l':
124
+ logger_level = int(arg) if arg.isdigit() else arg.upper()
125
+
126
+ # Without this, a logger will be duplicated
127
+ logger.remove()
128
+ try:
129
+ log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
130
+ logger.configure(handlers=[
131
+ {"sink": sys.stdout, "level": logger_level,
132
+ "filter" : lambda record: record['level'].name in log_level_for_stdout},
133
+ {"sink": sys.stderr, "level": logger_level,
134
+ "filter": lambda record: record['level'].name not in log_level_for_stdout},
135
+ ])
136
+ # Catch negative number or wrong log level name
137
+ except ValueError:
138
+ print("Available log level names: TRACE, DEBUG(default), INFO, SUCCESS,"
139
+ " WARNING, ERROR, CRITICAL")
140
+ sys.exit(2)
141
+
142
+ # isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
143
+ app_config = {'datastore_path': datastore_path}
144
+
145
+ if not os.path.isdir(app_config['datastore_path']):
146
+ if create_datastore_dir:
147
+ os.mkdir(app_config['datastore_path'])
148
+ else:
149
+ logger.critical(
150
+ f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
151
+ f" does not exist, cannot start, please make sure the"
152
+ f" directory exists or specify a directory with the -d option.\n"
153
+ f"Or use the -C parameter to create the directory.")
154
+ sys.exit(2)
155
+
156
+ try:
157
+ datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
158
+ except JSONDecodeError as e:
159
+ # Dont' start if the JSON DB looks corrupt
160
+ logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
161
+ logger.critical(str(e))
162
+ return
163
+
164
+ app = changedetection_app(app_config, datastore)
165
+
166
+ # Get the SocketIO instance from the Flask app (created in flask_app.py)
167
+ from changedetectionio.flask_app import socketio_server
168
+ global socketio
169
+ socketio = socketio_server
170
+
171
+ signal.signal(signal.SIGTERM, sigshutdown_handler)
172
+ signal.signal(signal.SIGINT, sigshutdown_handler)
173
+
174
+ # Custom signal handler for memory cleanup
175
+ def sigusr_clean_handler(_signo, _stack_frame):
176
+ from changedetectionio.gc_cleanup import memory_cleanup
177
+ logger.info('SIGUSR1 received: Running memory cleanup')
178
+ return memory_cleanup(app)
179
+
180
+ # Register the SIGUSR1 signal handler
181
+ # Only register the signal handler if running on Linux
182
+ if platform.system() == "Linux":
183
+ signal.signal(signal.SIGUSR1, sigusr_clean_handler)
184
+ else:
185
+ logger.info("SIGUSR1 handler only registered on Linux, skipped.")
186
+
187
+ # Go into cleanup mode
188
+ if do_cleanup:
189
+ datastore.remove_unused_snapshots()
190
+
191
+ app.config['datastore_path'] = datastore_path
192
+
193
+
194
+ @app.context_processor
195
+ def inject_template_globals():
196
+ return dict(right_sticky="v{}".format(datastore.data['version_tag']),
197
+ new_version_available=app.config['NEW_VERSION_AVAILABLE'],
198
+ has_password=datastore.data['settings']['application']['password'] != False,
199
+ socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
200
+ )
201
+
202
+ # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
203
+ @app.after_request
204
+ def hide_referrer(response):
205
+ if strtobool(os.getenv("HIDE_REFERER", 'false')):
206
+ response.headers["Referrer-Policy"] = "same-origin"
207
+
208
+ return response
209
+
210
+ # Proxy sub-directory support
211
+ # Set environment var USE_X_SETTINGS=1 on this script
212
+ # And then in your proxy_pass settings
213
+ #
214
+ # proxy_set_header Host "localhost";
215
+ # proxy_set_header X-Forwarded-Prefix /app;
216
+
217
+
218
+ if os.getenv('USE_X_SETTINGS'):
219
+ logger.info("USE_X_SETTINGS is ENABLED")
220
+ from werkzeug.middleware.proxy_fix import ProxyFix
221
+ app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
222
+
223
+
224
+ # SocketIO instance is already initialized in flask_app.py
225
+
226
+ # Launch using SocketIO run method for proper integration (if enabled)
227
+ if socketio_server:
228
+ if ssl_mode:
229
+ socketio.run(app, host=host, port=int(port), debug=False,
230
+ certfile='cert.pem', keyfile='privkey.pem', allow_unsafe_werkzeug=True)
231
+ else:
232
+ socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
233
+ else:
234
+ # Run Flask app without Socket.IO if disabled
235
+ logger.info("Starting Flask app without Socket.IO server")
236
+ if ssl_mode:
237
+ app.run(host=host, port=int(port), debug=False,
238
+ ssl_context=('cert.pem', 'privkey.pem'))
239
+ else:
240
+ app.run(host=host, port=int(port), debug=False)
changedetectionio/api/Import.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from changedetectionio.strtobool import strtobool
3
+ from flask_restful import abort, Resource
4
+ from flask import request
5
+ import validators
6
+ from . import auth
7
+
8
+
9
+ class Import(Resource):
10
+ def __init__(self, **kwargs):
11
+ # datastore is a black box dependency
12
+ self.datastore = kwargs['datastore']
13
+
14
+ @auth.check_token
15
+ def post(self):
16
+ """
17
+ @api {post} /api/v1/import Import a list of watched URLs
18
+ @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
19
+ @apiExample {curl} Example usage:
20
+ curl http://localhost:7860/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
21
+ @apiName Import
22
+ @apiGroup Watch
23
+ @apiSuccess (200) {List} OK List of watch UUIDs added
24
+ @apiSuccess (500) {String} ERR Some other error
25
+ """
26
+
27
+ extras = {}
28
+
29
+ if request.args.get('proxy'):
30
+ plist = self.datastore.proxy_list
31
+ if not request.args.get('proxy') in plist:
32
+ return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
33
+ else:
34
+ extras['proxy'] = request.args.get('proxy')
35
+
36
+ dedupe = strtobool(request.args.get('dedupe', 'true'))
37
+
38
+ tags = request.args.get('tag')
39
+ tag_uuids = request.args.get('tag_uuids')
40
+
41
+ if tag_uuids:
42
+ tag_uuids = tag_uuids.split(',')
43
+
44
+ urls = request.get_data().decode('utf8').splitlines()
45
+ added = []
46
+ allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
47
+ for url in urls:
48
+ url = url.strip()
49
+ if not len(url):
50
+ continue
51
+
52
+ # If hosts that only contain alphanumerics are allowed ("localhost" for example)
53
+ if not validators.url(url, simple_host=allow_simplehost):
54
+ return f"Invalid or unsupported URL - {url}", 400
55
+
56
+ if dedupe and self.datastore.url_exists(url):
57
+ continue
58
+
59
+ new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
60
+ added.append(new_uuid)
61
+
62
+ return added
changedetectionio/api/Notifications.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask_expects_json import expects_json
2
+ from flask_restful import Resource
3
+ from . import auth
4
+ from flask_restful import abort, Resource
5
+ from flask import request
6
+ from . import auth
7
+ from . import schema_create_notification_urls, schema_delete_notification_urls
8
+
9
+ class Notifications(Resource):
10
+ def __init__(self, **kwargs):
11
+ # datastore is a black box dependency
12
+ self.datastore = kwargs['datastore']
13
+
14
+ @auth.check_token
15
+ def get(self):
16
+ """
17
+ @api {get} /api/v1/notifications Return Notification URL List
18
+ @apiDescription Return the Notification URL List from the configuration
19
+ @apiExample {curl} Example usage:
20
+ curl http://localhost:7860/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
21
+ HTTP/1.0 200
22
+ {
23
+ 'notification_urls': ["notification-urls-list"]
24
+ }
25
+ @apiName Get
26
+ @apiGroup Notifications
27
+ """
28
+
29
+ notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])
30
+
31
+ return {
32
+ 'notification_urls': notification_urls,
33
+ }, 200
34
+
35
+ @auth.check_token
36
+ @expects_json(schema_create_notification_urls)
37
+ def post(self):
38
+ """
39
+ @api {post} /api/v1/notifications Create Notification URLs
40
+ @apiDescription Add one or more notification URLs from the configuration
41
+ @apiExample {curl} Example usage:
42
+ curl http://localhost:7860/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
43
+ @apiName CreateBatch
44
+ @apiGroup Notifications
45
+ @apiSuccess (201) {Object[]} notification_urls List of added notification URLs
46
+ @apiError (400) {String} Invalid input
47
+ """
48
+
49
+ json_data = request.get_json()
50
+ notification_urls = json_data.get("notification_urls", [])
51
+
52
+ from wtforms import ValidationError
53
+ try:
54
+ validate_notification_urls(notification_urls)
55
+ except ValidationError as e:
56
+ return str(e), 400
57
+
58
+ added_urls = []
59
+
60
+ for url in notification_urls:
61
+ clean_url = url.strip()
62
+ added_url = self.datastore.add_notification_url(clean_url)
63
+ if added_url:
64
+ added_urls.append(added_url)
65
+
66
+ if not added_urls:
67
+ return "No valid notification URLs were added", 400
68
+
69
+ return {'notification_urls': added_urls}, 201
70
+
71
+ @auth.check_token
72
+ @expects_json(schema_create_notification_urls)
73
+ def put(self):
74
+ """
75
+ @api {put} /api/v1/notifications Replace Notification URLs
76
+ @apiDescription Replace all notification URLs with the provided list (can be empty)
77
+ @apiExample {curl} Example usage:
78
+ curl -X PUT http://localhost:7860/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
79
+ @apiName Replace
80
+ @apiGroup Notifications
81
+ @apiSuccess (200) {Object[]} notification_urls List of current notification URLs
82
+ @apiError (400) {String} Invalid input
83
+ """
84
+ json_data = request.get_json()
85
+ notification_urls = json_data.get("notification_urls", [])
86
+
87
+ from wtforms import ValidationError
88
+ try:
89
+ validate_notification_urls(notification_urls)
90
+ except ValidationError as e:
91
+ return str(e), 400
92
+
93
+ if not isinstance(notification_urls, list):
94
+ return "Invalid input format", 400
95
+
96
+ clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
97
+ self.datastore.data['settings']['application']['notification_urls'] = clean_urls
98
+ self.datastore.needs_write = True
99
+
100
+ return {'notification_urls': clean_urls}, 200
101
+
102
+ @auth.check_token
103
+ @expects_json(schema_delete_notification_urls)
104
+ def delete(self):
105
+ """
106
+ @api {delete} /api/v1/notifications Delete Notification URLs
107
+ @apiDescription Deletes one or more notification URLs from the configuration
108
+ @apiExample {curl} Example usage:
109
+ curl http://localhost:7860/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
110
+ @apiParam {String[]} notification_urls The notification URLs to delete.
111
+ @apiName Delete
112
+ @apiGroup Notifications
113
+ @apiSuccess (204) {String} OK Deleted
114
+ @apiError (400) {String} No matching notification URLs found.
115
+ """
116
+
117
+ json_data = request.get_json()
118
+ urls_to_delete = json_data.get("notification_urls", [])
119
+ if not isinstance(urls_to_delete, list):
120
+ abort(400, message="Expected a list of notification URLs.")
121
+
122
+ notification_urls = self.datastore.data['settings']['application'].get('notification_urls', [])
123
+ deleted = []
124
+
125
+ for url in urls_to_delete:
126
+ clean_url = url.strip()
127
+ if clean_url in notification_urls:
128
+ notification_urls.remove(clean_url)
129
+ deleted.append(clean_url)
130
+
131
+ if not deleted:
132
+ abort(400, message="No matching notification URLs found.")
133
+
134
+ self.datastore.data['settings']['application']['notification_urls'] = notification_urls
135
+ self.datastore.needs_write = True
136
+
137
+ return 'OK', 204
138
+
139
+ def validate_notification_urls(notification_urls):
140
+ from changedetectionio.forms import ValidateAppRiseServers
141
+ validator = ValidateAppRiseServers()
142
+ class DummyForm: pass
143
+ dummy_form = DummyForm()
144
+ field = type("Field", (object,), {"data": notification_urls, "gettext": lambda self, x: x})()
145
+ validator(dummy_form, field)
changedetectionio/api/Search.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask_restful import Resource, abort
2
+ from flask import request
3
+ from . import auth
4
+
5
+ class Search(Resource):
6
+ def __init__(self, **kwargs):
7
+ # datastore is a black box dependency
8
+ self.datastore = kwargs['datastore']
9
+
10
+ @auth.check_token
11
+ def get(self):
12
+ """
13
+ @api {get} /api/v1/search Search for watches
14
+ @apiDescription Search watches by URL or title text
15
+ @apiExample {curl} Example usage:
16
+ curl "http://localhost:7860/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
17
+ curl "http://localhost:7860/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
18
+ curl "http://localhost:7860/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
19
+ @apiName Search
20
+ @apiGroup Watch Management
21
+ @apiQuery {String} q Search query to match against watch URLs and titles
22
+ @apiQuery {String} [tag] Optional name of tag to limit results (name not UUID)
23
+ @apiQuery {String} [partial] Allow partial matching of URL query
24
+ @apiSuccess (200) {Object} JSON Object containing matched watches
25
+ """
26
+ query = request.args.get('q', '').strip()
27
+ tag_limit = request.args.get('tag', '').strip()
28
+ from changedetectionio.strtobool import strtobool
29
+ partial = bool(strtobool(request.args.get('partial', '0'))) if 'partial' in request.args else False
30
+
31
+ # Require a search query
32
+ if not query:
33
+ abort(400, message="Search query 'q' parameter is required")
34
+
35
+ # Use the search function from the datastore
36
+ matching_uuids = self.datastore.search_watches_for_url(query=query, tag_limit=tag_limit, partial=partial)
37
+
38
+ # Build the response with watch details
39
+ results = {}
40
+ for uuid in matching_uuids:
41
+ watch = self.datastore.data['watching'].get(uuid)
42
+ results[uuid] = {
43
+ 'last_changed': watch.last_changed,
44
+ 'last_checked': watch['last_checked'],
45
+ 'last_error': watch['last_error'],
46
+ 'title': watch['title'],
47
+ 'url': watch['url'],
48
+ 'viewed': watch.viewed
49
+ }
50
+
51
+ return results, 200
changedetectionio/api/SystemInfo.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask_restful import Resource
2
+ from . import auth
3
+
4
+
5
+ class SystemInfo(Resource):
6
+ def __init__(self, **kwargs):
7
+ # datastore is a black box dependency
8
+ self.datastore = kwargs['datastore']
9
+ self.update_q = kwargs['update_q']
10
+
11
+ @auth.check_token
12
+ def get(self):
13
+ """
14
+ @api {get} /api/v1/systeminfo Return system info
15
+ @apiDescription Return some info about the current system state
16
+ @apiExample {curl} Example usage:
17
+ curl http://localhost:7860/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
18
+ HTTP/1.0 200
19
+ {
20
+ 'queue_size': 10 ,
21
+ 'overdue_watches': ["watch-uuid-list"],
22
+ 'uptime': 38344.55,
23
+ 'watch_count': 800,
24
+ 'version': "0.40.1"
25
+ }
26
+ @apiName Get Info
27
+ @apiGroup System Information
28
+ """
29
+ import time
30
+ overdue_watches = []
31
+
32
+ # Check all watches and report which have not been checked but should have been
33
+
34
+ for uuid, watch in self.datastore.data.get('watching', {}).items():
35
+ # see if now - last_checked is greater than the time that should have been
36
+ # this is not super accurate (maybe they just edited it) but better than nothing
37
+ t = watch.threshold_seconds()
38
+ if not t:
39
+ # Use the system wide default
40
+ t = self.datastore.threshold_seconds
41
+
42
+ time_since_check = time.time() - watch.get('last_checked')
43
+
44
+ # Allow 5 minutes of grace time before we decide it's overdue
45
+ if time_since_check - (5 * 60) > t:
46
+ overdue_watches.append(uuid)
47
+ from changedetectionio import __version__ as main_version
48
+ return {
49
+ 'queue_size': self.update_q.qsize(),
50
+ 'overdue_watches': overdue_watches,
51
+ 'uptime': round(time.time() - self.datastore.start_time, 2),
52
+ 'watch_count': len(self.datastore.data.get('watching', {})),
53
+ 'version': main_version
54
+ }, 200
changedetectionio/api/Tags.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask_expects_json import expects_json
2
+ from flask_restful import abort, Resource
3
+ from flask import request
4
+ from . import auth
5
+
6
+ # Import schemas from __init__.py
7
+ from . import schema_tag, schema_create_tag, schema_update_tag
8
+
9
+
10
+ class Tag(Resource):
11
+ def __init__(self, **kwargs):
12
+ # datastore is a black box dependency
13
+ self.datastore = kwargs['datastore']
14
+
15
+ # Get information about a single tag
16
+ # curl http://localhost:7860/api/v1/tag/<string:uuid>
17
+ @auth.check_token
18
+ def get(self, uuid):
19
+ """
20
+ @api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting.
21
+ @apiDescription Retrieve tag information and set notification_muted status
22
+ @apiExample {curl} Example usage:
23
+ curl http://localhost:7860/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
24
+ curl "http://localhost:7860/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
25
+ @apiName Tag
26
+ @apiGroup Tag
27
+ @apiParam {uuid} uuid Tag unique ID.
28
+ @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
29
+ @apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag
30
+ @apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag
31
+ """
32
+ from copy import deepcopy
33
+ tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
34
+ if not tag:
35
+ abort(404, message=f'No tag exists with the UUID of {uuid}')
36
+
37
+ if request.args.get('muted', '') == 'muted':
38
+ self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
39
+ return "OK", 200
40
+ elif request.args.get('muted', '') == 'unmuted':
41
+ self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
42
+ return "OK", 200
43
+
44
+ return tag
45
+
46
+ @auth.check_token
47
+ def delete(self, uuid):
48
+ """
49
+ @api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches
50
+ @apiExample {curl} Example usage:
51
+ curl http://localhost:7860/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
52
+ @apiParam {uuid} uuid Tag unique ID.
53
+ @apiName DeleteTag
54
+ @apiGroup Tag
55
+ @apiSuccess (200) {String} OK Was deleted
56
+ """
57
+ if not self.datastore.data['settings']['application']['tags'].get(uuid):
58
+ abort(400, message='No tag exists with the UUID of {}'.format(uuid))
59
+
60
+ # Delete the tag, and any tag reference
61
+ del self.datastore.data['settings']['application']['tags'][uuid]
62
+
63
+ # Remove tag from all watches
64
+ for watch_uuid, watch in self.datastore.data['watching'].items():
65
+ if watch.get('tags') and uuid in watch['tags']:
66
+ watch['tags'].remove(uuid)
67
+
68
+ return 'OK', 204
69
+
70
+ @auth.check_token
71
+ @expects_json(schema_update_tag)
72
+ def put(self, uuid):
73
+ """
74
+ @api {put} /api/v1/tag/:uuid Update tag information
75
+ @apiExample {curl} Example usage:
76
+ Update (PUT)
77
+ curl http://localhost:7860/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}'
78
+
79
+ @apiDescription Updates an existing tag using JSON
80
+ @apiParam {uuid} uuid Tag unique ID.
81
+ @apiName UpdateTag
82
+ @apiGroup Tag
83
+ @apiSuccess (200) {String} OK Was updated
84
+ @apiSuccess (500) {String} ERR Some other error
85
+ """
86
+ tag = self.datastore.data['settings']['application']['tags'].get(uuid)
87
+ if not tag:
88
+ abort(404, message='No tag exists with the UUID of {}'.format(uuid))
89
+
90
+ tag.update(request.json)
91
+ self.datastore.needs_write_urgent = True
92
+
93
+ return "OK", 200
94
+
95
+
96
+ @auth.check_token
97
+ # Only cares for {'title': 'xxxx'}
98
+ def post(self):
99
+ """
100
+ @api {post} /api/v1/watch Create a single tag
101
+ @apiExample {curl} Example usage:
102
+ curl http://localhost:7860/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}'
103
+ @apiName Create
104
+ @apiGroup Tag
105
+ @apiSuccess (200) {String} OK Was created
106
+ @apiSuccess (500) {String} ERR Some other error
107
+ """
108
+
109
+ json_data = request.get_json()
110
+ title = json_data.get("title",'').strip()
111
+
112
+
113
+ new_uuid = self.datastore.add_tag(title=title)
114
+ if new_uuid:
115
+ return {'uuid': new_uuid}, 201
116
+ else:
117
+ return "Invalid or unsupported tag", 400
118
+
119
+ class Tags(Resource):
120
+ def __init__(self, **kwargs):
121
+ # datastore is a black box dependency
122
+ self.datastore = kwargs['datastore']
123
+
124
+ @auth.check_token
125
+ def get(self):
126
+ """
127
+ @api {get} /api/v1/tags List tags
128
+ @apiDescription Return list of available tags
129
+ @apiExample {curl} Example usage:
130
+ curl http://localhost:7860/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45"
131
+ {
132
+ "cc0cfffa-f449-477b-83ea-0caafd1dc091": {
133
+ "title": "Tech News",
134
+ "notification_muted": false,
135
+ "date_created": 1677103794
136
+ },
137
+ "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
138
+ "title": "Shopping",
139
+ "notification_muted": true,
140
+ "date_created": 1676662819
141
+ }
142
+ }
143
+ @apiName ListTags
144
+ @apiGroup Tag Management
145
+ @apiSuccess (200) {String} OK JSON dict
146
+ """
147
+ result = {}
148
+ for uuid, tag in self.datastore.data['settings']['application']['tags'].items():
149
+ result[uuid] = {
150
+ 'date_created': tag.get('date_created', 0),
151
+ 'notification_muted': tag.get('notification_muted', False),
152
+ 'title': tag.get('title', ''),
153
+ 'uuid': tag.get('uuid')
154
+ }
155
+
156
+ return result, 200
changedetectionio/api/Watch.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from changedetectionio.strtobool import strtobool
3
+
4
+ from flask_expects_json import expects_json
5
+ from changedetectionio import queuedWatchMetaData
6
+ from changedetectionio import worker_handler
7
+ from flask_restful import abort, Resource
8
+ from flask import request, make_response
9
+ import validators
10
+ from . import auth
11
+ import copy
12
+
13
+ # Import schemas from __init__.py
14
+ from . import schema, schema_create_watch, schema_update_watch
15
+
16
+
17
class Watch(Resource):
    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    # Get information about a single watch, excluding the history list (can be large)
    # curl http://localhost:7860/api/v1/watch/<string:uuid>
    # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
    # ?recheck=true
    @auth.check_token
    def get(self, uuid):
        """
        @api {get} /api/v1/watch/:uuid Single watch - get data, recheck, pause, mute.
        @apiDescription Retrieve watch information and set muted/paused status
        @apiExample {curl} Example usage:
            curl http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=unmuted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?paused=unpaused" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiName Watch
        @apiGroup Watch
        @apiParam {uuid} uuid Watch unique ID.
        @apiQuery {Boolean} [recheck] Recheck this watch `recheck=1`
        @apiQuery {String} [paused] =`paused` or =`unpaused` , Sets the PAUSED state
        @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
        @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
        @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
        """
        # Deep-copy so the computed properties added below don't leak back
        # into the live datastore object (module-level `import copy` is used
        # instead of a redundant function-scope `from copy import deepcopy`).
        watch = copy.deepcopy(self.datastore.data['watching'].get(uuid))
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
            worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            self.datastore.data['watching'].get(uuid).pause()
            return "OK", 200
        elif request.args.get('paused', '') == 'unpaused':
            self.datastore.data['watching'].get(uuid).unpause()
            return "OK", 200
        if request.args.get('muted', '') == 'muted':
            self.datastore.data['watching'].get(uuid).mute()
            return "OK", 200
        elif request.args.get('muted', '') == 'unmuted':
            self.datastore.data['watching'].get(uuid).unmute()
            return "OK", 200

        # Return without history, get that via another API call
        # Properties are not returned as a JSON, so add the required props manually
        watch['history_n'] = watch.history_n
        # attr .last_changed will check for the last written text snapshot on change
        watch['last_changed'] = watch.last_changed
        watch['viewed'] = watch.viewed
        return watch

    @auth.check_token
    def delete(self, uuid):
        """
        @api {delete} /api/v1/watch/:uuid Delete a watch and related history
        @apiExample {curl} Example usage:
            curl http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiParam {uuid} uuid Watch unique ID.
        @apiName Delete
        @apiGroup Watch
        @apiSuccess (204) {String} OK Was deleted
        """
        if not self.datastore.data['watching'].get(uuid):
            # 404 (not 400) for a missing resource, consistent with GET/PUT on this endpoint.
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        self.datastore.delete(uuid)
        return 'OK', 204

    @auth.check_token
    @expects_json(schema_update_watch)
    def put(self, uuid):
        """
        @api {put} /api/v1/watch/:uuid Update watch information
        @apiExample {curl} Example usage:
            Update (PUT)
            curl http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}'

        @apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a>
        @apiParam {uuid} uuid Watch unique ID.
        @apiName Update a watch
        @apiGroup Watch
        @apiSuccess (200) {String} OK Was updated
        @apiSuccess (500) {String} ERR Some other error
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.json.get('proxy'):
            plist = self.datastore.proxy_list
            if request.json.get('proxy') not in plist:
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400

        watch.update(request.json)

        return "OK", 200
119
+
120
+
121
class WatchHistory(Resource):
    """Lists the stored snapshot history (timestamp -> file path) for one watch."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    # Get a list of available history for a watch by UUID
    # curl http://localhost:7860/api/v1/watch/<string:uuid>/history
    @auth.check_token
    def get(self, uuid):
        """
        @api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch
        @apiDescription Requires `uuid`, returns list
        @apiExample {curl} Example usage:
            curl http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
            {
                "1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt",
                "1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt",
                "1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt"
            }
        @apiName Get list of available stored snapshots for watch
        @apiGroup Watch History
        @apiSuccess (200) {String} OK
        @apiSuccess (404) {String} ERR Not found
        """
        target = self.datastore.data['watching'].get(uuid)

        if not target:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        return target.history, 200
149
+
150
+
151
class WatchSingleHistory(Resource):
    """Serves the content of one stored snapshot (text, or last fetched HTML)."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    def get(self, uuid, timestamp):
        """
        @api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch
        @apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a>
        @apiExample {curl} Example usage:
            curl http://localhost:7860/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
        @apiName Get single snapshot content
        @apiGroup Watch History
        @apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
        @apiSuccess (200) {String} OK
        @apiSuccess (404) {String} ERR Not found
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message=f"No watch exists with the UUID of {uuid}")

        if not watch.history:
            abort(404, message=f"Watch found but no history exists for the UUID {uuid}")

        if timestamp == 'latest':
            # Resolve the alias to the most recently stored snapshot key.
            timestamp = list(watch.history.keys())[-1]

        if request.args.get('html'):
            # HTML is only kept for the most recent fetches, so it may be gone.
            content = watch.get_fetched_html(timestamp)
            if content:
                response = make_response(content, 200)
                response.mimetype = "text/html"
            else:
                response = make_response("No content found", 404)
                response.mimetype = "text/plain"
            return response

        response = make_response(watch.get_history_snapshot(timestamp), 200)
        response.mimetype = "text/plain"
        return response
193
+
194
+
195
class CreateWatch(Resource):
    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    @auth.check_token
    @expects_json(schema_create_watch)
    def post(self):
        """
        @api {post} /api/v1/watch Create a single watch
        @apiDescription Requires at least `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create.
        @apiExample {curl} Example usage:
            curl http://localhost:7860/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}'
        @apiName Create
        @apiGroup Watch
        @apiSuccess (200) {String} OK Was created
        @apiSuccess (500) {String} ERR Some other error
        """

        json_data = request.get_json()
        url = json_data['url'].strip()

        # If hosts that only contain alphanumerics are allowed ("localhost" for example)
        allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
        if not validators.url(url, simple_host=allow_simplehost):
            return "Invalid or unsupported URL", 400

        if json_data.get('proxy'):
            plist = self.datastore.proxy_list
            if json_data.get('proxy') not in plist:
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400

        extras = copy.deepcopy(json_data)

        # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
        tags = None
        if extras.get('tag'):
            tags = extras.pop('tag')

        del extras['url']

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
        if not new_uuid:
            return "Invalid or unsupported URL", 400

        # Queue an initial check of the newly created watch.
        worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
        return {'uuid': new_uuid}, 201

    @auth.check_token
    def get(self):
        """
        @api {get} /api/v1/watch List watches
        @apiDescription Return concise list of available watches and some very basic info
        @apiExample {curl} Example usage:
            curl http://localhost:7860/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            {
                "6a4b7d5c-fee4-4616-9f43-4ac97046b595": {
                    "last_changed": 1677103794,
                    "last_checked": 1677103794,
                    "last_error": false,
                    "title": "",
                    "url": "http://www.quotationspage.com/random.php"
                },
                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
                    "last_changed": 0,
                    "last_checked": 1676662819,
                    "last_error": false,
                    "title": "QuickLook",
                    "url": "https://github.com/QL-Win/QuickLook/tags"
                }
            }

        @apiParam {String} [recheck_all] Optional Set to =1 to force recheck of all watches
        @apiParam {String} [tag] Optional name of tag to limit results
        @apiName ListWatches
        @apiGroup Watch Management
        @apiSuccess (200) {String} OK JSON dict
        """
        # Renamed from 'list' which shadowed the builtin.
        results = {}

        tag_limit = request.args.get('tag', '').lower()
        for uuid, watch in self.datastore.data['watching'].items():
            # Watch tags by name (replace the other calls?)
            tags = self.datastore.get_all_tags_for_watch(uuid=uuid)
            # `or ''` guards against a tag with a missing/None title raising AttributeError.
            if tag_limit and not any((v.get('title') or '').lower() == tag_limit for v in tags.values()):
                continue

            results[uuid] = {
                'last_changed': watch.last_changed,
                'last_checked': watch['last_checked'],
                'last_error': watch['last_error'],
                'title': watch['title'],
                'url': watch['url'],
                'viewed': watch.viewed
            }

        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
                worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return {'status': "OK"}, 200

        return results, 200
changedetectionio/api/__init__.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import copy
from . import api_schema
from ..model import watch_base

# Build a JSON Schema atleast partially based on our Watch model
watch_base_config = watch_base()
schema = api_schema.build_watch_json_schema(watch_base_config)


def _derived_schema(required=None, closed=False):
    """Deep-copy the base watch schema, optionally pinning required keys
    or forbidding unknown properties."""
    variant = copy.deepcopy(schema)
    if required is not None:
        variant['required'] = required
    if closed:
        variant['additionalProperties'] = False
    return variant


schema_create_watch = _derived_schema(required=['url'])
schema_update_watch = _derived_schema(closed=True)

# Tag schema is also based on watch_base since Tag inherits from it
schema_tag = _derived_schema()
schema_create_tag = _derived_schema(required=['title'])
schema_update_tag = _derived_schema(closed=True)

schema_notification_urls = _derived_schema()
schema_create_notification_urls = _derived_schema(required=['notification_urls'])
schema_delete_notification_urls = _derived_schema(required=['notification_urls'])

# Import all API resources (kept after the schema definitions above so the
# resource modules can `from . import schema...` without a circular failure)
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch
from .Tags import Tags, Tag
from .Import import Import
from .SystemInfo import SystemInfo
from .Notifications import Notifications
changedetectionio/api/api_schema.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
2
+ # Probably other ways to solve this when the backend switches to some ORM
3
+ from changedetectionio.notification import valid_notification_formats
4
+
5
+
6
def build_time_between_check_json_schema():
    """Return the JSON Schema fragment for the `time_between_check` object.

    Each time unit accepts an integer or null, and no other keys are allowed.
    """
    # A fresh anyOf dict is built per unit so the property rules stay independent.
    return {
        "type": "object",
        "additionalProperties": False,
        "properties": {
            unit: {
                "anyOf": [
                    {"type": "integer"},
                    {"type": "null"},
                ]
            }
            for unit in ('weeks', 'days', 'hours', 'minutes', 'seconds')
        },
    }
26
+
27
def build_watch_json_schema(d):
    """Build the JSON Schema used to validate API watch payloads.

    The rules are derived from the *types* of the default values in the
    watch_base-style dict *d*, then hand-tuned: some fields gain extra
    accepted types or enums, and internal state-only keys are stripped.
    """
    schema = {
        'type': 'object',
        'properties': {},
    }
    props = schema['properties']

    # Derive a base rule per key from its default value's type.
    # @todo 'integer' is not covered here because its almost always for internal usage
    for key, default in d.items():
        if default is None:
            props[key] = {"anyOf": [{"type": "null"}]}
        elif isinstance(default, list):
            # Always is an array of strings, like text or regex or something
            props[key] = {
                "anyOf": [
                    {"type": "array",
                     "items": {"type": "string", "maxLength": 5000}},
                ]
            }
        elif isinstance(default, bool):
            props[key] = {"anyOf": [{"type": "boolean"}]}
        elif isinstance(default, str):
            props[key] = {"anyOf": [{"type": "string", "maxLength": 5000}]}

    # Can also be a string (or None by default above)
    for key in ('body',
                'notification_body',
                'notification_format',
                'notification_title',
                'proxy',
                'tag',
                'title',
                'webdriver_js_execute_code'):
        props[key]['anyOf'].append({'type': 'string', "maxLength": 5000})

    # None or Boolean
    props['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})

    props['method'] = {"type": "string",
                       "enum": ["GET", "POST", "DELETE", "PUT"]}

    props['fetch_backend']['anyOf'].append({"type": "string",
                                            "enum": ["html_requests", "html_webdriver"]})

    # All headers must be key/value type dict
    props['headers'] = {
        "type": "object",
        "patternProperties": {
            # Should always be a string:string type value
            ".*": {"type": "string"},
        }
    }

    props['notification_format'] = {'type': 'string',
                                    'enum': list(valid_notification_formats.keys())}

    # Stuff that shouldn't be available but is just state-storage
    for key in ('previous_md5', 'last_error', 'has_ldjson_price_data', 'previous_md5_before_filters', 'uuid'):
        del props[key]

    props['webdriver_delay']['anyOf'].append({'type': 'integer'})

    props['time_between_check'] = build_time_between_check_json_schema()

    props['browser_steps'] = {
        "anyOf": [
            {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "operation": {
                            "type": ["string", "null"],
                            "maxLength": 5000  # Allows null and any string up to 5000 chars (including "")
                        },
                        "selector": {
                            "type": ["string", "null"],
                            "maxLength": 5000
                        },
                        "optional_value": {
                            "type": ["string", "null"],
                            "maxLength": 5000
                        }
                    },
                    "required": ["operation", "selector", "optional_value"],
                    "additionalProperties": False  # No extra keys allowed
                }
            },
            {"type": "null"},                 # Allows null for `browser_steps`
            {"type": "array", "maxItems": 0}  # Allows empty array []
        ]
    }

    # headers ?
    return schema
146
+
changedetectionio/api/auth.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import request, make_response, jsonify
2
+ from functools import wraps
3
+
4
+
5
+ # Simple API auth key comparison
6
+ # @todo - Maybe short lived token in the future?
7
+
8
def check_token(f):
    """Decorator requiring a valid `x-api-key` header when API token auth is enabled.

    Expects the wrapped function's first positional argument (the Resource
    instance) to carry a `.datastore` attribute, as all API resources here do.
    Returns a 403 JSON response when the key is missing or wrong.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        import hmac  # stdlib; local import keeps the module surface unchanged

        datastore = args[0].datastore

        config_api_token_enabled = datastore.data['settings']['application'].get('api_access_token_enabled')
        config_api_token = datastore.data['settings']['application'].get('api_access_token')

        # config_api_token_enabled - a UI option in settings if access should obey the key or not
        if config_api_token_enabled:
            presented = request.headers.get('x-api-key') or ''
            # compare_digest: constant-time comparison avoids leaking the token
            # via timing. Also explicitly refuse access when auth is enabled
            # but no token is configured (the old `!=` let `None == None` pass).
            if not config_api_token or not hmac.compare_digest(presented, config_api_token):
                return make_response(
                    jsonify("Invalid access - API key invalid."), 403
                )

        return f(*args, **kwargs)

    return decorated
changedetectionio/async_update_worker.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .processors.exceptions import ProcessorException
2
+ import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
3
+ from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
4
+ from changedetectionio import html_tools
5
+ from changedetectionio.flask_app import watch_check_update
6
+
7
+ import asyncio
8
+ import importlib
9
+ import os
10
+ import time
11
+
12
+ from loguru import logger
13
+
14
+ # Async version of update_worker
15
+ # Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
16
+
17
+ async def async_update_worker(worker_id, q, notification_q, app, datastore):
18
+ """
19
+ Async worker function that processes watch check jobs from the queue.
20
+
21
+ Args:
22
+ worker_id: Unique identifier for this worker
23
+ q: AsyncSignalPriorityQueue containing jobs to process
24
+ notification_q: Standard queue for notifications
25
+ app: Flask application instance
26
+ datastore: Application datastore
27
+ """
28
+ # Set a descriptive name for this task
29
+ task = asyncio.current_task()
30
+ if task:
31
+ task.set_name(f"async-worker-{worker_id}")
32
+
33
+ logger.info(f"Starting async worker {worker_id}")
34
+
35
+ while not app.config.exit.is_set():
36
+ update_handler = None
37
+ watch = None
38
+
39
+ try:
40
+ # Use asyncio wait_for to make queue.get() cancellable
41
+ queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0)
42
+ except asyncio.TimeoutError:
43
+ # No jobs available, continue loop
44
+ continue
45
+ except Exception as e:
46
+ logger.error(f"Worker {worker_id} error getting queue item: {e}")
47
+ await asyncio.sleep(0.1)
48
+ continue
49
+
50
+ uuid = queued_item_data.item.get('uuid')
51
+ fetch_start_time = round(time.time())
52
+
53
+ # Mark this UUID as being processed
54
+ from changedetectionio import worker_handler
55
+ worker_handler.set_uuid_processing(uuid, processing=True)
56
+
57
+ try:
58
+ if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
59
+ changed_detected = False
60
+ contents = b''
61
+ process_changedetection_results = True
62
+ update_obj = {}
63
+
64
+ # Clear last errors
65
+ datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
66
+ datastore.data['watching'][uuid]['last_checked'] = fetch_start_time
67
+
68
+ watch = datastore.data['watching'].get(uuid)
69
+
70
+ logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
71
+
72
+ try:
73
+ watch_check_update.send(watch_uuid=uuid)
74
+
75
+ # Processor is what we are using for detecting the "Change"
76
+ processor = watch.get('processor', 'text_json_diff')
77
+
78
+ # Init a new 'difference_detection_processor'
79
+ processor_module_name = f"changedetectionio.processors.{processor}.processor"
80
+ try:
81
+ processor_module = importlib.import_module(processor_module_name)
82
+ except ModuleNotFoundError as e:
83
+ print(f"Processor module '{processor}' not found.")
84
+ raise e
85
+
86
+ update_handler = processor_module.perform_site_check(datastore=datastore,
87
+ watch_uuid=uuid)
88
+
89
+ # All fetchers are now async, so call directly
90
+ await update_handler.call_browser()
91
+
92
+ # Run change detection (this is synchronous)
93
+ changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
94
+
95
+ except PermissionError as e:
96
+ logger.critical(f"File permission error updating file, watch: {uuid}")
97
+ logger.critical(str(e))
98
+ process_changedetection_results = False
99
+
100
+ except ProcessorException as e:
101
+ if e.screenshot:
102
+ watch.save_screenshot(screenshot=e.screenshot)
103
+ if e.xpath_data:
104
+ watch.save_xpath_data(data=e.xpath_data)
105
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
106
+ process_changedetection_results = False
107
+
108
+ except content_fetchers_exceptions.ReplyWithContentButNoText as e:
109
+ extra_help = ""
110
+ if e.has_filters:
111
+ has_img = html_tools.include_filters(include_filters='img',
112
+ html_content=e.html_content)
113
+ if has_img:
114
+ extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
115
+ else:
116
+ extra_help = ", it's possible that the filters were found, but contained no usable text."
117
+
118
+ datastore.update_watch(uuid=uuid, update_obj={
119
+ 'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
120
+ })
121
+
122
+ if e.screenshot:
123
+ watch.save_screenshot(screenshot=e.screenshot, as_error=True)
124
+
125
+ if e.xpath_data:
126
+ watch.save_xpath_data(data=e.xpath_data)
127
+
128
+ process_changedetection_results = False
129
+
130
+ except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
131
+ if e.status_code == 403:
132
+ err_text = "Error - 403 (Access denied) received"
133
+ elif e.status_code == 404:
134
+ err_text = "Error - 404 (Page not found) received"
135
+ elif e.status_code == 407:
136
+ err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?"
137
+ elif e.status_code == 500:
138
+ err_text = "Error - 500 (Internal server error) received from the web site"
139
+ else:
140
+ extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
141
+ err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
142
+
143
+ if e.screenshot:
144
+ watch.save_screenshot(screenshot=e.screenshot, as_error=True)
145
+ if e.xpath_data:
146
+ watch.save_xpath_data(data=e.xpath_data, as_error=True)
147
+ if e.page_text:
148
+ watch.save_error_text(contents=e.page_text)
149
+
150
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
151
+ process_changedetection_results = False
152
+
153
+ except FilterNotFoundInResponse as e:
154
+ if not datastore.data['watching'].get(uuid):
155
+ continue
156
+
157
+ err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
158
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
159
+
160
+ # Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
161
+ if e.screenshot:
162
+ watch.save_screenshot(screenshot=e.screenshot)
163
+
164
+ if e.xpath_data:
165
+ watch.save_xpath_data(data=e.xpath_data)
166
+
167
+ # Only when enabled, send the notification
168
+ if watch.get('filter_failure_notification_send', False):
169
+ c = watch.get('consecutive_filter_failures', 0)
170
+ c += 1
171
+ # Send notification if we reached the threshold?
172
+ threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
173
+ logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
174
+ if c >= threshold:
175
+ if not watch.get('notification_muted'):
176
+ logger.debug(f"Sending filter failed notification for {uuid}")
177
+ await send_filter_failure_notification(uuid, notification_q, datastore)
178
+ c = 0
179
+ logger.debug(f"Reset filter failure count back to zero")
180
+
181
+ datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
182
+ else:
183
+ logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
184
+
185
+ process_changedetection_results = False
186
+
187
+ except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
188
+ # Yes fine, so nothing todo, don't continue to process.
189
+ process_changedetection_results = False
190
+ changed_detected = False
191
+
192
+ except content_fetchers_exceptions.BrowserConnectError as e:
193
+ datastore.update_watch(uuid=uuid,
194
+ update_obj={'last_error': e.msg})
195
+ process_changedetection_results = False
196
+
197
+ except content_fetchers_exceptions.BrowserFetchTimedOut as e:
198
+ datastore.update_watch(uuid=uuid,
199
+ update_obj={'last_error': e.msg})
200
+ process_changedetection_results = False
201
+
202
+ except content_fetchers_exceptions.BrowserStepsStepException as e:
203
+ if not datastore.data['watching'].get(uuid):
204
+ continue
205
+
206
+ error_step = e.step_n + 1
207
+ from playwright._impl._errors import TimeoutError, Error
208
+
209
+ # Generally enough info for TimeoutError (couldnt locate the element after default seconds)
210
+ err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step."
211
+
212
+ if e.original_e.name == "TimeoutError":
213
+ # Just the first line is enough, the rest is the stack trace
214
+ err_text += " Could not find the target."
215
+ else:
216
+ # Other Error, more info is good.
217
+ err_text += " " + str(e.original_e).splitlines()[0]
218
+
219
+ logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")
220
+
221
+ datastore.update_watch(uuid=uuid,
222
+ update_obj={'last_error': err_text,
223
+ 'browser_steps_last_error_step': error_step})
224
+
225
+ if watch.get('filter_failure_notification_send', False):
226
+ c = watch.get('consecutive_filter_failures', 0)
227
+ c += 1
228
+ # Send notification if we reached the threshold?
229
+ threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
230
+ logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
231
+ if threshold > 0 and c >= threshold:
232
+ if not watch.get('notification_muted'):
233
+ await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore)
234
+ c = 0
235
+
236
+ datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
237
+
238
+ process_changedetection_results = False
239
+
240
+ except content_fetchers_exceptions.EmptyReply as e:
241
+ # Some kind of custom to-str handler in the exception handler that does this?
242
+ err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
243
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
244
+ 'last_check_status': e.status_code})
245
+ process_changedetection_results = False
246
+
247
+ except content_fetchers_exceptions.ScreenshotUnavailable as e:
248
+ err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
249
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
250
+ 'last_check_status': e.status_code})
251
+ process_changedetection_results = False
252
+
253
+ except content_fetchers_exceptions.JSActionExceptions as e:
254
+ err_text = "Error running JS Actions - Page request - "+e.message
255
+ if e.screenshot:
256
+ watch.save_screenshot(screenshot=e.screenshot, as_error=True)
257
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
258
+ 'last_check_status': e.status_code})
259
+ process_changedetection_results = False
260
+
261
+ except content_fetchers_exceptions.PageUnloadable as e:
262
+ err_text = "Page request from server didnt respond correctly"
263
+ if e.message:
264
+ err_text = "{} - {}".format(err_text, e.message)
265
+
266
+ if e.screenshot:
267
+ watch.save_screenshot(screenshot=e.screenshot, as_error=True)
268
+
269
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
270
+ 'last_check_status': e.status_code,
271
+ 'has_ldjson_price_data': None})
272
+ process_changedetection_results = False
273
+
274
+ except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
275
+ err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
276
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
277
+ process_changedetection_results = False
278
+ logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
279
+
280
+ except Exception as e:
281
+ logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
282
+ logger.error(str(e))
283
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
284
+ process_changedetection_results = False
285
+
286
+ else:
287
+ if not datastore.data['watching'].get(uuid):
288
+ continue
289
+
290
+ update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
291
+
292
+ if not watch.get('ignore_status_codes'):
293
+ update_obj['consecutive_filter_failures'] = 0
294
+
295
+ update_obj['last_error'] = False
296
+ cleanup_error_artifacts(uuid, datastore)
297
+
298
+ if not datastore.data['watching'].get(uuid):
299
+ continue
300
+
301
+ if process_changedetection_results:
302
+ # Extract title if needed
303
+ if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
304
+ if not watch['title'] or not len(watch['title']):
305
+ try:
306
+ update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
307
+ logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
308
+ except Exception as e:
309
+ logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")
310
+
311
+ try:
312
+ datastore.update_watch(uuid=uuid, update_obj=update_obj)
313
+
314
+ if changed_detected or not watch.history_n:
315
+ if update_handler.screenshot:
316
+ watch.save_screenshot(screenshot=update_handler.screenshot)
317
+
318
+ if update_handler.xpath_data:
319
+ watch.save_xpath_data(data=update_handler.xpath_data)
320
+
321
+ # Ensure unique timestamp for history
322
+ if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
323
+ logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds")
324
+ fetch_start_time += 1
325
+ await asyncio.sleep(1)
326
+
327
+ watch.save_history_text(contents=contents,
328
+ timestamp=int(fetch_start_time),
329
+ snapshot_id=update_obj.get('previous_md5', 'none'))
330
+
331
+ empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
332
+ if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
333
+ watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
334
+
335
+ # Send notifications on second+ check
336
+ if watch.history_n >= 2:
337
+ logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
338
+ if not watch.get('notification_muted'):
339
+ await send_content_changed_notification(uuid, notification_q, datastore)
340
+
341
+ except Exception as e:
342
+ logger.critical(f"Worker {worker_id} exception in process_changedetection_results")
343
+ logger.critical(str(e))
344
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
345
+
346
+ # Always record attempt count
347
+ count = watch.get('check_count', 0) + 1
348
+
349
+ # Record server header
350
+ try:
351
+ server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
352
+ datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
353
+ except Exception as e:
354
+ pass
355
+
356
+ datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
357
+ 'check_count': count})
358
+
359
+ except Exception as e:
360
+ logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
361
+ logger.error(f"Worker {worker_id} traceback:", exc_info=True)
362
+
363
+ # Also update the watch with error information
364
+ if datastore and uuid in datastore.data['watching']:
365
+ datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})
366
+
367
+ finally:
368
+ # Always cleanup - this runs whether there was an exception or not
369
+ if uuid:
370
+ try:
371
+ # Mark UUID as no longer being processed
372
+ worker_handler.set_uuid_processing(uuid, processing=False)
373
+
374
+ # Send completion signal
375
+ if watch:
376
+ #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
377
+ watch_check_update.send(watch_uuid=watch['uuid'])
378
+
379
+ update_handler = None
380
+ logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
381
+ except Exception as cleanup_error:
382
+ logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
383
+
384
+ # Brief pause before continuing to avoid tight error loops (only on error)
385
+ if 'e' in locals():
386
+ await asyncio.sleep(1.0)
387
+ else:
388
+ # Small yield for normal completion
389
+ await asyncio.sleep(0.01)
390
+
391
+ # Check if we should exit
392
+ if app.config.exit.is_set():
393
+ break
394
+
395
+ # Check if we're in pytest environment - if so, be more gentle with logging
396
+ import sys
397
+ in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
398
+
399
+ if not in_pytest:
400
+ logger.info(f"Worker {worker_id} shutting down")
401
+
402
+
403
def cleanup_error_artifacts(uuid, datastore):
    """Delete any leftover error screenshot/text files for the given watch UUID.

    Called after a successful check so stale error artifacts from a previous
    failed run are not shown in the UI.
    """
    watch_dir = os.path.join(datastore.datastore_path, uuid)
    for leftover in ("last-error-screenshot.png", "last-error.txt"):
        artifact_path = os.path.join(watch_dir, leftover)
        if os.path.isfile(artifact_path):
            os.unlink(artifact_path)
410
+
411
+
412
+
413
async def send_content_changed_notification(watch_uuid, notification_q, datastore):
    """Queue a 'content changed' notification for a watch via the notification service.

    Best-effort: any failure is logged and swallowed so the worker loop keeps running.
    """
    try:
        from changedetectionio.notification_service import create_notification_service
        service = create_notification_service(datastore, notification_q)
        service.send_content_changed_notification(watch_uuid)
    except Exception as e:
        logger.error(f"Error sending notification for {watch_uuid}: {e}")
424
+
425
+
426
async def send_filter_failure_notification(watch_uuid, notification_q, datastore):
    """Queue a 'CSS/xPath filter failed' notification for a watch.

    Best-effort: any failure is logged and swallowed so the worker loop keeps running.
    """
    try:
        from changedetectionio.notification_service import create_notification_service
        service = create_notification_service(datastore, notification_q)
        service.send_filter_failure_notification(watch_uuid)
    except Exception as e:
        logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}")
437
+
438
+
439
async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore):
    """Queue a 'browser step N failed' notification for a watch.

    Best-effort: any failure is logged and swallowed so the worker loop keeps running.
    """
    try:
        from changedetectionio.notification_service import create_notification_service
        service = create_notification_service(datastore, notification_q)
        service.send_step_failure_notification(watch_uuid, step_n)
    except Exception as e:
        logger.error(f"Error sending step failure notification for {watch_uuid}: {e}")
changedetectionio/auth_decorator.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from functools import wraps
3
+ from flask import current_app, redirect, request
4
+ from loguru import logger
5
+
6
def login_optionally_required(func):
    """
    Decorator: enforce login only when a password is configured.

    Blueprint-friendly — resolves the datastore through current_app's config
    rather than importing the Flask app object directly.
    """
    @wraps(func)
    def decorated_view(*args, **kwargs):
        import flask_login
        from flask_login import current_user

        # Access datastore through the app config
        datastore = current_app.config['DATASTORE']
        password_is_set = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)

        # Anonymous access to diff pages is allowed when 'shared_diff_access' is enabled
        shared_diff_permitted = (request.endpoint
                                 and 'diff_history_page' in request.endpoint
                                 and datastore.data['settings']['application'].get('shared_diff_access'))
        if shared_diff_permitted:
            return func(*args, **kwargs)
        if request.method in flask_login.config.EXEMPT_METHODS:
            return func(*args, **kwargs)
        if current_app.config.get('LOGIN_DISABLED'):
            return func(*args, **kwargs)
        if password_is_set and not current_user.is_authenticated:
            return current_app.login_manager.unauthorized()

        return func(*args, **kwargs)
    return decorated_view
changedetectionio/blueprint/__init__.py ADDED
File without changes
changedetectionio/blueprint/backups/__init__.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import glob
3
+ import threading
4
+
5
+ from flask import Blueprint, render_template, send_from_directory, flash, url_for, redirect, abort
6
+ import os
7
+
8
+ from changedetectionio.store import ChangeDetectionStore
9
+ from changedetectionio.flask_app import login_optionally_required
10
+ from loguru import logger
11
+
12
+ BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
13
+
14
+
15
def create_backup(datastore_path, watches: dict):
    """Build a zip backup of the datastore.

    Includes the watch index (url-watches.json), the flask secret, every file in
    each watch's data directory, and two plain-text URL lists for easy porting.

    The archive is written under a .tmp name first and renamed to .zip only when
    complete, so a partially-written backup is never listed or downloadable.

    :param datastore_path: root directory of the datastore
    :param watches: mapping of watch UUID -> watch object (dict-like, with a
                    ``watch_data_dir`` attribute)
    """
    logger.debug("Creating backup...")
    import zipfile
    from pathlib import Path

    # Timestamped final name, plus an explicit temporary path for the in-progress file
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    backupname = BACKUP_FILENAME_FORMAT.format(timestamp)
    backup_filepath = os.path.join(datastore_path, backupname)
    tmp_filepath = backup_filepath.replace('.zip', '.tmp')

    with zipfile.ZipFile(tmp_filepath, "w",
                         compression=zipfile.ZIP_DEFLATED,
                         compresslevel=8) as zipObj:

        # Add the index
        zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")

        # Add the flask app secret
        zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")

        # Add any data in the watch data directory.
        for uuid, w in watches.items():
            for f in Path(w.watch_data_dir).glob('*'):
                zipObj.write(f,
                             # Use the full path to access the file, but make the file 'relative' in the Zip.
                             arcname=os.path.join(f.parts[-2], f.parts[-1]),
                             compress_type=zipfile.ZIP_DEFLATED,
                             compresslevel=8)

        # Create a list file with just the URLs, so it's easier to port somewhere else in the future
        list_file = "url-list.txt"
        with open(os.path.join(datastore_path, list_file), "w") as f:
            for uuid in watches:
                url = watches[uuid]["url"]
                f.write("{}\r\n".format(url))

        # Same list again, but with each watch's tags appended
        list_with_tags_file = "url-list-with-tags.txt"
        with open(os.path.join(datastore_path, list_with_tags_file), "w") as f:
            for uuid in watches:
                url = watches[uuid].get('url')
                tag = watches[uuid].get('tags', {})
                f.write("{} {}\r\n".format(url, tag))

        # Add both URL list files to the Zip
        zipObj.write(os.path.join(datastore_path, list_file),
                     arcname=list_file,
                     compress_type=zipfile.ZIP_DEFLATED,
                     compresslevel=8)
        zipObj.write(os.path.join(datastore_path, list_with_tags_file),
                     arcname=list_with_tags_file,
                     compress_type=zipfile.ZIP_DEFLATED,
                     compresslevel=8)

    # Now it's done, publish it under its final .zip name so it shows up in the UI
    os.rename(tmp_filepath, backup_filepath)
75
+
76
+
77
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the /backups blueprint: request, list, download and delete zip backups.

    Note: the ``@backups_blueprint.route`` decorator must be the OUTERMOST (first
    listed) decorator — decorators apply bottom-up, so if route() were below
    login_optionally_required, Flask would register the unwrapped view and the
    login check would be silently bypassed.
    """
    backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
    # In-flight backup zip threads; consulted so only one backup runs at a time
    backup_threads = []

    @backups_blueprint.route("/request-backup", methods=['GET'])
    @login_optionally_required
    def request_backup():
        if any(thread.is_alive() for thread in backup_threads):
            flash("A backup is already running, check back in a few minutes", "error")
            return redirect(url_for('backups.index'))

        if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
            flash("Maximum number of backups reached, please remove some", "error")
            return redirect(url_for('backups.index'))

        # Be sure we're written fresh
        datastore.sync_to_json()
        zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
        zip_thread.start()
        backup_threads.append(zip_thread)
        flash("Backup building in background, check back in a few minutes.")

        return redirect(url_for('backups.index'))

    def find_backups():
        """Return metadata (filename, size in MB, ctime) for all backup zips, newest first."""
        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
        backups = glob.glob(backup_filepath)
        backup_info = []

        for backup in backups:
            size = os.path.getsize(backup) / (1024 * 1024)
            creation_time = os.path.getctime(backup)
            backup_info.append({
                'filename': os.path.basename(backup),
                'filesize': f"{size:.2f}",
                'creation_time': creation_time
            })

        backup_info.sort(key=lambda x: x['creation_time'], reverse=True)

        return backup_info

    @backups_blueprint.route("/download/<string:filename>", methods=['GET'])
    @login_optionally_required
    def download_backup(filename):
        import re
        filename = filename.strip()
        # Raw string: "\d" is an invalid escape sequence in a normal string literal
        backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")

        # Reject any path-traversal attempt before doing anything else
        full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
        if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
            abort(404)

        if filename == 'latest':
            backups = find_backups()
            if not backups:
                # Nothing has been backed up yet
                abort(404)
            filename = backups[0]['filename']

        if not re.match(r"^" + backup_filename_regex + "$", filename):
            abort(400)  # Bad Request if the filename doesn't match the pattern

        logger.debug(f"Backup download request for '{full_path}'")
        return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)

    @backups_blueprint.route("", methods=['GET'])
    @login_optionally_required
    def index():
        backups = find_backups()
        output = render_template("overview.html",
                                 available_backups=backups,
                                 backup_running=any(thread.is_alive() for thread in backup_threads)
                                 )

        return output

    @backups_blueprint.route("/remove-backups", methods=['GET'])
    @login_optionally_required
    def remove_backups():
        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
        backups = glob.glob(backup_filepath)
        for backup in backups:
            os.unlink(backup)

        flash("Backups were deleted.")

        return redirect(url_for('backups.index'))

    return backups_blueprint
changedetectionio/blueprint/backups/templates/overview.html ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends 'base.html' %}
2
+ {% block content %}
3
+ {% from '_helpers.html' import render_simple_field, render_field %}
4
+ <div class="edit-form">
5
+ <div class="box-wrap inner">
6
+ <h4>Backups</h4>
7
+ {% if backup_running %}
8
+ <p>
9
+ <strong>A backup is running!</strong>
10
+ </p>
11
+ {% endif %}
12
+ <p>
13
+ Here you can download and request a new backup, when a backup is completed you will see it listed below.
14
+ </p>
15
+ <br>
16
+ {% if available_backups %}
17
+ <ul>
18
+ {% for backup in available_backups %}
19
 <li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} MB</li>
20
+ {% endfor %}
21
+ </ul>
22
+ {% else %}
23
+ <p>
24
+ <strong>No backups found.</strong>
25
+ </p>
26
+ {% endif %}
27
+
28
+ <a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">Create backup</a>
29
+ {% if available_backups %}
30
+ <a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">Remove backups</a>
31
+ {% endif %}
32
+ </div>
33
+ </div>
34
+
35
+
36
+ {% endblock %}
changedetectionio/blueprint/browser_steps/TODO.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ - This needs an abstraction to directly handle the puppeteer connection methods
2
+ - Then remove the playwright stuff
3
+ - Remove hack redirect at line 65 changedetectionio/processors/__init__.py
4
+
5
 The screenshots are base64 encoded/decoded, which is very CPU-intensive for large screenshots in Playwright,
 but not in the direct Puppeteer connection, where they stay binary end-to-end.
7
+
changedetectionio/blueprint/browser_steps/__init__.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # HORRIBLE HACK BUT WORKS :-) PR anyone?
3
+ #
4
+ # Why?
5
+ # `browsersteps_playwright_browser_interface.chromium.connect_over_cdp()` will only run once without async()
6
+ # - this flask app is not async()
7
+ # - A single timeout/keepalive which applies to the session made at .connect_over_cdp()
8
+ #
9
+ # So it means that we must unfortunately for now just keep a single timer since .connect_over_cdp() was run
10
+ # and know when that reaches timeout/keepalive :( when that time is up, restart the connection and tell the user
11
+ # that their time is up, insert another coin. (reload)
12
+ #
13
+ #
14
+
15
+ from changedetectionio.strtobool import strtobool
16
+ from flask import Blueprint, request, make_response
17
+ import os
18
+
19
+ from changedetectionio.store import ChangeDetectionStore
20
+ from changedetectionio.flask_app import login_optionally_required
21
+ from loguru import logger
22
+
23
+ browsersteps_sessions = {}
24
+ io_interface_context = None
25
+ import json
26
+ import hashlib
27
+ from flask import Response
28
+ import asyncio
29
+ import threading
30
+
31
def run_async_in_browser_loop(coro):
    """Execute *coro* synchronously, preferring the shared async worker event loop."""
    from changedetectionio import worker_handler

    shared_loop = worker_handler.async_loop
    shared_loop_usable = (worker_handler.USE_ASYNC_WORKERS
                          and shared_loop
                          and not shared_loop.is_closed())

    if shared_loop_usable:
        # Submit onto the already-running worker loop and block for the result
        logger.debug("Browser steps using existing async worker event loop")
        return asyncio.run_coroutine_threadsafe(coro, shared_loop).result()

    # Fallback: create a new event loop (for sync workers or if async loop not available)
    logger.debug("Browser steps creating temporary event loop")
    temp_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(temp_loop)
    try:
        return temp_loop.run_until_complete(coro)
    finally:
        temp_loop.close()
49
+
50
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the browser-steps blueprint (start session, fetch screenshots, run steps).

    Note: the ``@browser_steps_blueprint.route`` decorator is kept OUTERMOST (first
    listed) — decorators apply bottom-up, so if route() were below
    login_optionally_required, Flask would register the unwrapped view and the
    login check would be silently bypassed.
    """
    browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")

    async def start_browsersteps_session(watch_uuid):
        """Open a Playwright CDP connection and a live stepper UI session for one watch.

        Returns a dict holding the browser, playwright context and browserstepper,
        keyed into the module-level browsersteps_sessions map by the caller.
        """
        from . import browser_steps
        import time
        from playwright.async_api import async_playwright

        # We keep the playwright session open for many minutes
        keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60

        browsersteps_start_session = {'start_time': time.time()}

        # Create a new async playwright instance for browser steps
        playwright_instance = async_playwright()
        playwright_context = await playwright_instance.start()

        # Ask the remote browser to outlive our keepalive slightly
        keepalive_ms = ((keepalive_seconds + 3) * 1000)
        base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
        separator = '&' if '?' in base_url else '?'
        base_url += separator + f"timeout={keepalive_ms}"

        browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
        browsersteps_start_session['browser'] = browser
        browsersteps_start_session['playwright_context'] = playwright_context

        proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
        proxy = None
        if proxy_id:
            proxy_url = datastore.proxy_list.get(proxy_id).get('url')
            if proxy_url:
                # Playwright needs separate username and password values
                from urllib.parse import urlparse
                parsed = urlparse(proxy_url)
                proxy = {'server': proxy_url}

                if parsed.username:
                    proxy['username'] = parsed.username

                if parsed.password:
                    proxy['password'] = parsed.password

                logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")

        # Tell Playwright to connect to Chrome and setup a new session via our stepper interface
        browserstepper = browser_steps.browsersteps_live_ui(
            playwright_browser=browser,
            proxy=proxy,
            start_url=datastore.data['watching'][watch_uuid].link,
            headers=datastore.data['watching'][watch_uuid].get('headers')
        )

        # Initialize the async connection
        await browserstepper.connect(proxy=proxy)

        browsersteps_start_session['browserstepper'] = browserstepper

        return browsersteps_start_session

    @browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
    @login_optionally_required
    def browsersteps_start_session():
        # A new session was requested, return sessionID
        import uuid
        browsersteps_session_id = str(uuid.uuid4())
        watch_uuid = request.args.get('uuid')

        if not watch_uuid:
            return make_response('No Watch UUID specified', 500)

        logger.debug("Starting connection with playwright")
        logger.debug("browser_steps.py connecting")

        try:
            # Run the async function in the dedicated browser steps event loop
            browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
                start_browsersteps_session(watch_uuid)
            )
        except Exception as e:
            if 'ECONNREFUSED' in str(e):
                return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
            else:
                # Other errors, bad URL syntax, bad reply etc
                return make_response(str(e), 401)

        logger.debug("Starting connection with playwright - done")
        return {'browsersteps_session_id': browsersteps_session_id}

    @browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
    @login_optionally_required
    def browser_steps_fetch_screenshot_image():
        from flask import (
            make_response,
            request,
            send_from_directory,
        )
        uuid = request.args.get('uuid')
        # step_n may be absent from the query string — int(None) would raise a 500
        step_n_arg = request.args.get('step_n')
        step_n = int(step_n_arg) if step_n_arg else None

        watch = datastore.data['watching'].get(uuid)
        filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"

        if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
            response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
            response.headers['Content-type'] = 'image/jpeg'
            response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
            response.headers['Pragma'] = 'no-cache'
            response.headers['Expires'] = "0"  # header values should be strings
            return response

        else:
            return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)

    # A request for an action was received
    @browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
    @login_optionally_required
    def browsersteps_ui_update():
        import base64
        import playwright._impl._errors
        from changedetectionio.blueprint.browser_steps import browser_steps

        remaining = 0
        uuid = request.args.get('uuid')

        browsersteps_session_id = request.args.get('browsersteps_session_id')

        if not browsersteps_session_id:
            return make_response('No browsersteps_session_id specified', 500)

        if not browsersteps_sessions.get(browsersteps_session_id):
            return make_response('No session exists under that ID', 500)

        is_last_step = False
        # Actions - step/apply/etc, do the thing and return state
        if request.method == 'POST':
            # @todo - should always be an existing session
            step_operation = request.form.get('operation')
            step_selector = request.form.get('selector')
            step_optional_value = request.form.get('optional_value')
            is_last_step = strtobool(request.form.get('is_last_step'))

            try:
                # Run the async call_action method in the dedicated browser steps event loop
                run_async_in_browser_loop(
                    browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
                        action_name=step_operation,
                        selector=step_selector,
                        optional_value=step_optional_value
                    )
                )

            except Exception as e:
                logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
                # Try to find something of value to give back to the user
                return make_response(str(e).splitlines()[0], 401)

        # Screenshots and other info only needed on requesting a step (POST)
        try:
            # Run the async get_current_state method in the dedicated browser steps event loop
            (screenshot, xpath_data) = run_async_in_browser_loop(
                browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
            )

            if is_last_step:
                # Persist the final state against the watch so the UI can show it
                watch = datastore.data['watching'].get(uuid)
                u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
                if watch and u:
                    watch.save_screenshot(screenshot=screenshot)
                    watch.save_xpath_data(data=xpath_data)

        except Exception as e:
            return make_response(f"Error fetching screenshot and element data - {str(e)}", 401)

        # SEND THIS BACK TO THE BROWSER
        output = {
            "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
            "xpath_data": xpath_data,
            "session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
            "browser_time_remaining": round(remaining)
        }
        json_data = json.dumps(output)

        # Generate an ETag (hash of the response body)
        etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest()

        # Create the response with ETag
        response = Response(json_data, mimetype="application/json; charset=UTF-8")
        response.set_etag(etag_hash)

        return response

    return browser_steps_blueprint
+
254
+
changedetectionio/blueprint/browser_steps/browser_steps.py ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import re
4
+ from random import randint
5
+ from loguru import logger
6
+
7
+ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
8
+ from changedetectionio.content_fetchers.base import manage_user_agent
9
+ from changedetectionio.safe_jinja import render as jinja_render
10
+
11
+
12
+
13
# Maps each browser-step action label (as shown in the UI dropdown) to two flags
# that tell the JS which of the "Selector" or "Value" fields should be enabled
# in the front end: "selector value", each 0 = off, 1 = on.
# Commented-out entries are actions that are not (yet) exposed in the UI.
browser_step_ui_config = {'Choose one': '0 0',
                          # 'Check checkbox': '1 0',
                          # 'Click button containing text': '0 1',
                          # 'Scroll to bottom': '0 0',
                          # 'Scroll to element': '1 0',
                          # 'Scroll to top': '0 0',
                          # 'Switch to iFrame by index number': '0 1'
                          # 'Uncheck checkbox': '1 0',
                          # @todo
                          'Check checkbox': '1 0',
                          'Click X,Y': '0 1',
                          'Click element if exists': '1 0',
                          'Click element': '1 0',
                          'Click element containing text': '0 1',
                          'Click element containing text if exists': '0 1',
                          'Enter text in field': '1 1',
                          'Execute JS': '0 1',
                          # 'Extract text and use as filter': '1 0',
                          'Goto site': '0 0',
                          'Goto URL': '0 1',
                          'Make all child elements visible': '1 0',
                          'Press Enter': '0 0',
                          'Select by label': '1 1',
                          '<select> by option text': '1 1',
                          'Scroll down': '0 0',
                          'Uncheck checkbox': '1 0',
                          'Wait for seconds': '0 1',
                          'Wait for text': '0 1',
                          'Wait for text in element': '1 1',
                          'Remove elements': '1 0',
                          # 'Press Page Down': '0 0',
                          # 'Press Page Up': '0 0',
                          # weird bug, come back to it later
                          }
49
+
50
+
51
+ # Good reference - https://playwright.dev/python/docs/input
52
+ # https://pythonmana.com/2021/12/202112162236307035.html
53
+ #
54
+ # ONLY Works in Playwright because we need the fullscreen screenshot
55
class steppable_browser_interface():
    """Executes named browser automation steps against an async Playwright page.

    Each ``action_*`` coroutine implements one step from ``browser_step_ui_config``;
    :meth:`call_action` maps a human-readable step name onto the right coroutine.
    Playwright-only, because callers also rely on the full-page screenshot support.
    """
    # Async Playwright Page; must be assigned by the subclass/caller before use.
    page = None
    # The watch's starting URL (used by the 'Goto site' step).
    start_url = None
    # Default per-action timeout, in milliseconds.
    action_timeout = 10 * 1000

    def __init__(self, start_url):
        self.start_url = start_url

    # Convert and perform "Click Button" for example
    async def call_action(self, action_name, selector=None, optional_value=None):
        """Resolve *action_name* to an ``action_<name>`` coroutine and run it.

        - Normalises the name (runs of non-alphanumerics -> '_', lowercased).
        - A bare xpath selector ('/...') gets the explicit 'xpath=' prefix.
        - *selector* / *optional_value* may contain Jinja2 ({{ }} / {% %}) and
          are rendered before use.
        - ``click_*`` actions run inside an expect_event("framenavigated")
          guard so a click that triggers navigation doesn't abort the step;
          a navigation timeout is treated as "no navigation happened, fine".
        """
        if self.page is None:
            logger.warning("Cannot call action on None page object")
            return

        now = time.time()
        call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
        if call_action_name == 'choose_one':
            # The placeholder entry in the UI dropdown - nothing to do
            return

        logger.debug(f"> Action calling '{call_action_name}'")
        # https://playwright.dev/python/docs/selectors#xpath-selectors
        if selector and selector.startswith('/') and not selector.startswith('//'):
            selector = "xpath=" + selector

        # Check if action handler exists
        if not hasattr(self, "action_" + call_action_name):
            logger.warning(f"Action handler for '{call_action_name}' not found")
            return

        action_handler = getattr(self, "action_" + call_action_name)

        # Support for Jinja2 variables in the value and selector
        if selector and ('{%' in selector or '{{' in selector):
            selector = jinja_render(template_str=selector)

        if optional_value and ('{%' in optional_value or '{{' in optional_value):
            optional_value = jinja_render(template_str=optional_value)

        # Trigger click and cautiously handle potential navigation
        # This means the page redirects/reloads/changes JS etc etc
        if call_action_name.startswith('click_'):
            try:
                # Set up navigation expectation before the click (like sync version)
                async with self.page.expect_event("framenavigated", timeout=3000) as navigation_info:
                    await action_handler(selector, optional_value)

                # Check if navigation actually occurred
                try:
                    await navigation_info.value  # This waits for the navigation promise
                    logger.debug(f"Navigation occurred on {call_action_name}.")
                except Exception:
                    logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")

            except Exception as e:
                # If expect_event itself times out, that means no navigation occurred - that's OK
                if "framenavigated" in str(e) and "exceeded" in str(e):
                    logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
                else:
                    raise e
        else:
            # Some other action that probably a navigation is not expected
            await action_handler(selector, optional_value)

        # Safely wait for timeout - gives the page a moment to settle after the action
        await self.page.wait_for_timeout(1.5 * 1000)
        logger.debug(f"Call action done in {time.time()-now:.2f}s")

    async def action_goto_url(self, selector=None, value=None):
        """Navigate to *value*; returns the Playwright Response, or None if no URL given."""
        if not value:
            logger.warning("No URL provided for goto_url action")
            return None

        now = time.time()
        # timeout=0 disables Playwright's navigation timeout entirely
        response = await self.page.goto(value, timeout=0, wait_until='load')
        logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
        return response

    # Incase they request to go back to the start
    async def action_goto_site(self, selector=None, value=None):
        return await self.action_goto_url(value=self.start_url)

    async def action_click_element_containing_text(self, selector=None, value=''):
        logger.debug("Clicking element containing text")
        if not value or not len(value.strip()):
            return

        elem = self.page.get_by_text(value)
        if await elem.count():
            # Randomised click delay to look less bot-like
            await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)

    async def action_click_element_containing_text_if_exists(self, selector=None, value=''):
        logger.debug("Clicking element containing text if exists")
        if not value or not len(value.strip()):
            return

        elem = self.page.get_by_text(value)
        count = await elem.count()
        logger.debug(f"Clicking element containing text - {count} elements found")
        if count:
            await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)

    async def action_enter_text_in_field(self, selector, value):
        if not selector or not len(selector.strip()):
            return

        await self.page.fill(selector, value, timeout=self.action_timeout)

    async def action_execute_js(self, selector, value):
        """Evaluate raw JS *value* in the page; returns the evaluation result."""
        if not value:
            return None

        return await self.page.evaluate(value)

    async def action_click_element(self, selector, value):
        logger.debug("Clicking element")
        if not selector or not len(selector.strip()):
            return

        # Extra 20s on top of the default action timeout for slow pages
        await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))

    async def action_click_element_if_exists(self, selector, value):
        import playwright._impl._errors as _api_types
        logger.debug("Clicking element if exists")
        if not selector or not len(selector.strip()):
            return

        try:
            await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
        except _api_types.TimeoutError:
            # 'if exists' semantics: missing element is not an error
            return
        except _api_types.Error:
            # Element was there, but page redrew and now its long long gone
            return

    async def action_click_x_y(self, selector, value):
        """Click at absolute page coordinates given as 'x,y' in *value*."""
        if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
            logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
            return

        try:
            x, y = value.strip().split(',')
            x = int(float(x.strip()))
            y = int(float(y.strip()))

            await self.page.mouse.click(x=x, y=y, delay=randint(200, 500))

        except Exception as e:
            logger.error(f"Error parsing x,y coordinates: {str(e)}")

    async def action__select_by_option_text(self, selector, value):
        # NOTE: the double underscore is deliberate - it's what
        # '<select> by option text' normalises to in call_action().
        if not selector or not len(selector.strip()):
            return

        await self.page.select_option(selector, label=value, timeout=self.action_timeout)

    async def action_scroll_down(self, selector, value):
        # Some sites this doesnt work on for some reason
        await self.page.mouse.wheel(0, 600)
        await self.page.wait_for_timeout(1000)

    async def action_wait_for_seconds(self, selector, value):
        try:
            # Defaults to 1 second when no value was supplied
            seconds = float(value.strip()) if value else 1.0
            await self.page.wait_for_timeout(seconds * 1000)
        except (ValueError, TypeError) as e:
            logger.error(f"Invalid value for wait_for_seconds: {str(e)}")

    async def action_wait_for_text(self, selector, value):
        """Block (up to 30s) until *value* appears anywhere in the page body text."""
        if not value:
            return

        import json
        # json.dumps gives us a safely-quoted/escaped JS string literal
        v = json.dumps(value)
        await self.page.wait_for_function(
            f'document.querySelector("body").innerText.includes({v});',
            timeout=30000
        )

    async def action_wait_for_text_in_element(self, selector, value):
        """Block (up to 30s) until *value* appears in the element at *selector*."""
        if not selector or not value:
            return

        import json
        s = json.dumps(selector)
        v = json.dumps(value)

        await self.page.wait_for_function(
            f'document.querySelector({s}).innerText.includes({v});',
            timeout=30000
        )

    # @todo - in the future make some popout interface to capture what needs to be set
    # https://playwright.dev/python/docs/api/class-keyboard
    async def action_press_enter(self, selector, value):
        await self.page.keyboard.press("Enter", delay=randint(200, 500))

    async def action_press_page_up(self, selector, value):
        await self.page.keyboard.press("PageUp", delay=randint(200, 500))

    async def action_press_page_down(self, selector, value):
        await self.page.keyboard.press("PageDown", delay=randint(200, 500))

    async def action_check_checkbox(self, selector, value):
        if not selector:
            return

        await self.page.locator(selector).check(timeout=self.action_timeout)

    async def action_uncheck_checkbox(self, selector, value):
        if not selector:
            return

        await self.page.locator(selector).uncheck(timeout=self.action_timeout)

    async def action_remove_elements(self, selector, value):
        """Removes all elements matching the given selector from the DOM."""
        if not selector:
            return

        await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")

    async def action_make_all_child_elements_visible(self, selector, value):
        """Recursively makes all child elements inside the given selector fully visible."""
        if not selector:
            return

        await self.page.locator(selector).locator("*").evaluate_all("""
            els => els.forEach(el => {
                el.style.display = 'block'; // Forces it to be displayed
                el.style.visibility = 'visible'; // Ensures it's not hidden
                el.style.opacity = '1'; // Fully opaque
                el.style.position = 'relative'; // Avoids 'absolute' hiding
                el.style.height = 'auto'; // Expands collapsed elements
                el.style.width = 'auto'; // Ensures full visibility
                el.removeAttribute('hidden'); // Removes hidden attribute
                el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
            })
        """)
300
+
301
+ # Responsible for maintaining a live 'context' with the chrome CDP
302
+ # @todo - how long do contexts live for anyway?
303
class browsersteps_live_ui(steppable_browser_interface):
    """Maintains a live Playwright BrowserContext + Page for the interactive
    "Browser Steps" UI, and scrapes screenshot / interactive-element data
    back to the front end after each action.
    """
    # Playwright BrowserContext for this session (created in connect()).
    context = None
    # Playwright Page for this session (created in connect()).
    page = None
    render_extra_delay = 1
    stale = False
    # bump and kill this if idle after X sec
    age_start = 0
    # Extra HTTP headers to send (also consulted when picking a User-Agent).
    headers = {}
    # Track if resources are properly cleaned up
    _is_cleaned_up = False

    # use a special driver, maybe locally etc
    command_executor = os.getenv(
        "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
    )
    # if not.. fall back to the general-purpose Playwright driver URL
    if not command_executor:
        command_executor = os.getenv(
            "PLAYWRIGHT_DRIVER_URL",
            'ws://playwright-chrome:3000'
        ).strip('"')

    browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')

    def __init__(self, playwright_browser, proxy=None, headers=None, start_url=None):
        self.headers = headers or {}
        self.age_start = time.time()
        self.playwright_browser = playwright_browser
        self.start_url = start_url
        self._is_cleaned_up = False
        self.proxy = proxy
        # Note: connect() is now async and must be called separately

    def __del__(self):
        # Ensure cleanup happens if object is garbage collected
        # Note: cleanup is now async, so we can only mark as cleaned up here
        self._is_cleaned_up = True

    # Connect and setup a new context
    async def connect(self, proxy=None):
        """Create a fresh BrowserContext and Page on the shared browser.

        Should only get called once per session object.
        """
        # Should only get called once - test that
        keep_open = 1000 * 60 * 5  # 5 minutes, used as the default page timeout (ms)
        now = time.time()

        # @todo handle multiple contexts, bind a unique id from the browser on each req?
        self.context = await self.playwright_browser.new_context(
            accept_downloads=False,  # Should never be needed
            bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others
            extra_http_headers=self.headers,
            ignore_https_errors=True,
            proxy=proxy,
            service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
            # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
            user_agent=manage_user_agent(headers=self.headers),
        )

        self.page = await self.context.new_page()

        # self.page.set_default_navigation_timeout(keep_open)
        self.page.set_default_timeout(keep_open)
        # Set event handlers
        self.page.on("close", self.mark_as_closed)
        # Listen for all console events and handle errors
        self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))

        logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
        await self.page.wait_for_timeout(1 * 1000)

    def mark_as_closed(self):
        logger.debug("Page closed, cleaning up..")
        # Note: This is called from a sync context (event handler)
        # so we'll just mark as cleaned up and let __del__ handle the rest
        self._is_cleaned_up = True

    async def cleanup(self):
        """Properly clean up all resources to prevent memory leaks"""
        if self._is_cleaned_up:
            return

        logger.debug("Cleaning up browser steps resources")

        # Clean up page
        if hasattr(self, 'page') and self.page is not None:
            try:
                # Force garbage collection before closing
                await self.page.request_gc()
            except Exception as e:
                logger.debug(f"Error during page garbage collection: {str(e)}")

            try:
                # Remove event listeners before closing
                self.page.remove_listener("close", self.mark_as_closed)
            except Exception as e:
                logger.debug(f"Error removing event listeners: {str(e)}")

            try:
                await self.page.close()
            except Exception as e:
                logger.debug(f"Error closing page: {str(e)}")

            self.page = None

        # Clean up context
        if hasattr(self, 'context') and self.context is not None:
            try:
                await self.context.close()
            except Exception as e:
                logger.debug(f"Error closing context: {str(e)}")

            self.context = None

        self._is_cleaned_up = True
        logger.debug("Browser steps resources cleanup complete")

    @property
    def has_expired(self):
        """True when the page is gone, already cleaned up, or older than
        BROWSER_STEPS_MAX_AGE_SECONDS (default 10 minutes)."""
        if not self.page or self._is_cleaned_up:
            return True

        # Check if session has expired based on age
        max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes
        if (time.time() - self.age_start) > max_age_seconds:
            logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
            return True

        return False

    async def get_current_state(self):
        """Return the screenshot and interactive elements mapping, generally always called after action_()

        Returns a ``(screenshot_bytes_or_None, xpath_data_dict_or_None)`` tuple.
        """
        import importlib.resources
        import json
        # because we for now only run browser steps in playwright mode (not puppeteer mode)
        from changedetectionio.content_fetchers.playwright import capture_full_page_async

        # Safety check - don't proceed if resources are cleaned up
        if self._is_cleaned_up or self.page is None:
            logger.warning("Attempted to get current state after cleanup")
            return (None, None)

        xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()

        now = time.time()
        await self.page.wait_for_timeout(1 * 1000)

        screenshot = None
        xpath_data = None

        try:
            # Get screenshot first
            screenshot = await capture_full_page_async(page=self.page)
            if not screenshot:
                logger.error("No screenshot was retrieved :((")

            logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")

            # Then get interactive elements
            now = time.time()
            await self.page.evaluate("var include_filters=''")
            await self.page.request_gc()

            scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'

            MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
            xpath_data = json.loads(await self.page.evaluate(xpath_element_js, {
                "visualselector_xpath_selectors": scan_elements,
                "max_height": MAX_TOTAL_HEIGHT
            }))
            await self.page.request_gc()

            # Sort elements by size (largest first) so the UI can prefer bigger targets
            xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
            logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")

        except Exception as e:
            logger.error(f"Error getting current state: {str(e)}")
            # If the page has navigated (common with logins) then the context is destroyed on navigation, continue
            # I'm not sure that this is required anymore because we have the "expect navigation wrapper" at the top
            if "Execution context was destroyed" in str(e):
                logger.debug("Execution context was destroyed, most likely because of navigation, continuing...")
                pass

            # Attempt recovery - force garbage collection
            try:
                await self.page.request_gc()
            except:
                pass

        # Request garbage collection one final time
        try:
            await self.page.request_gc()
        except:
            pass

        return (screenshot, xpath_data)
497
+
changedetectionio/blueprint/check_proxies/__init__.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ from concurrent.futures import ThreadPoolExecutor
3
+
4
+ from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
5
+ from changedetectionio.store import ChangeDetectionStore
6
+
7
+ from functools import wraps
8
+
9
+ from flask import Blueprint
10
+ from flask_login import login_required
11
+
12
# Per-proxy check lifecycle states.
STATUS_CHECKING = 0
STATUS_FAILED = 1
STATUS_OK = 2
# Cap concurrent proxy checks so one watch doesn't flood the target site.
THREADPOOL_MAX_WORKERS = 3
# Shared pool used by the @threadpool decorator when no executor is supplied.
_DEFAULT_POOL = ThreadPoolExecutor(max_workers=THREADPOOL_MAX_WORKERS)
17
+
18
+
19
+ # Maybe use fetch-time if its >5 to show some expected load time?
20
def threadpool(f, executor=None):
    """Decorator: submit calls to *f* onto a thread pool.

    The wrapped function returns a ``concurrent.futures.Future`` immediately
    instead of the result.  Uses *executor* when given, otherwise the
    module-level ``_DEFAULT_POOL``.
    """
    @wraps(f)
    def submit_to_pool(*args, **kwargs):
        pool = executor or _DEFAULT_POOL
        return pool.submit(f, *args, **kwargs)

    return submit_to_pool
26
+
27
+
28
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'check_proxies' Flask blueprint.

    Exposes two endpoints per watch UUID so the UI can test which configured
    proxies are able to fetch the watched page:

      /<uuid>/start   - launch one background fetch per configured proxy
      /<uuid>/status  - poll the per-proxy results
    """
    check_proxies_blueprint = Blueprint('check_proxies', __name__)
    # watch uuid -> {proxy key -> Future} for in-flight / finished checks
    checks_in_progress = {}

    @threadpool
    def long_task(uuid, preferred_proxy):
        """Fetch watch *uuid* through proxy *preferred_proxy* (runs on the pool).

        Returns a dict with 'status' ('OK' / 'ERROR' / 'ERROR OTHER'),
        'length', HTML-escaped 'text', and elapsed 'time'.
        """
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}

        contents = ''
        now = time.time()
        try:
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
            update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                 watch_uuid=uuid
                                                                 )

            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
            # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
                # A 404 still proves the proxy itself is reachable and working
                status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but 404 (page not found)"})
            elif e.status_code == 403 or e.status_code == 401:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
        except FilterNotFoundInResponse:
            # The proxy worked; the watch's filter simply didn't match the page
            status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code == 403 or e.status_code == 401:
                status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Got empty reply with code {e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': f"Empty reply with code {e.status_code}, needs chrome?"})
        except content_fetcher_exceptions.ReplyWithContentButNoText as e:
            txt = f"Got reply but with no content - Status code {e.status_code} - It's possible that the filters were found, but contained no usable text (or contained only an image)."
            status.update({'status': 'ERROR', 'text': txt})
        except Exception as e:
            status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': 'Error: '+type(e).__name__+str(e)})
        else:
            status.update({'status': 'OK', 'length': len(contents), 'text': ''})

        if status.get('text'):
            # parse 'text' as text for safety - HTML-escape before it reaches the UI
            v = {'text': status['text']}
            status['text'] = jinja_render(template_str='{{text|e}}', **v)

        status['time'] = "{:.2f}s".format(time.time() - now)

        return status

    def _recalc_check_status(uuid):
        """Poll every Future for *uuid*; anything unfinished within the tiny
        timeout is reported as {'status': 'RUNNING'}."""
        results = {}
        for k, v in checks_in_progress.get(uuid, {}).items():
            try:
                r_1 = v.result(timeout=0.05)
            except Exception:
                # Future not done yet (result() timed out) - still running
                results[k] = {'status': 'RUNNING'}
            else:
                results[k] = r_1

        return results

    # FIX: the route() decorator must be OUTERMOST (listed first).  Decorators
    # apply bottom-up and route() registers exactly the function it receives -
    # with @login_required listed above route(), Flask registered the
    # *unprotected* view and the auth check was silently bypassed.
    @check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
    @login_required
    def get_recheck_status(uuid):
        """Return the current per-proxy result map for *uuid*."""
        results = _recalc_check_status(uuid=uuid)
        return results

    @check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
    @login_required
    def start_check(uuid):
        """Launch one background check per configured proxy.

        Idempotent while a previous round is still running - re-calling just
        returns the current state instead of stacking more work.
        """
        if not datastore.proxy_list:
            # No proxies configured - nothing to check
            return

        if checks_in_progress.get(uuid):
            state = _recalc_check_status(uuid=uuid)
            for proxy_key, v in state.items():
                if v.get('status') == 'RUNNING':
                    return state
        else:
            checks_in_progress[uuid] = {}

        for k, v in datastore.proxy_list.items():
            if not checks_in_progress[uuid].get(k):
                checks_in_progress[uuid][k] = long_task(uuid=uuid, preferred_proxy=k)

        results = _recalc_check_status(uuid=uuid)
        return results

    return check_proxies_blueprint
changedetectionio/blueprint/imports/__init__.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, request, redirect, url_for, flash, render_template
2
+ from changedetectionio.store import ChangeDetectionStore
3
+ from changedetectionio.auth_decorator import login_optionally_required
4
+ from changedetectionio import worker_handler
5
+ from changedetectionio.blueprint.imports.importer import (
6
+ import_url_list,
7
+ import_distill_io_json,
8
+ import_xlsx_wachete,
9
+ import_xlsx_custom
10
+ )
11
+
12
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
    """Build the 'imports' blueprint (the /import page).

    On POST, handles three import sources: a plain URL list, a Distill.io
    JSON export, and .xlsx files (Wachete export or a user-defined column
    mapping).  Every newly created watch is queued immediately for its
    first check.
    """
    import_blueprint = Blueprint('imports', __name__, template_folder="templates")

    @import_blueprint.route("/import", methods=['GET', 'POST'])
    @login_optionally_required
    def import_page():
        remaining_urls = []
        from changedetectionio import forms

        if request.method == 'POST':
            # URL List import
            if request.values.get('urls') and len(request.values.get('urls').strip()):
                # Import and push into the queue for immediate update check
                importer_handler = import_url_list()
                importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
                for uuid in importer_handler.new_uuids:
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

                if len(importer_handler.remaining_data) == 0:
                    return redirect(url_for('watchlist.index'))
                else:
                    # Lines that failed validation are fed back into the textarea
                    remaining_urls = importer_handler.remaining_data

            # Distill.io import
            if request.values.get('distill-io') and len(request.values.get('distill-io').strip()):
                # Import and push into the queue for immediate update check
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

            # XLSX importer
            if request.files and request.files.get('xlsx_file'):
                file = request.files['xlsx_file']

                if request.values.get('file_mapping') == 'wachete':
                    w_importer = import_xlsx_wachete()
                    w_importer.run(data=file, flash=flash, datastore=datastore)
                else:
                    w_importer = import_xlsx_custom()
                    # Build mapping of spreadsheet column number -> watch field type
                    # (renamed from 'map' to avoid shadowing the builtin)
                    column_map = {}
                    for i in range(10):
                        c = request.values.get(f"custom_xlsx[col_{i}]")
                        v = request.values.get(f"custom_xlsx[col_type_{i}]")
                        if c and v:
                            column_map[int(c)] = v

                    w_importer.import_profile = column_map
                    w_importer.run(data=file, flash=flash, datastore=datastore)

                for uuid in w_importer.new_uuids:
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
        output = render_template("import.html",
                                 form=form,
                                 import_url_list_remaining="\n".join(remaining_urls),
                                 original_distill_json=''
                                 )
        return output

    return import_blueprint
changedetectionio/blueprint/imports/importer.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import abstractmethod
2
+ import time
3
+ from wtforms import ValidationError
4
+ from loguru import logger
5
+
6
+ from changedetectionio.forms import validate_url
7
+
8
+
9
class Importer():
    """Base class for watch importers.

    Subclasses implement :meth:`run` and populate:
      new_uuids      -- uuids of watches successfully created
      good           -- count of successful imports
      remaining_data -- input rows/lines that could not be imported
      import_profile -- optional column mapping (used by the xlsx importers)
    """

    def __init__(self):
        # All mutable state lives on the instance.  (The previous class-level
        # mutable attributes were redundant with these assignments and risked
        # accidental sharing between importer instances.)
        self.new_uuids = []
        self.good = 0
        self.remaining_data = []
        self.import_profile = None

    @abstractmethod
    def run(self,
            data,
            flash,
            datastore):
        """Parse *data* and create watches in *datastore*, reporting via *flash*."""
        pass
26
+
27
+
28
class import_url_list(Importer):
    """
    Imports a list, can be in <code>https://example.com tag1, tag2, last tag</code> format
    """

    def run(self,
            data,
            flash,
            datastore,
            processor=None
            ):
        """Parse one URL per line (optionally followed by a space and a CSV
        tag list), adding each as a watch; lines that fail validation are
        collected in self.remaining_data for the user to retry."""
        started = time.time()
        accepted = 0
        lines = data.split("\n")

        if len(lines) > 5000:
            flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")

        for line in lines:
            candidate = line.strip()
            if not candidate:
                continue

            # Everything after the first space is treated as the CSV tag list
            tags = ""
            if ' ' in candidate:
                candidate, tags = candidate.split(" ", 1)

            # Flask wtform validators wont work with basic auth, use validators package
            # Up to 5000 per batch so we dont flood the server
            # @todo validators.url will fail when you add your own IP etc
            if len(candidate) and 'http' in candidate.lower() and accepted < 5000:
                extras = {'processor': processor} if processor else None
                new_uuid = datastore.add_watch(url=candidate.strip(), tag=tags, write_to_disk_now=False, extras=extras)

                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
                    accepted += 1
                    continue

            # Anything that didn't make it above is kept so the UI can show it back
            if self.remaining_data is None:
                self.remaining_data = []
            self.remaining_data.append(candidate)

        flash("{} Imported from list in {:.2f}s, {} Skipped.".format(accepted, time.time() - started, len(self.remaining_data)))
78
+
79
+
80
class import_distill_io_json(Importer):
    """Imports watches from a Distill.io JSON export.

    Expects a top-level {'data': [...]} structure; per entry it maps the
    first CSS 'exclude' to subtractive_selectors and the first 'include'
    to include_filters (xpath entries get the 'xpath:' prefix).
    """
    def run(self,
            data,
            flash,
            datastore,
            ):

        import json
        good = 0
        now = time.time()
        self.new_uuids=[]

        # @todo Use JSONSchema like in the API to validate here.

        try:
            data = json.loads(data.strip())
        except json.decoder.JSONDecodeError:
            flash("Unable to read JSON file, was it broken?", 'error')
            return

        if not data.get('data'):
            flash("JSON structure looks invalid, was it broken?", 'error')
            return

        for d in data.get('data'):
            # Each entry carries its selector config as a nested JSON string
            d_config = json.loads(d['config'])
            extras = {'title': d.get('name', None)}

            if len(d['uri']) and good < 5000:
                try:
                    # @todo we only support CSS ones at the moment
                    if d_config['selections'][0]['frames'][0]['excludes'][0]['type'] == 'css':
                        extras['subtractive_selectors'] = d_config['selections'][0]['frames'][0]['excludes'][0]['expr']
                except KeyError:
                    pass
                except IndexError:
                    pass
                extras['include_filters'] = []
                try:
                    if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
                        extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                    else:
                        extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                except KeyError:
                    pass
                except IndexError:
                    pass

                new_uuid = datastore.add_watch(url=d['uri'].strip(),
                                               tag=",".join(d.get('tags', [])),
                                               extras=extras,
                                               write_to_disk_now=False)

                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
                    good += 1

        # NOTE: remaining_data is never populated by this importer, so the
        # 'Skipped' count here is always 0.
        flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
139
+
140
+
141
class import_xlsx_wachete(Importer):
    """Imports watches from a Wachete .xlsx export.

    Row 1 is treated as the header; recognised columns include 'url',
    'name', 'xpath', 'folder', 'interval (min)' and 'dynamic wachet'.
    """

    def run(self,
            data,
            flash,
            datastore,
            ):

        good = 0
        now = time.time()
        self.new_uuids = []

        from openpyxl import load_workbook

        try:
            wb = load_workbook(data)
        except Exception as e:
            # @todo correct except
            flash("Unable to read export XLSX file, something wrong with the file?", 'error')
            return

        row_id = 2
        for row in wb.active.iter_rows(min_row=row_id):
            try:
                extras = {}
                data = {}
                for cell in row:
                    if not cell.value:
                        continue
                    # Header row (row 1) supplies the key for each cell value
                    column_title = wb.active.cell(row=1, column=cell.column).value.strip().lower()
                    data[column_title] = cell.value

                # Forced switch to webdriver/playwright/etc
                dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower() # Convert bool to str to cover all cases
                # libreoffice and others can have it as =FALSE() =TRUE(), or bool(true)
                if 'true' in dynamic_wachet or dynamic_wachet == '1':
                    extras['fetch_backend'] = 'html_webdriver'
                elif 'false' in dynamic_wachet or dynamic_wachet == '0':
                    extras['fetch_backend'] = 'html_requests'

                if data.get('xpath'):
                    # @todo split by || ?
                    extras['include_filters'] = [data.get('xpath')]
                if data.get('name'):
                    extras['title'] = data.get('name').strip()
                if data.get('interval (min)'):
                    # Convert a flat minutes value into the weeks/days/hours/minutes dict
                    minutes = int(data.get('interval (min)'))
                    hours, minutes = divmod(minutes, 60)
                    days, hours = divmod(hours, 24)
                    weeks, days = divmod(days, 7)
                    extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}

                # At minimum a URL is required.
                if data.get('url'):
                    try:
                        validate_url(data.get('url'))
                    except ValidationError as e:
                        logger.error(f">> Import URL error {data.get('url')} {str(e)}")
                        flash(f"Error processing row number {row_id}, URL value was incorrect, row was skipped.", 'error')
                        # Don't bother processing anything else on this row
                        continue

                    new_uuid = datastore.add_watch(url=data['url'].strip(),
                                                   extras=extras,
                                                   tag=data.get('folder'),
                                                   write_to_disk_now=False)
                    if new_uuid:
                        # Straight into the queue.
                        self.new_uuids.append(new_uuid)
                        good += 1
            except Exception as e:
                logger.error(e)
                flash(f"Error processing row number {row_id}, check all cell data types are correct, row was skipped.", 'error')
            else:
                # NOTE(review): row_id only advances when the row succeeded
                # (try/else), so reported row numbers drift after a failed
                # or skipped row - confirm whether that is intended.
                row_id += 1

        flash(
            "{} imported from Wachete .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
219
+
220
+
221
class import_xlsx_custom(Importer):
    """Imports watches from an arbitrary .xlsx file using a user-supplied
    column mapping.

    ``self.import_profile`` must be set before run(): a dict of
    {1-based column index -> field type}, where the field type is one of
    'url', 'tag', 'include_filters', 'interval_minutes', or any other
    watch 'extras' key (stored verbatim).
    """

    def run(self,
            data,
            flash,
            datastore,
            ):

        good = 0
        now = time.time()
        self.new_uuids = []

        from openpyxl import load_workbook

        try:
            wb = load_workbook(data)
        except Exception as e:
            # @todo correct except
            flash("Unable to read export XLSX file, something wrong with the file?", 'error')
            return

        # @todo cehck atleast 2 rows, same in other method
        from changedetectionio.forms import validate_url
        row_i = 1

        try:
            for row in wb.active.iter_rows():
                url = None
                tags = None
                extras = {}

                for cell in row:
                    # Only mapped, non-empty cells are considered
                    if not self.import_profile.get(cell.col_idx):
                        continue
                    if not cell.value:
                        continue

                    cell_map = self.import_profile.get(cell.col_idx)

                    cell_val = str(cell.value).strip() # could be bool

                    if cell_map == 'url':
                        url = cell.value.strip()
                        try:
                            validate_url(url)
                        except ValidationError as e:
                            logger.error(f">> Import URL error {url} {str(e)}")
                            flash(f"Error processing row number {row_i}, URL value was incorrect, row was skipped.", 'error')
                            # Don't bother processing anything else on this row
                            url = None
                            break
                    elif cell_map == 'tag':
                        tags = cell.value.strip()
                    elif cell_map == 'include_filters':
                        # @todo validate?
                        extras['include_filters'] = [cell.value.strip()]
                    elif cell_map == 'interval_minutes':
                        # Convert a flat minutes value into the weeks/days/hours/minutes dict
                        hours, minutes = divmod(int(cell_val), 60)
                        days, hours = divmod(hours, 24)
                        weeks, days = divmod(days, 7)
                        extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
                    else:
                        # Any other mapped type is passed straight through as an extra
                        extras[cell_map] = cell_val

                # At minimum a URL is required.
                if url:
                    new_uuid = datastore.add_watch(url=url,
                                                   extras=extras,
                                                   tag=tags,
                                                   write_to_disk_now=False)
                    if new_uuid:
                        # Straight into the queue.
                        self.new_uuids.append(new_uuid)
                        good += 1
        except Exception as e:
            logger.error(e)
            flash(f"Error processing row number {row_i}, check all cell data types are correct, row was skipped.", 'error')
        else:
            # NOTE(review): unlike the wachete importer, the try wraps the
            # whole loop, so row_i only ever advances here on full success -
            # the reported row number for a failure is always 1.  Confirm.
            row_i += 1

        flash(
            "{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
changedetectionio/blueprint/imports/templates/import.html ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{# Import page: three tabbed import sources — plain URL list, Distill.io JSON export,
   and .XLSX (Wachete export or custom column mapping). All tabs submit the same form
   to imports.import_page; the backend decides which importer to run from the fields set. #}
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="edit-form monospaced-textarea">

    <div class="tabs collapsable">
        <ul>
            <li class="tab" id=""><a href="#url-list">URL List</a></li>
            <li class="tab"><a href="#distill-io">Distill.io</a></li>
            <li class="tab"><a href="#xlsx">.XLSX &amp; Wachete</a></li>
        </ul>
    </div>

    <div class="box-wrap inner">
        <form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
            {# Tab 1: one URL per line; anything after the first space is a comma-separated tag list.
               Lines that fail validation are echoed back via import_url_list_remaining. #}
            <div class="tab-pane-inner" id="url-list">
                <div class="pure-control-group">
                    Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
                    (,):
                    <br>
                    <p><strong>Example: </strong><code>https://example.com tag1, tag2, last tag</code></p>
                    URLs which do not pass validation will stay in the textarea.
                </div>
                {{ render_field(form.processor, class="processor") }}

                <div class="pure-control-group">
                    <textarea name="urls" class="pure-input-1-2" placeholder="https://"
                              style="width: 100%;
                                     font-family:monospace;
                                     white-space: pre;
                                     overflow-wrap: normal;
                                     overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
                </div>
                <div id="quick-watch-processor-type"></div>

            </div>

            {# Tab 2: paste of a Distill.io JSON export; failed input is echoed back via original_distill_json. #}
            <div class="tab-pane-inner" id="distill-io">


                <div class="pure-control-group">
                    Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
                    This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
                    <br>
                    <p>
                        How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
                        Be sure to set your default fetcher to Chrome if required.<br>
                    </p>
                </div>


                <textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
                          font-family:monospace;
                          white-space: pre;
                          overflow-wrap: normal;
                          overflow-x: scroll;" placeholder="Example Distill.io JSON export file

{
  &quot;client&quot;: {
    &quot;local&quot;: 1
  },
  &quot;data&quot;: [
    {
      &quot;name&quot;: &quot;Unraid | News&quot;,
      &quot;uri&quot;: &quot;https://unraid.net/blog&quot;,
      &quot;config&quot;: &quot;{\&quot;selections\&quot;:[{\&quot;frames\&quot;:[{\&quot;index\&quot;:0,\&quot;excludes\&quot;:[],\&quot;includes\&quot;:[{\&quot;type\&quot;:\&quot;xpath\&quot;,\&quot;expr\&quot;:\&quot;(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]\&quot;}]}],\&quot;dynamic\&quot;:true,\&quot;delay\&quot;:2}],\&quot;ignoreEmptyText\&quot;:true,\&quot;includeStyle\&quot;:false,\&quot;dataAttr\&quot;:\&quot;text\&quot;}&quot;,
      &quot;tags&quot;: [],
      &quot;content_type&quot;: 2,
      &quot;state&quot;: 40,
      &quot;schedule&quot;: &quot;{\&quot;type\&quot;:\&quot;INTERVAL\&quot;,\&quot;params\&quot;:{\&quot;interval\&quot;:4447}}&quot;,
      &quot;ts&quot;: &quot;2022-03-27T15:51:15.667Z&quot;
    }
  ]
}
" rows="25">{{ original_distill_json }}</textarea>

            </div>
            {# Tab 3: .xlsx upload. The 4-column table below feeds custom_xlsx[col_N]/[col_type_N]
               pairs to the backend's "Custom mapping" importer (column index -> field type). #}
            <div class="tab-pane-inner" id="xlsx">
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.xlsx_file, class="processor") }}
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.file_mapping, class="processor") }}
                    </div>
                </fieldset>
                <div class="pure-control-group">
                    <span class="pure-form-message-inline">
                        Table of custom column and data types mapping for the <strong>Custom mapping</strong> File mapping type.
                    </span>
                    <table style="border: 1px solid #aaa; padding: 0.5rem; border-radius: 4px;">
                        <tr>
                            <td><strong>Column #</strong></td>
                            {% for n in range(4) %}
                                <td><input type="number" name="custom_xlsx[col_{{n}}]" style="width: 4rem;" min="1"></td>
                            {% endfor %}
                        </tr>
                        <tr>
                            <td><strong>Type</strong></td>
                            {% for n in range(4) %}
                                <td><select name="custom_xlsx[col_type_{{n}}]">
                                    <option value="" style="color: #aaa"> -- none --</option>
                                    <option value="url">URL</option>
                                    <option value="title">Title</option>
                                    <option value="include_filters">CSS/xPath filter</option>
                                    <option value="tag">Group / Tag name(s)</option>
                                    <option value="interval_minutes">Recheck time (minutes)</option>
                                </select></td>
                            {% endfor %}
                        </tr>
                    </table>
                </div>
            </div>
            <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
        </form>

    </div>
</div>

{% endblock %}
changedetectionio/blueprint/price_data_follower/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from changedetectionio.strtobool import strtobool
3
+ from flask import Blueprint, flash, redirect, url_for
4
+ from flask_login import login_required
5
+ from changedetectionio.store import ChangeDetectionStore
6
+ from changedetectionio import queuedWatchMetaData
7
+ from changedetectionio import worker_handler
8
+ from queue import PriorityQueue
9
+
10
PRICE_DATA_TRACK_ACCEPT = 'accepted'
PRICE_DATA_TRACK_REJECT = 'rejected'


def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):
    """Build the blueprint handling accept/reject of LD-JSON price-tracking offers.

    :param datastore: shared application datastore, mutated directly per watch uuid.
    :param update_q: priority queue used to trigger an immediate recheck on accept.
    :return: the configured Flask blueprint.
    """
    price_data_follower_blueprint = Blueprint('price_data_follower', __name__)

    # BUGFIX: the route decorator must be outermost (listed first). Decorators apply
    # bottom-up, so with @login_required above @route the blueprint registered the
    # *unwrapped* view function and the authentication check never actually ran.
    @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
    @login_required
    def accept(uuid):
        """Enable price tracking on the watch, switch it to the restock processor and requeue it."""
        # NOTE(review): an unknown uuid raises KeyError (HTTP 500); consider a flash+redirect.
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        # Drop the existing snapshot history so the restock processor starts clean.
        datastore.data['watching'][uuid].clear_watch()
        # Straight into the recheck queue with high priority.
        worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        return redirect(url_for("watchlist.index"))

    @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
    @login_required
    def reject(uuid):
        """Record that the user declined price tracking for this watch."""
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
        return redirect(url_for("watchlist.index"))

    return price_data_follower_blueprint
34
+
35
+
changedetectionio/blueprint/rss/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
# (value, label) choices for the application's rss_content_format setting;
# 'html' enables the html_colour diff rendering in the RSS feed entries.
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
changedetectionio/blueprint/rss/blueprint.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from changedetectionio.safe_jinja import render as jinja_render
3
+ from changedetectionio.store import ChangeDetectionStore
4
+ from feedgen.feed import FeedGenerator
5
+ from flask import Blueprint, make_response, request, url_for, redirect
6
+ from loguru import logger
7
+ import datetime
8
+ import pytz
9
+ import re
10
+ import time
11
+
12
+
13
# Control characters (excluding \t, \n, \r) that are illegal in XML 1.0 text.
BAD_CHARS_REGEX = r'[\x00-\x08\x0B\x0C\x0E-\x1F]'


# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
def scan_invalid_chars_in_rss(content):
    """Return True if *content* holds a feed-breaking control character.

    The first offending character is logged with ~20 chars of context; further
    occurrences are not reported since one hit is enough to require cleaning.
    """
    match = re.search(BAD_CHARS_REGEX, content)
    if match is None:
        return False

    pos = match.start()
    offender = content[pos]
    hex_value = f"0x{ord(offender):02x}"
    # Grab context around the bad character for the log line.
    snippet = content[max(0, pos - 20):min(len(content), pos + 21)]
    context = snippet.replace('\n', '\\n').replace('\r', '\\r')
    logger.warning(f"Invalid char {hex_value} at pos {pos}: ...{context}...")
    # First match is enough
    return True


def clean_entry_content(content):
    """Return *content* with all feed-breaking control characters stripped."""
    return re.sub(BAD_CHARS_REGEX, '', content)
35
+
36
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the RSS blueprint: a token-protected feed of unviewed watch changes."""
    rss_blueprint = Blueprint('rss', __name__)

    # Some RSS reader situations ended up with rss/ (forward slash after RSS) due
    # to some earlier blueprint rerouting work, it should goto feed.
    @rss_blueprint.route("/", methods=['GET'])
    def extraslash():
        """Redirect the trailing-slash variant to the real feed route."""
        return redirect(url_for('rss.feed'))

    # Import the login decorator if needed
    # from changedetectionio.auth_decorator import login_optionally_required
    @rss_blueprint.route("", methods=['GET'])
    def feed():
        """Render an RSS feed of HTML diffs for all unviewed, changed watches.

        Access requires ?token= matching the application's rss_access_token.
        An optional ?tag= (name or uuid) limits the feed to one tag/group.
        """
        now = time.time()
        # Always requires token set
        app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
        rss_url_token = request.args.get('token')
        if rss_url_token != app_rss_token:
            return "Access denied, bad token", 403

        from changedetectionio import diff
        limit_tag = request.args.get('tag', '').lower().strip()
        # Be sure limit_tag is a uuid - callers may pass the human tag title instead.
        for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
            if limit_tag == tag.get('title', '').lower().strip():
                limit_tag = uuid

        # Sort by last_changed and add the uuid which is usually the key..
        sorted_watches = []

        # @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
        for uuid, watch in datastore.data['watching'].items():
            # @todo tag notification_muted skip also (improve Watch model)
            if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
                continue
            if limit_tag and not limit_tag in watch['tags']:
                continue
            watch['uuid'] = uuid
            sorted_watches.append(watch)

        sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)

        fg = FeedGenerator()
        fg.title('changedetection.io')
        fg.description('Feed description')
        fg.link(href='https://changedetection.io')

        # 'html' format setting enables coloured diff markup in each entry.
        html_colour_enable = False
        if datastore.data['settings']['application'].get('rss_content_format') == 'html':
            html_colour_enable = True

        for watch in sorted_watches:

            dates = list(watch.history.keys())
            # Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
            if len(dates) < 2:
                continue

            if not watch.viewed:
                # Re #239 - GUID needs to be individual for each event
                # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
                guid = "{}/{}".format(watch['uuid'], watch.last_changed)
                fe = fg.add_entry()

                # Include a link to the diff page, they will have to login here to see if password protection is enabled.
                # Description is the page you watch, link takes you to the diff JS UI page
                # Dict val base_url will get overriden with the env var if it is set.
                ext_base_url = datastore.data['settings']['application'].get('active_base_url')
                # @todo fix
                # NOTE(review): ext_base_url is currently unused - url_for(_external=True) builds the link below.

                # Because we are called via whatever web server, flask should figure out the right path (
                diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}

                fe.link(link=diff_link)

                # @todo watch should be a getter - watch.get('title') (internally if URL else..)

                watch_title = watch.get('title') if watch.get('title') else watch.get('url')
                fe.title(title=watch_title)
                try:
                    # Render the diff between the two most recent snapshots as HTML.
                    html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
                                                 newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
                                                 include_equal=False,
                                                 line_feed_sep="<br>",
                                                 html_colour=html_colour_enable
                                                 )
                except FileNotFoundError as e:
                    # Snapshot file went missing on disk - degrade to an explanatory entry body.
                    html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."

                # @todo Make this configurable and also consider html-colored markup
                # @todo User could decide if <link> goes to the diff page, or to the watch link
                rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"

                content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)

                # Out of range chars could also break feedgen
                if scan_invalid_chars_in_rss(content):
                    content = clean_entry_content(content)

                fe.content(content=content, type='CDATA')
                fe.guid(guid, permalink=False)
                dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
                dt = dt.replace(tzinfo=pytz.UTC)
                fe.pubDate(dt)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        logger.trace(f"RSS generated in {time.time() - now:.3f}s")
        return response

    return rss_blueprint
changedetectionio/blueprint/settings/__init__.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from copy import deepcopy
3
+ from datetime import datetime
4
+ from zoneinfo import ZoneInfo, available_timezones
5
+ import secrets
6
+ import flask_login
7
+ from flask import Blueprint, render_template, request, redirect, url_for, flash
8
+
9
+ from changedetectionio.store import ChangeDetectionStore
10
+ from changedetectionio.auth_decorator import login_optionally_required
11
+
12
+
13
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the /settings blueprint: global settings form, API-key reset and notification-log viewer."""
    settings_blueprint = Blueprint('settings', __name__, template_folder="templates")

    @settings_blueprint.route("", methods=['GET', "POST"])
    @login_optionally_required
    def settings_page():
        """Render the global settings form (GET) and persist validated changes (POST)."""
        from changedetectionio import forms

        # Work on a copy so building form defaults never mutates the live settings.
        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
            available_proxies = list(datastore.proxy_list.keys())
            # When enabled
            system_proxy = datastore.data['settings']['requests']['proxy']
            # In the case it doesnt exist anymore
            if not system_proxy in available_proxies:
                system_proxy = None

            default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
            # Used by the form handler to keep or remove the proxy settings
            default['proxy_list'] = available_proxies[0]

        # Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
        form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
                                        data=default,
                                        extra_notification_tokens=datastore.get_unique_notification_tokens_available()
                                        )

        # Remove the last option 'System default'
        form.application.form.notification_format.choices.pop()

        if datastore.proxy_list is None:
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.requests.form.proxy
        else:
            # Rebuild the proxy choices from the live proxy list each request.
            form.requests.form.proxy.choices = []
            for p in datastore.proxy_list:
                form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

        if request.method == 'POST':
            # Password unset is a GET, but we can lock the session to a salted env password to always need the password
            if form.application.form.data.get('removepassword_button', False):
                # SALTED_PASS means the password is "locked" to what we set in the Env var
                if not os.getenv("SALTED_PASS", False):
                    datastore.remove_password()
                    flash("Password protection removed.", 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('settings.settings_page'))

            if form.validate():
                # Don't set password to False when a password is set - should be only removed with the `removepassword` button
                app_update = dict(deepcopy(form.data['application']))

                # Never update password with '' or False (Added by wtforms when not in submission)
                if 'password' in app_update and not app_update['password']:
                    del (app_update['password'])

                datastore.data['settings']['application'].update(app_update)

                # Handle dynamic worker count adjustment
                old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
                new_worker_count = form.data['requests'].get('workers', 1)

                datastore.data['settings']['requests'].update(form.data['requests'])

                # Adjust worker count if it changed
                if new_worker_count != old_worker_count:
                    from changedetectionio import worker_handler
                    from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds

                    result = worker_handler.adjust_async_worker_count(
                        new_count=new_worker_count,
                        update_q=update_q,
                        notification_q=notification_q,
                        app=app,
                        datastore=ds
                    )

                    if result['status'] == 'success':
                        flash(f"Worker count adjusted: {result['message']}", 'notice')
                    elif result['status'] == 'not_supported':
                        flash("Dynamic worker adjustment not supported for sync workers", 'warning')
                    elif result['status'] == 'error':
                        flash(f"Error adjusting workers: {result['message']}", 'error')

                # A newly-set password forces a logout so the user must re-authenticate.
                if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
                    datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
                    datastore.needs_write_urgent = True
                    flash("Password protection enabled.", 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('watchlist.index'))

                datastore.needs_write_urgent = True
                flash("Settings updated.")

            else:
                flash("An error occurred, please see below.", "error")

        # Convert to ISO 8601 format, all date/time relative events stored as UTC time
        utc_time = datetime.now(ZoneInfo("UTC")).isoformat()

        output = render_template("settings.html",
                                 api_key=datastore.data['settings']['application'].get('api_access_token'),
                                 available_timezones=sorted(available_timezones()),
                                 emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                 extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
                                 form=form,
                                 hide_remove_pass=os.getenv("SALTED_PASS", False),
                                 min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
                                 settings_application=datastore.data['settings']['application'],
                                 timezone_default_config=datastore.data['settings']['application'].get('timezone'),
                                 utc_time=utc_time,
                                 )

        return output

    @settings_blueprint.route("/reset-api-key", methods=['GET'])
    @login_optionally_required
    def settings_reset_api_key():
        """Replace the API access token with a fresh random one and persist it."""
        secret = secrets.token_hex(16)
        datastore.data['settings']['application']['api_access_token'] = secret
        datastore.needs_write_urgent = True
        flash("API Key was regenerated.")
        return redirect(url_for('settings.settings_page')+'#api')

    @settings_blueprint.route("/notification-logs", methods=['GET'])
    @login_optionally_required
    def notification_logs():
        """Show the in-memory notification debug log (placeholder text when empty)."""
        from changedetectionio.flask_app import notification_debug_log
        output = render_template("notification-log.html",
                                 logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
        return output

    return settings_blueprint