William Mattingly committed on
Commit
eea4038
·
1 Parent(s): 163e18f

Remove .gitattributes and README copy files; enhance app.py for HuggingFace Spaces compatibility and session cookie handling in proxy environments. Update Dockerfile for improved deployment instructions and user permissions.

Browse files
Files changed (5) hide show
  1. .gitattributes copy +0 -2
  2. Dockerfile +59 -28
  3. README copy.md +0 -180
  4. README.md +177 -7
  5. app.py +26 -5
.gitattributes copy DELETED
@@ -1,2 +0,0 @@
1
- # Auto detect text files and perform LF normalization
2
- * text=auto
 
 
 
Dockerfile CHANGED
@@ -1,49 +1,80 @@
1
- # ── Scripture Detector ────────────────────────────────────────────────────────
2
  #
3
- # Build:
4
- # docker build -t scripture-detector .
 
 
 
 
5
  #
6
- # Run (ephemeral in-memory database — data resets on container restart):
7
- # docker run -p 5001:5001 scripture-detector
 
 
8
  #
9
- # Run (persistent database — mount a host directory):
10
- # docker run -p 5001:5001 \
11
- # -v "$(pwd)/data_volume:/app/db" \
 
12
  # -e SD_DB_DIR=/app/db \
13
- # -e GEMINI_API_KEY=your_key_here \
14
- # scripture-detector python app.py
15
- #
16
- # The default CMD uses --cache-db (in-memory SQLite) which is ideal for
17
- # workshops and demos where persistence across restarts is not needed.
18
  # ─────────────────────────────────────────────────────────────────────────────
19
 
20
  FROM python:3.12-slim
21
 
22
- # Install git (required by Hugging Face Spaces build system) and uv
23
- RUN apt-get update && apt-get install -y --no-install-recommends git \
 
24
  && rm -rf /var/lib/apt/lists/*
25
- RUN pip install --no-cache-dir uv
26
 
 
 
27
  WORKDIR /app
28
 
 
 
29
 
30
- # Copy dependency manifest first to leverage Docker layer caching.
31
- # Dependencies are only re-installed when pyproject.toml / uv.lock changes.
32
- COPY pyproject.toml uv.lock* ./
33
-
34
- # Sync dependencies (no dev extras)
35
  RUN uv sync --no-dev --frozen || uv sync --no-dev
36
 
37
- # Copy application source
38
- COPY . .
39
 
40
- # Bind to all interfaces inside the container
 
 
 
 
 
 
 
 
41
  ENV SD_HOST=0.0.0.0
42
  ENV SD_PORT=7860
43
 
44
- # Expose the Flask port
 
 
 
 
 
 
 
 
45
  EXPOSE 7860
46
 
47
- # Default: run with --cache-db (in-memory database, no volume needed).
48
- # To use a persistent database instead, override CMD and set SD_DB_DIR.
49
- CMD ["uv", "run", "python", "app.py", "--cache-db"]
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Scripture Detector — HuggingFace Spaces compatible Dockerfile ─────────────
2
  #
3
+ # Usage on HuggingFace Spaces
4
+ # ───────────────────────────
5
+ # 1. Push this repo to a HF Space (Docker SDK).
6
+ # 2. Add the following secrets in Space Settings → Variables and secrets:
7
+ # SD_SECRET_KEY → any long random string, e.g. `openssl rand -hex 32`
8
+ # GEMINI_API_KEY → your Google AI Studio key (optional; can be set in-app)
9
  #
10
+ # Local usage (standard)
11
+ # ──────────────────────
12
+ # docker build -t scripture-detector .
13
+ # docker run -p 7860:7860 scripture-detector
14
  #
15
+ # Local usage with a persistent database (mount a host directory)
16
+ # ───────────────────────────────────────────────────────────────
17
+ # docker run -p 7860:7860 \
18
+ # -v "$(pwd)/db_volume:/app/db" \
19
  # -e SD_DB_DIR=/app/db \
20
+ # -e SCRIPTURE_DETECTOR_CACHE_DB=0 \
21
+ # scripture-detector
 
 
 
22
  # ─────────────────────────────────────────────────────────────────────────────
23
 
24
  FROM python:3.12-slim
25
 
26
+ # ── System dependencies ───────────────────────────────────────────────────────
27
+ RUN apt-get update && apt-get install -y --no-install-recommends \
28
+ git curl \
29
  && rm -rf /var/lib/apt/lists/*
 
30
 
31
+ # ── Non-root user (required by HuggingFace Spaces) ───────────────────────────
32
+ RUN useradd -m -u 1000 user
33
  WORKDIR /app
34
 
35
+ # ── Install uv (fast Python package manager) ─────────────────────────────────
36
+ RUN pip install --no-cache-dir uv
37
 
38
+ # ── Dependencies (cached layer — only re-runs when pyproject.toml changes) ───
39
+ COPY --chown=user:user pyproject.toml uv.lock* ./
 
 
 
40
  RUN uv sync --no-dev --frozen || uv sync --no-dev
41
 
42
+ # ── Application source ────────────────────────────────────────────────────────
43
+ COPY --chown=user:user . .
44
 
45
+ # ── Runtime environment ───────────────────────────────────────────────────────
46
+ # Per-user in-memory database (each browser session gets its own isolated DB).
47
+ ENV SCRIPTURE_DETECTOR_CACHE_DB=1
48
+
49
+ # Tell Flask/app.py to configure SameSite=None; Secure cookies so they work
50
+ # inside HuggingFace's iframe embedding.
51
+ ENV SD_BEHIND_PROXY=1
52
+
53
+ # Bind to all interfaces; HF Spaces expects port 7860.
54
  ENV SD_HOST=0.0.0.0
55
  ENV SD_PORT=7860
56
 
57
+ # SD_SECRET_KEY should be set as a HF Space Secret (not hardcoded here).
58
+ # If absent, a random key is generated at startup — sessions reset on redeploy.
59
+
60
+ # Unbuffered stdout/stderr so gunicorn's log output appears immediately
61
+ ENV PYTHONUNBUFFERED=1
62
+
63
+ # ── Non-root user ─────────────────────────────────────────────────────────────
64
+ USER user
65
+
66
  EXPOSE 7860
67
 
68
+ # ── Start gunicorn ────────────────────────────────────────────────────────────
69
+ # Single worker: the per-session in-memory databases live in one process.
70
+ # More than one worker would mean different workers have different session stores,
71
+ # causing "I added a source but now it's gone" bugs.
72
+ CMD ["uv", "run", "gunicorn", \
73
+ "--worker-class", "gthread", \
74
+ "--workers", "1", \
75
+ "--threads", "4", \
76
+ "--bind", "0.0.0.0:7860", \
77
+ "--timeout", "120", \
78
+ "--access-logfile", "-", \
79
+ "--error-logfile", "-", \
80
+ "app:app"]
README copy.md DELETED
@@ -1,180 +0,0 @@
1
- # Scripture Detector
2
-
3
- > **AI-powered detection and analysis of biblical quotations, paraphrases, and allusions in historical texts.**
4
-
5
- ![Sources page](static/screenshots/sources.png)
6
-
7
- Developed by **Dr. William J.B. Mattingly**, Cultural Heritage Data Scientist at Yale University.
8
-
9
- Built for the international workshop **[Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)** — held **March 5–6, 2026** at the Austrian Academy of Sciences, Vienna.
10
-
11
- ---
12
-
13
- ## Overview
14
-
15
- Scripture Detector is a local web application that uses Google Gemini to automatically find, classify, and annotate every biblical reference in any text you provide — from full verbatim quotations to subtle allusions. It renders an interactive, color-coded view of the source text with side-by-side Bible verse lookup, distribution charts, and a manual annotation editor.
16
-
17
- **The only external dependency is a free Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey). No cloud account, credit card, or additional infrastructure is required.
18
-
19
- ---
20
-
21
- ## Screenshots
22
-
23
- ### Sources Page — with Advanced Search
24
- Manage all your text sources. Use the search bar to search source content in real time, or open the **Advanced** panel to stack filters by Bible book, chapter, or verse with AND / OR logic. Match evidence appears directly on each result card.
25
-
26
- ![Sources page](static/screenshots/sources.png)
27
-
28
- ### Text Viewer
29
- Color-coded annotations appear directly in the text. Click any highlighted passage to see the matched Bible verse(s) in the panel on the right.
30
-
31
- ![Viewer page](static/screenshots/viewer.png)
32
-
33
- ### Analytics Dashboard
34
- Explore scripture distribution across all sources — broken down by Bible book, testament, and quote type.
35
-
36
- ![Dashboard](static/screenshots/dashboard.png)
37
-
38
- ### Settings
39
- Configure your Gemini API key or Google Vertex AI credentials, and select which Gemini model to use.
40
-
41
- ![Settings page](static/screenshots/settings.png)
42
-
43
- ### About Page
44
- Full documentation of the application, its features, and how it works.
45
-
46
- ![About page](static/screenshots/about.png)
47
-
48
- ---
49
-
50
- ## Features
51
-
52
- - **Full-document AI analysis** — send any text to Gemini and receive a structured list of every scripture reference with verse citations and classification types.
53
- - **Four classification types**: Full, Partial, Paraphrase, and Allusion.
54
- - **Color-coded in-text highlighting** — each type is rendered in a distinct color directly within the source text.
55
- - **Click-to-explore interactivity** — click a highlighted passage to highlight the corresponding annotation card, and vice versa.
56
- - **Selection-based re-analysis** — highlight any portion of the text to re-run AI detection on just that selection.
57
- - **Manual annotation editor** with an integrated Bible verse picker (book → chapter → verse).
58
- - **Advanced search** — real-time multi-filter search across all sources by text content, Bible book, chapter, or verse; filters stack with AND / OR logic and matched evidence appears inline on each result card.
59
- - **Distribution charts** per source and across all sources (by Bible book, testament, and type).
60
- - **Global analytics dashboard** aggregating data across all sources.
61
- - **Model switching** — choose between available Gemini model versions in the UI.
62
- - **Runs entirely locally** — your texts are never transmitted anywhere except for the Gemini API call itself.
63
-
64
- ---
65
-
66
- ## Advanced Search
67
-
68
- The Sources page includes a real-time multi-filter search system:
69
-
70
- | Filter Type | What it matches |
71
- |---|---|
72
- | **Text Content** | Any source whose full text contains the search string |
73
- | **Bible Book** | Any source with at least one reference to the chosen book |
74
- | **Chapter** | Any source with a reference to the chosen chapter (e.g. Psalms 23) |
75
- | **Verse** | Any source with a reference to the exact chosen verse (e.g. John 3:16) |
76
-
77
- Filters can be stacked in any combination. The **AND** mode requires a source to satisfy *every* filter; **OR** mode returns sources satisfying *any* filter. Results update within ~350 ms of each keystroke or dropdown change. Matched text snippets and verse citations appear as inline evidence on each result card.
78
-
79
- ---
80
-
81
- ## Quick Start
82
-
83
- ### 1. Prerequisites
84
-
85
- - Python 3.10+
86
- - [uv](https://docs.astral.sh/uv/) (recommended) or pip
87
- - A free **Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey)
88
-
89
- ### 2. Install & Run
90
-
91
- ```bash
92
- git clone <repo-url>
93
- cd scripture-detector
94
-
95
- # Using uv (recommended)
96
- uv run python app.py
97
-
98
- # Or using pip
99
- pip install -e .
100
- python app.py
101
- ```
102
-
103
- The app starts at **http://127.0.0.1:5001**.
104
-
105
- ### 3. Configure API Key
106
-
107
- 1. Open [http://127.0.0.1:5001/settings](http://127.0.0.1:5001/settings)
108
- 2. Select **Gemini API** as the provider
109
- 3. Paste your API key from [Google AI Studio](https://aistudio.google.com/apikey)
110
- 4. Click **Save Settings**
111
-
112
- ### 4. Analyze a Text
113
-
114
- 1. Go to the **Sources** page and click **Add Source**
115
- 2. Give your source a name and paste in the text to analyze
116
- 3. Click **View** to open the text viewer
117
- 4. Click **Process with AI** — Gemini will detect all scripture references within seconds
118
- 5. Click any highlighted passage to explore the matched Bible verses
119
-
120
- ---
121
-
122
- ## Quote Classification
123
-
124
- | Type | Description |
125
- |---|---|
126
- | **Full** | A complete or near-complete verse quoted verbatim |
127
- | **Partial** | A recognizable portion of a verse with minor variation or truncation |
128
- | **Paraphrase** | Biblical content restated in different words, preserving the meaning |
129
- | **Allusion** | A brief phrase, thematic echo, or indirect reference to a specific verse |
130
-
131
- ---
132
-
133
- ## Project Structure
134
-
135
- ```
136
- scripture-detector/
137
- ├── app.py # Flask application and API routes
138
- ├── database.py # SQLite database layer
139
- ├── main.py # CLI batch evaluation script
140
- ├── data/
141
- │   ├── bible.tsv # Full Bible verse database (35,000+ verses)
142
- │   └── book_mapping.tsv
143
- ├── templates/
144
- │   ├── sources.html # Sources listing page
145
- │   ├── viewer.html # Annotated text viewer
146
- │   ├── dashboard.html # Global analytics dashboard
147
- │   ├── settings.html # API configuration
148
- │   └── about.html # About / documentation page
149
- └── static/
150
-     ├── style.css # Yale color palette stylesheet
151
-     ├── logo.svg # Application logo
152
-     └── favicon.svg # Browser tab icon
153
- ```
154
-
155
- ---
156
-
157
- ## API Providers
158
-
159
- ### Gemini API (Free Tier)
160
- The simplest option. Get a free key at [Google AI Studio](https://aistudio.google.com/apikey) — no billing required. Select **Gemini API** in Settings and paste your key.
161
-
162
- ### Google Vertex AI
163
- For enterprise use or higher rate limits. Requires a Google Cloud project with Vertex AI enabled. Select **Vertex AI** in Settings and enter your project ID and location.
164
-
165
- ---
166
-
167
- ## About
168
-
169
- **Developer:** Dr. William J.B. Mattingly
170
- **Affiliation:** Yale University, Cultural Heritage Data Scientist
171
- **Workshop:** [Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)
172
- **Workshop Dates:** March 5–6, 2026
173
- **Workshop Venue:** Austrian Academy of Sciences, PSK Georg-Coch-Platz 2, 1010 Vienna
174
- **Organisers:** Digital Lab, Institute for Medieval Research, Austrian Academy of Sciences & [SOLEMNE](https://canones.org/), Radboud University
175
-
176
- ---
177
-
178
- ## License
179
-
180
- MIT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,10 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: Scripture Detector
3
- emoji: 👀
4
- colorFrom: red
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
+ # Scripture Detector
2
+
3
+ > **AI-powered detection and analysis of biblical quotations, paraphrases, and allusions in historical texts.**
4
+
5
+ ![Sources page](static/screenshots/sources.png)
6
+
7
+ Developed by **Dr. William J.B. Mattingly**, Cultural Heritage Data Scientist at Yale University.
8
+
9
+ Built for the international workshop **[Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)** — held **March 5–6, 2026** at the Austrian Academy of Sciences, Vienna.
10
+
11
+ ---
12
+
13
+ ## Overview
14
+
15
+ Scripture Detector is a local web application that uses Google Gemini to automatically find, classify, and annotate every biblical reference in any text you provide — from full verbatim quotations to subtle allusions. It renders an interactive, color-coded view of the source text with side-by-side Bible verse lookup, distribution charts, and a manual annotation editor.
16
+
17
+ **The only external dependency is a free Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey). No cloud account, credit card, or additional infrastructure is required.
18
+
19
+ ---
20
+
21
+ ## Screenshots
22
+
23
+ ### Sources Page — with Advanced Search
24
+ Manage all your text sources. Use the search bar to search source content in real time, or open the **Advanced** panel to stack filters by Bible book, chapter, or verse with AND / OR logic. Match evidence appears directly on each result card.
25
+
26
+ ![Sources page](static/screenshots/sources.png)
27
+
28
+ ### Text Viewer
29
+ Color-coded annotations appear directly in the text. Click any highlighted passage to see the matched Bible verse(s) in the panel on the right.
30
+
31
+ ![Viewer page](static/screenshots/viewer.png)
32
+
33
+ ### Analytics Dashboard
34
+ Explore scripture distribution across all sources — broken down by Bible book, testament, and quote type.
35
+
36
+ ![Dashboard](static/screenshots/dashboard.png)
37
+
38
+ ### Settings
39
+ Configure your Gemini API key or Google Vertex AI credentials, and select which Gemini model to use.
40
+
41
+ ![Settings page](static/screenshots/settings.png)
42
+
43
+ ### About Page
44
+ Full documentation of the application, its features, and how it works.
45
+
46
+ ![About page](static/screenshots/about.png)
47
+
48
+ ---
49
+
50
+ ## Features
51
+
52
+ - **Full-document AI analysis** — send any text to Gemini and receive a structured list of every scripture reference with verse citations and classification types.
53
+ - **Four classification types**: Full, Partial, Paraphrase, and Allusion.
54
+ - **Color-coded in-text highlighting** — each type is rendered in a distinct color directly within the source text.
55
+ - **Click-to-explore interactivity** — click a highlighted passage to highlight the corresponding annotation card, and vice versa.
56
+ - **Selection-based re-analysis** — highlight any portion of the text to re-run AI detection on just that selection.
57
+ - **Manual annotation editor** with an integrated Bible verse picker (book → chapter → verse).
58
+ - **Advanced search** — real-time multi-filter search across all sources by text content, Bible book, chapter, or verse; filters stack with AND / OR logic and matched evidence appears inline on each result card.
59
+ - **Distribution charts** per source and across all sources (by Bible book, testament, and type).
60
+ - **Global analytics dashboard** aggregating data across all sources.
61
+ - **Model switching** — choose between available Gemini model versions in the UI.
62
+ - **Runs entirely locally** — your texts are never transmitted anywhere except for the Gemini API call itself.
63
+
64
+ ---
65
+
66
+ ## Advanced Search
67
+
68
+ The Sources page includes a real-time multi-filter search system:
69
+
70
+ | Filter Type | What it matches |
71
+ |---|---|
72
+ | **Text Content** | Any source whose full text contains the search string |
73
+ | **Bible Book** | Any source with at least one reference to the chosen book |
74
+ | **Chapter** | Any source with a reference to the chosen chapter (e.g. Psalms 23) |
75
+ | **Verse** | Any source with a reference to the exact chosen verse (e.g. John 3:16) |
76
+
77
+ Filters can be stacked in any combination. The **AND** mode requires a source to satisfy *every* filter; **OR** mode returns sources satisfying *any* filter. Results update within ~350 ms of each keystroke or dropdown change. Matched text snippets and verse citations appear as inline evidence on each result card.
78
+
79
+ ---
80
+
81
+ ## Quick Start
82
+
83
+ ### 1. Prerequisites
84
+
85
+ - Python 3.10+
86
+ - [uv](https://docs.astral.sh/uv/) (recommended) or pip
87
+ - A free **Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey)
88
+
89
+ ### 2. Install & Run
90
+
91
+ ```bash
92
+ git clone <repo-url>
93
+ cd scripture-detector
94
+
95
+ # Using uv (recommended)
96
+ uv run python app.py
97
+
98
+ # Or using pip
99
+ pip install -e .
100
+ python app.py
101
+ ```
102
+
103
+ The app starts at **http://127.0.0.1:5001**.
104
+
105
+ ### 3. Configure API Key
106
+
107
+ 1. Open [http://127.0.0.1:5001/settings](http://127.0.0.1:5001/settings)
108
+ 2. Select **Gemini API** as the provider
109
+ 3. Paste your API key from [Google AI Studio](https://aistudio.google.com/apikey)
110
+ 4. Click **Save Settings**
111
+
112
+ ### 4. Analyze a Text
113
+
114
+ 1. Go to the **Sources** page and click **Add Source**
115
+ 2. Give your source a name and paste in the text to analyze
116
+ 3. Click **View** to open the text viewer
117
+ 4. Click **Process with AI** — Gemini will detect all scripture references within seconds
118
+ 5. Click any highlighted passage to explore the matched Bible verses
119
+
120
+ ---
121
+
122
+ ## Quote Classification
123
+
124
+ | Type | Description |
125
+ |---|---|
126
+ | **Full** | A complete or near-complete verse quoted verbatim |
127
+ | **Partial** | A recognizable portion of a verse with minor variation or truncation |
128
+ | **Paraphrase** | Biblical content restated in different words, preserving the meaning |
129
+ | **Allusion** | A brief phrase, thematic echo, or indirect reference to a specific verse |
130
+
131
+ ---
132
+
133
+ ## Project Structure
134
+
135
+ ```
136
+ scripture-detector/
137
+ ├── app.py # Flask application and API routes
138
+ ├── database.py # SQLite database layer
139
+ ├── main.py # CLI batch evaluation script
140
+ ├── data/
141
+ │   ├── bible.tsv # Full Bible verse database (35,000+ verses)
142
+ │   └── book_mapping.tsv
143
+ ├── templates/
144
+ │   ├── sources.html # Sources listing page
145
+ │   ├── viewer.html # Annotated text viewer
146
+ │   ├── dashboard.html # Global analytics dashboard
147
+ │   ├── settings.html # API configuration
148
+ │   └── about.html # About / documentation page
149
+ └── static/
150
+     ├── style.css # Yale color palette stylesheet
151
+     ├── logo.svg # Application logo
152
+     └── favicon.svg # Browser tab icon
153
+ ```
154
+
155
+ ---
156
+
157
+ ## API Providers
158
+
159
+ ### Gemini API (Free Tier)
160
+ The simplest option. Get a free key at [Google AI Studio](https://aistudio.google.com/apikey) — no billing required. Select **Gemini API** in Settings and paste your key.
161
+
162
+ ### Google Vertex AI
163
+ For enterprise use or higher rate limits. Requires a Google Cloud project with Vertex AI enabled. Select **Vertex AI** in Settings and enter your project ID and location.
164
+
165
  ---
166
+
167
+ ## About
168
+
169
+ **Developer:** Dr. William J.B. Mattingly
170
+ **Affiliation:** Yale University, Cultural Heritage Data Scientist
171
+ **Workshop:** [Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)
172
+ **Workshop Dates:** March 5–6, 2026
173
+ **Workshop Venue:** Austrian Academy of Sciences, PSK Georg-Coch-Platz 2, 1010 Vienna
174
+ **Organisers:** Digital Lab, Institute for Medieval Research, Austrian Academy of Sciences & [SOLEMNE](https://canones.org/), Radboud University
175
+
176
  ---
177
 
178
+ ## License
179
+
180
+ MIT
app.py CHANGED
@@ -2,10 +2,11 @@ import os
2
  import sys
3
 
4
  # ── parse custom flags BEFORE importing database (which reads env vars) ───────
 
 
5
  _argv = sys.argv[1:]
6
  if "--cache-db" in _argv:
7
  os.environ["SCRIPTURE_DETECTOR_CACHE_DB"] = "1"
8
- # Remove our custom flags so Flask/werkzeug doesn't choke on them
9
  sys.argv = [sys.argv[0]] + [a for a in _argv if a != "--cache-db"]
10
 
11
  import csv
@@ -18,6 +19,7 @@ from datetime import date
18
  from pathlib import Path
19
 
20
  from flask import Flask, render_template, jsonify, request, redirect, url_for, Response, session
 
21
  from google import genai
22
 
23
  import database # imported as module so we can write to database.session_local
@@ -34,11 +36,30 @@ from tei import source_to_tei, tei_to_source_data
34
 
35
  app = Flask(__name__)
36
 
37
- # Flask sessions need a secret key for signing the session cookie.
38
- # In production set SD_SECRET_KEY in the environment; otherwise a random
39
- # key is generated at startup (sessions survive only while the server runs).
 
 
 
 
 
40
  app.secret_key = os.environ.get("SD_SECRET_KEY") or os.urandom(32)
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  _CACHE_MODE = bool(os.environ.get("SCRIPTURE_DETECTOR_CACHE_DB"))
43
 
44
 
@@ -49,7 +70,7 @@ def _bind_session_db():
49
  return
50
  if "_db_sid" not in session:
51
  session["_db_sid"] = str(uuid.uuid4())
52
- session.permanent = True # honour PERMANENT_SESSION_LIFETIME
53
  database.session_local.session_id = session["_db_sid"]
54
 
55
  PROJECT_ROOT = Path(__file__).resolve().parent
 
2
  import sys
3
 
4
  # ── parse custom flags BEFORE importing database (which reads env vars) ───────
5
+ # The --cache-db CLI flag sets the env var so gunicorn deployments can instead
6
+ # set SCRIPTURE_DETECTOR_CACHE_DB=1 in their environment directly.
7
  _argv = sys.argv[1:]
8
  if "--cache-db" in _argv:
9
  os.environ["SCRIPTURE_DETECTOR_CACHE_DB"] = "1"
 
10
  sys.argv = [sys.argv[0]] + [a for a in _argv if a != "--cache-db"]
11
 
12
  import csv
 
19
  from pathlib import Path
20
 
21
  from flask import Flask, render_template, jsonify, request, redirect, url_for, Response, session
22
+ from werkzeug.middleware.proxy_fix import ProxyFix
23
  from google import genai
24
 
25
  import database # imported as module so we can write to database.session_local
 
36
 
37
  app = Flask(__name__)
38
 
39
+ # Trust the X-Forwarded-* headers from reverse proxies (HuggingFace, nginx…).
40
+ # This lets Flask see the real HTTPS scheme so secure cookies work correctly.
41
+ app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
42
+
43
+ # ── Secret key ────────────────────────────────────────────────────────────────
44
+ # Set SD_SECRET_KEY in the environment (HF Spaces → Settings → Secrets) so
45
+ # sessions survive server restarts. A random key is used as a safe fallback
46
+ # (sessions reset whenever the server restarts).
47
  app.secret_key = os.environ.get("SD_SECRET_KEY") or os.urandom(32)
48
 
49
+ # ── Session cookie settings ───────────────────────────────────────────────────
50
+ # HuggingFace Spaces embeds the app inside an <iframe>. Browsers block
51
+ # SameSite=Lax cookies in cross-site iframes, which would create a new session
52
+ # on every request and make the per-user database invisible.
53
+ # SameSite=None + Secure=True is the correct fix for iframe deployments.
54
+ # The settings are opt-in via SD_BEHIND_PROXY (or SCRIPTURE_DETECTOR_CACHE_DB), so
55
+ # local HTTP development without those variables keeps Flask's default cookies.
56
+ _BEHIND_PROXY = bool(os.environ.get("SD_BEHIND_PROXY") or
57
+ os.environ.get("SCRIPTURE_DETECTOR_CACHE_DB"))
58
+
59
+ if _BEHIND_PROXY:
60
+ app.config["SESSION_COOKIE_SAMESITE"] = "None"
61
+ app.config["SESSION_COOKIE_SECURE"] = True
62
+
63
  _CACHE_MODE = bool(os.environ.get("SCRIPTURE_DETECTOR_CACHE_DB"))
64
 
65
 
 
70
  return
71
  if "_db_sid" not in session:
72
  session["_db_sid"] = str(uuid.uuid4())
73
+ session.permanent = True
74
  database.session_local.session_id = session["_db_sid"]
75
 
76
  PROJECT_ROOT = Path(__file__).resolve().parent