Spaces:
Sleeping
Sleeping
William Mattingly committed on
Commit ·
eea4038
1
Parent(s): 163e18f
Remove .gitattributes and README copy files; enhance app.py for HuggingFace Spaces compatibility and session cookie handling in proxy environments. Update Dockerfile for improved deployment instructions and user permissions.
Browse files- .gitattributes copy +0 -2
- Dockerfile +59 -28
- README copy.md +0 -180
- README.md +177 -7
- app.py +26 -5
.gitattributes copy
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
# Auto detect text files and perform LF normalization
|
| 2 |
-
* text=auto
|
|
|
|
|
|
|
|
|
Dockerfile
CHANGED
|
@@ -1,49 +1,80 @@
|
|
| 1 |
-
# ββ Scripture Detector βββββββββββββ
|
| 2 |
#
|
| 3 |
-
#
|
| 4 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
#
|
| 6 |
-
#
|
| 7 |
-
#
|
|
|
|
|
|
|
| 8 |
#
|
| 9 |
-
#
|
| 10 |
-
#
|
| 11 |
-
#
|
|
|
|
| 12 |
# -e SD_DB_DIR=/app/db \
|
| 13 |
-
# -e
|
| 14 |
-
# scripture-detector
|
| 15 |
-
#
|
| 16 |
-
# The default CMD uses --cache-db (in-memory SQLite) which is ideal for
|
| 17 |
-
# workshops and demos where persistence across restarts is not needed.
|
| 18 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
|
| 20 |
FROM python:3.12-slim
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
RUN apt-get update && apt-get install -y --no-install-recommends
|
|
|
|
| 24 |
&& rm -rf /var/lib/apt/lists/*
|
| 25 |
-
RUN pip install --no-cache-dir uv
|
| 26 |
|
|
|
|
|
|
|
| 27 |
WORKDIR /app
|
| 28 |
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
|
| 32 |
-
COPY pyproject.toml uv.lock* ./
|
| 33 |
-
|
| 34 |
-
# Sync dependencies (no dev extras)
|
| 35 |
RUN uv sync --no-dev --frozen || uv sync --no-dev
|
| 36 |
|
| 37 |
-
#
|
| 38 |
-
COPY . .
|
| 39 |
|
| 40 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
ENV SD_HOST=0.0.0.0
|
| 42 |
ENV SD_PORT=7860
|
| 43 |
|
| 44 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
EXPOSE 7860
|
| 46 |
|
| 47 |
-
#
|
| 48 |
-
#
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── Scripture Detector — HuggingFace Spaces compatible Dockerfile ─────────────
|
| 2 |
#
|
| 3 |
+
# Usage on HuggingFace Spaces
|
| 4 |
+
# ───────────────────────────
|
| 5 |
+
# 1. Push this repo to a HF Space (Docker SDK).
|
| 6 |
+
# 2. Add the following secrets in Space Settings → Variables and secrets:
|
| 7 |
+
# SD_SECRET_KEY — any long random string, e.g. `openssl rand -hex 32`
|
| 8 |
+
# GEMINI_API_KEY — your Google AI Studio key (optional; can be set in-app)
|
| 9 |
#
|
| 10 |
+
# Local usage (standard)
|
| 11 |
+
# ──────────────────────
|
| 12 |
+
# docker build -t scripture-detector .
|
| 13 |
+
# docker run -p 7860:7860 scripture-detector
|
| 14 |
#
|
| 15 |
+
# Local usage with a persistent database (mount a host directory)
|
| 16 |
+
# ───────────────────────────────────────────────────────────────
|
| 17 |
+
# docker run -p 7860:7860 \
|
| 18 |
+
# -v "$(pwd)/db_volume:/app/db" \
|
| 19 |
# -e SD_DB_DIR=/app/db \
|
| 20 |
+
# -e SCRIPTURE_DETECTOR_CACHE_DB=0 \
|
| 21 |
+
# scripture-detector
|
|
|
|
|
|
|
|
|
|
| 22 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 23 |
|
| 24 |
FROM python:3.12-slim
|
| 25 |
|
| 26 |
+
# ── System dependencies ───────────────────────────────────────────────────────
|
| 27 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 28 |
+
git curl \
|
| 29 |
&& rm -rf /var/lib/apt/lists/*
|
|
|
|
| 30 |
|
| 31 |
+
# ── Non-root user (required by HuggingFace Spaces) ───────────────────────────
|
| 32 |
+
RUN useradd -m -u 1000 user
|
| 33 |
WORKDIR /app
|
| 34 |
|
| 35 |
+
# ── Install uv (fast Python package manager) ─────────────────────────────────
|
| 36 |
+
RUN pip install --no-cache-dir uv
|
| 37 |
|
| 38 |
+
# ── Dependencies (cached layer — only re-runs when pyproject.toml changes) ───
|
| 39 |
+
COPY --chown=user:user pyproject.toml uv.lock* ./
|
|
|
|
|
|
|
|
|
|
| 40 |
RUN uv sync --no-dev --frozen || uv sync --no-dev
|
| 41 |
|
| 42 |
+
# ── Application source ────────────────────────────────────────────────────────
|
| 43 |
+
COPY --chown=user:user . .
|
| 44 |
|
| 45 |
+
# ── Runtime environment ───────────────────────────────────────────────────────
|
| 46 |
+
# Per-user in-memory database (each browser session gets its own isolated DB).
|
| 47 |
+
ENV SCRIPTURE_DETECTOR_CACHE_DB=1
|
| 48 |
+
|
| 49 |
+
# Tell Flask/app.py to configure SameSite=None; Secure cookies so they work
|
| 50 |
+
# inside HuggingFace's iframe embedding.
|
| 51 |
+
ENV SD_BEHIND_PROXY=1
|
| 52 |
+
|
| 53 |
+
# Bind to all interfaces; HF Spaces expects port 7860.
|
| 54 |
ENV SD_HOST=0.0.0.0
|
| 55 |
ENV SD_PORT=7860
|
| 56 |
|
| 57 |
+
# SD_SECRET_KEY should be set as a HF Space Secret (not hardcoded here).
|
| 58 |
+
# If absent, a random key is generated at startup — sessions reset on redeploy.
|
| 59 |
+
|
| 60 |
+
# Unbuffered stdout/stderr so gunicorn and application logs appear immediately
|
| 61 |
+
ENV PYTHONUNBUFFERED=1
|
| 62 |
+
|
| 63 |
+
# ── Non-root user ─────────────────────────────────────────────────────────────
|
| 64 |
+
USER user
|
| 65 |
+
|
| 66 |
EXPOSE 7860
|
| 67 |
|
| 68 |
+
# ── Start gunicorn ────────────────────────────────────────────────────────────
|
| 69 |
+
# Single worker: the per-session in-memory databases live in one process.
|
| 70 |
+
# More than one worker would mean different workers have different session stores,
|
| 71 |
+
# causing "I added a source but now it's gone" bugs.
|
| 72 |
+
CMD ["uv", "run", "gunicorn", \
|
| 73 |
+
"--worker-class", "gthread", \
|
| 74 |
+
"--workers", "1", \
|
| 75 |
+
"--threads", "4", \
|
| 76 |
+
"--bind", "0.0.0.0:7860", \
|
| 77 |
+
"--timeout", "120", \
|
| 78 |
+
"--access-logfile", "-", \
|
| 79 |
+
"--error-logfile", "-", \
|
| 80 |
+
"app:app"]
|
README copy.md
DELETED
|
@@ -1,180 +0,0 @@
|
|
| 1 |
-
# Scripture Detector
|
| 2 |
-
|
| 3 |
-
> **AI-powered detection and analysis of biblical quotations, paraphrases, and allusions in historical texts.**
|
| 4 |
-
|
| 5 |
-

|
| 6 |
-
|
| 7 |
-
Developed by **Dr. William J.B. Mattingly**, Cultural Heritage Data Scientist at Yale University.
|
| 8 |
-
|
| 9 |
-
Built for the international workshop **[Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)** β held **March 5β6, 2026** at the Austrian Academy of Sciences, Vienna.
|
| 10 |
-
|
| 11 |
-
---
|
| 12 |
-
|
| 13 |
-
## Overview
|
| 14 |
-
|
| 15 |
-
Scripture Detector is a local web application that uses Google Gemini to automatically find, classify, and annotate every biblical reference in any text you provide β from full verbatim quotations to subtle allusions. It renders an interactive, color-coded view of the source text with side-by-side Bible verse lookup, distribution charts, and a manual annotation editor.
|
| 16 |
-
|
| 17 |
-
**The only external dependency is a free Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey). No cloud account, credit card, or additional infrastructure is required.
|
| 18 |
-
|
| 19 |
-
---
|
| 20 |
-
|
| 21 |
-
## Screenshots
|
| 22 |
-
|
| 23 |
-
### Sources Page β with Advanced Search
|
| 24 |
-
Manage all your text sources. Use the search bar to search source content in real time, or open the **Advanced** panel to stack filters by Bible book, chapter, or verse with AND / OR logic. Match evidence appears directly on each result card.
|
| 25 |
-
|
| 26 |
-

|
| 27 |
-
|
| 28 |
-
### Text Viewer
|
| 29 |
-
Color-coded annotations appear directly in the text. Click any highlighted passage to see the matched Bible verse(s) in the panel on the right.
|
| 30 |
-
|
| 31 |
-

|
| 32 |
-
|
| 33 |
-
### Analytics Dashboard
|
| 34 |
-
Explore scripture distribution across all sources β broken down by Bible book, testament, and quote type.
|
| 35 |
-
|
| 36 |
-

|
| 37 |
-
|
| 38 |
-
### Settings
|
| 39 |
-
Configure your Gemini API key or Google Vertex AI credentials, and select which Gemini model to use.
|
| 40 |
-
|
| 41 |
-

|
| 42 |
-
|
| 43 |
-
### About Page
|
| 44 |
-
Full documentation of the application, its features, and how it works.
|
| 45 |
-
|
| 46 |
-

|
| 47 |
-
|
| 48 |
-
---
|
| 49 |
-
|
| 50 |
-
## Features
|
| 51 |
-
|
| 52 |
-
- **Full-document AI analysis** β send any text to Gemini and receive a structured list of every scripture reference with verse citations and classification types.
|
| 53 |
-
- **Four classification types**: Full, Partial, Paraphrase, and Allusion.
|
| 54 |
-
- **Color-coded in-text highlighting** β each type is rendered in a distinct color directly within the source text.
|
| 55 |
-
- **Click-to-explore interactivity** β click a highlighted passage to highlight the corresponding annotation card, and vice versa.
|
| 56 |
-
- **Selection-based re-analysis** β highlight any portion of the text to re-run AI detection on just that selection.
|
| 57 |
-
- **Manual annotation editor** with an integrated Bible verse picker (book β chapter β verse).
|
| 58 |
-
- **Advanced search** β real-time multi-filter search across all sources by text content, Bible book, chapter, or verse; filters stack with AND / OR logic and matched evidence appears inline on each result card.
|
| 59 |
-
- **Distribution charts** per source and across all sources (by Bible book, testament, and type).
|
| 60 |
-
- **Global analytics dashboard** aggregating data across all sources.
|
| 61 |
-
- **Model switching** β choose between available Gemini model versions in the UI.
|
| 62 |
-
- **Runs entirely locally** β your texts are never transmitted anywhere except for the Gemini API call itself.
|
| 63 |
-
|
| 64 |
-
---
|
| 65 |
-
|
| 66 |
-
## Advanced Search
|
| 67 |
-
|
| 68 |
-
The Sources page includes a real-time multi-filter search system:
|
| 69 |
-
|
| 70 |
-
| Filter Type | What it matches |
|
| 71 |
-
|---|---|
|
| 72 |
-
| **Text Content** | Any source whose full text contains the search string |
|
| 73 |
-
| **Bible Book** | Any source with at least one reference to the chosen book |
|
| 74 |
-
| **Chapter** | Any source with a reference to the chosen chapter (e.g. Psalms 23) |
|
| 75 |
-
| **Verse** | Any source with a reference to the exact chosen verse (e.g. John 3:16) |
|
| 76 |
-
|
| 77 |
-
Filters can be stacked in any combination. The **AND** mode requires a source to satisfy *every* filter; **OR** mode returns sources satisfying *any* filter. Results update within ~350 ms of each keystroke or dropdown change. Matched text snippets and verse citations appear as inline evidence on each result card.
|
| 78 |
-
|
| 79 |
-
---
|
| 80 |
-
|
| 81 |
-
## Quick Start
|
| 82 |
-
|
| 83 |
-
### 1. Prerequisites
|
| 84 |
-
|
| 85 |
-
- Python 3.10+
|
| 86 |
-
- [uv](https://docs.astral.sh/uv/) (recommended) or pip
|
| 87 |
-
- A free **Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey)
|
| 88 |
-
|
| 89 |
-
### 2. Install & Run
|
| 90 |
-
|
| 91 |
-
```bash
|
| 92 |
-
git clone <repo-url>
|
| 93 |
-
cd scripture-detector
|
| 94 |
-
|
| 95 |
-
# Using uv (recommended)
|
| 96 |
-
uv run python app.py
|
| 97 |
-
|
| 98 |
-
# Or using pip
|
| 99 |
-
pip install -e .
|
| 100 |
-
python app.py
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
The app starts at **http://127.0.0.1:5001**.
|
| 104 |
-
|
| 105 |
-
### 3. Configure API Key
|
| 106 |
-
|
| 107 |
-
1. Open [http://127.0.0.1:5001/settings](http://127.0.0.1:5001/settings)
|
| 108 |
-
2. Select **Gemini API** as the provider
|
| 109 |
-
3. Paste your API key from [Google AI Studio](https://aistudio.google.com/apikey)
|
| 110 |
-
4. Click **Save Settings**
|
| 111 |
-
|
| 112 |
-
### 4. Analyze a Text
|
| 113 |
-
|
| 114 |
-
1. Go to the **Sources** page and click **Add Source**
|
| 115 |
-
2. Give your source a name and paste in the text to analyze
|
| 116 |
-
3. Click **View** to open the text viewer
|
| 117 |
-
4. Click **Process with AI** β Gemini will detect all scripture references within seconds
|
| 118 |
-
5. Click any highlighted passage to explore the matched Bible verses
|
| 119 |
-
|
| 120 |
-
---
|
| 121 |
-
|
| 122 |
-
## Quote Classification
|
| 123 |
-
|
| 124 |
-
| Type | Description |
|
| 125 |
-
|---|---|
|
| 126 |
-
| **Full** | A complete or near-complete verse quoted verbatim |
|
| 127 |
-
| **Partial** | A recognizable portion of a verse with minor variation or truncation |
|
| 128 |
-
| **Paraphrase** | Biblical content restated in different words, preserving the meaning |
|
| 129 |
-
| **Allusion** | A brief phrase, thematic echo, or indirect reference to a specific verse |
|
| 130 |
-
|
| 131 |
-
---
|
| 132 |
-
|
| 133 |
-
## Project Structure
|
| 134 |
-
|
| 135 |
-
```
|
| 136 |
-
scripture-detector/
|
| 137 |
-
βββ app.py # Flask application and API routes
|
| 138 |
-
βββ database.py # SQLite database layer
|
| 139 |
-
βββ main.py # CLI batch evaluation script
|
| 140 |
-
βββ data/
|
| 141 |
-
β βββ bible.tsv # Full Bible verse database (35,000+ verses)
|
| 142 |
-
β βββ book_mapping.tsv
|
| 143 |
-
βββ templates/
|
| 144 |
-
β βββ sources.html # Sources listing page
|
| 145 |
-
β βββ viewer.html # Annotated text viewer
|
| 146 |
-
β βββ dashboard.html # Global analytics dashboard
|
| 147 |
-
β βββ settings.html # API configuration
|
| 148 |
-
β βββ about.html # About / documentation page
|
| 149 |
-
βββ static/
|
| 150 |
-
βββ style.css # Yale color palette stylesheet
|
| 151 |
-
βββ logo.svg # Application logo
|
| 152 |
-
βββ favicon.svg # Browser tab icon
|
| 153 |
-
```
|
| 154 |
-
|
| 155 |
-
---
|
| 156 |
-
|
| 157 |
-
## API Providers
|
| 158 |
-
|
| 159 |
-
### Gemini API (Free Tier)
|
| 160 |
-
The simplest option. Get a free key at [Google AI Studio](https://aistudio.google.com/apikey) β no billing required. Select **Gemini API** in Settings and paste your key.
|
| 161 |
-
|
| 162 |
-
### Google Vertex AI
|
| 163 |
-
For enterprise use or higher rate limits. Requires a Google Cloud project with Vertex AI enabled. Select **Vertex AI** in Settings and enter your project ID and location.
|
| 164 |
-
|
| 165 |
-
---
|
| 166 |
-
|
| 167 |
-
## About
|
| 168 |
-
|
| 169 |
-
**Developer:** Dr. William J.B. Mattingly
|
| 170 |
-
**Affiliation:** Yale University, Cultural Heritage Data Scientist
|
| 171 |
-
**Workshop:** [Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)
|
| 172 |
-
**Workshop Dates:** March 5β6, 2026
|
| 173 |
-
**Workshop Venue:** Austrian Academy of Sciences, PSK Georg-Coch-Platz 2, 1010 Vienna
|
| 174 |
-
**Organisers:** Digital Lab, Institute for Medieval Research, Austrian Academy of Sciences & [SOLEMNE](https://canones.org/), Radboud University
|
| 175 |
-
|
| 176 |
-
---
|
| 177 |
-
|
| 178 |
-
## License
|
| 179 |
-
|
| 180 |
-
MIT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,10 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scripture Detector
|
| 2 |
+
|
| 3 |
+
> **AI-powered detection and analysis of biblical quotations, paraphrases, and allusions in historical texts.**
|
| 4 |
+
|
| 5 |
+

|
| 6 |
+
|
| 7 |
+
Developed by **Dr. William J.B. Mattingly**, Cultural Heritage Data Scientist at Yale University.
|
| 8 |
+
|
| 9 |
+
Built for the international workshop **[Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)** — held **March 5–6, 2026** at the Austrian Academy of Sciences, Vienna.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
## Overview
|
| 14 |
+
|
| 15 |
+
Scripture Detector is a local web application that uses Google Gemini to automatically find, classify, and annotate every biblical reference in any text you provide — from full verbatim quotations to subtle allusions. It renders an interactive, color-coded view of the source text with side-by-side Bible verse lookup, distribution charts, and a manual annotation editor.
|
| 16 |
+
|
| 17 |
+
**The only external dependency is a free Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey). No cloud account, credit card, or additional infrastructure is required.
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## Screenshots
|
| 22 |
+
|
| 23 |
+
### Sources Page — with Advanced Search
|
| 24 |
+
Manage all your text sources. Use the search bar to search source content in real time, or open the **Advanced** panel to stack filters by Bible book, chapter, or verse with AND / OR logic. Match evidence appears directly on each result card.
|
| 25 |
+
|
| 26 |
+

|
| 27 |
+
|
| 28 |
+
### Text Viewer
|
| 29 |
+
Color-coded annotations appear directly in the text. Click any highlighted passage to see the matched Bible verse(s) in the panel on the right.
|
| 30 |
+
|
| 31 |
+

|
| 32 |
+
|
| 33 |
+
### Analytics Dashboard
|
| 34 |
+
Explore scripture distribution across all sources — broken down by Bible book, testament, and quote type.
|
| 35 |
+
|
| 36 |
+

|
| 37 |
+
|
| 38 |
+
### Settings
|
| 39 |
+
Configure your Gemini API key or Google Vertex AI credentials, and select which Gemini model to use.
|
| 40 |
+
|
| 41 |
+

|
| 42 |
+
|
| 43 |
+
### About Page
|
| 44 |
+
Full documentation of the application, its features, and how it works.
|
| 45 |
+
|
| 46 |
+

|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## Features
|
| 51 |
+
|
| 52 |
+
- **Full-document AI analysis** — send any text to Gemini and receive a structured list of every scripture reference with verse citations and classification types.
|
| 53 |
+
- **Four classification types**: Full, Partial, Paraphrase, and Allusion.
|
| 54 |
+
- **Color-coded in-text highlighting** — each type is rendered in a distinct color directly within the source text.
|
| 55 |
+
- **Click-to-explore interactivity** — click a highlighted passage to highlight the corresponding annotation card, and vice versa.
|
| 56 |
+
- **Selection-based re-analysis** — highlight any portion of the text to re-run AI detection on just that selection.
|
| 57 |
+
- **Manual annotation editor** with an integrated Bible verse picker (book → chapter → verse).
|
| 58 |
+
- **Advanced search** — real-time multi-filter search across all sources by text content, Bible book, chapter, or verse; filters stack with AND / OR logic and matched evidence appears inline on each result card.
|
| 59 |
+
- **Distribution charts** per source and across all sources (by Bible book, testament, and type).
|
| 60 |
+
- **Global analytics dashboard** aggregating data across all sources.
|
| 61 |
+
- **Model switching** — choose between available Gemini model versions in the UI.
|
| 62 |
+
- **Runs entirely locally** — your texts are never transmitted anywhere except for the Gemini API call itself.
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## Advanced Search
|
| 67 |
+
|
| 68 |
+
The Sources page includes a real-time multi-filter search system:
|
| 69 |
+
|
| 70 |
+
| Filter Type | What it matches |
|
| 71 |
+
|---|---|
|
| 72 |
+
| **Text Content** | Any source whose full text contains the search string |
|
| 73 |
+
| **Bible Book** | Any source with at least one reference to the chosen book |
|
| 74 |
+
| **Chapter** | Any source with a reference to the chosen chapter (e.g. Psalms 23) |
|
| 75 |
+
| **Verse** | Any source with a reference to the exact chosen verse (e.g. John 3:16) |
|
| 76 |
+
|
| 77 |
+
Filters can be stacked in any combination. The **AND** mode requires a source to satisfy *every* filter; **OR** mode returns sources satisfying *any* filter. Results update within ~350 ms of each keystroke or dropdown change. Matched text snippets and verse citations appear as inline evidence on each result card.
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
+
|
| 81 |
+
## Quick Start
|
| 82 |
+
|
| 83 |
+
### 1. Prerequisites
|
| 84 |
+
|
| 85 |
+
- Python 3.10+
|
| 86 |
+
- [uv](https://docs.astral.sh/uv/) (recommended) or pip
|
| 87 |
+
- A free **Gemini API key** from [Google AI Studio](https://aistudio.google.com/apikey)
|
| 88 |
+
|
| 89 |
+
### 2. Install & Run
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
git clone <repo-url>
|
| 93 |
+
cd scripture-detector
|
| 94 |
+
|
| 95 |
+
# Using uv (recommended)
|
| 96 |
+
uv run python app.py
|
| 97 |
+
|
| 98 |
+
# Or using pip
|
| 99 |
+
pip install -e .
|
| 100 |
+
python app.py
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
The app starts at **http://127.0.0.1:5001**.
|
| 104 |
+
|
| 105 |
+
### 3. Configure API Key
|
| 106 |
+
|
| 107 |
+
1. Open [http://127.0.0.1:5001/settings](http://127.0.0.1:5001/settings)
|
| 108 |
+
2. Select **Gemini API** as the provider
|
| 109 |
+
3. Paste your API key from [Google AI Studio](https://aistudio.google.com/apikey)
|
| 110 |
+
4. Click **Save Settings**
|
| 111 |
+
|
| 112 |
+
### 4. Analyze a Text
|
| 113 |
+
|
| 114 |
+
1. Go to the **Sources** page and click **Add Source**
|
| 115 |
+
2. Give your source a name and paste in the text to analyze
|
| 116 |
+
3. Click **View** to open the text viewer
|
| 117 |
+
4. Click **Process with AI** — Gemini will detect all scripture references within seconds
|
| 118 |
+
5. Click any highlighted passage to explore the matched Bible verses
|
| 119 |
+
|
| 120 |
+
---
|
| 121 |
+
|
| 122 |
+
## Quote Classification
|
| 123 |
+
|
| 124 |
+
| Type | Description |
|
| 125 |
+
|---|---|
|
| 126 |
+
| **Full** | A complete or near-complete verse quoted verbatim |
|
| 127 |
+
| **Partial** | A recognizable portion of a verse with minor variation or truncation |
|
| 128 |
+
| **Paraphrase** | Biblical content restated in different words, preserving the meaning |
|
| 129 |
+
| **Allusion** | A brief phrase, thematic echo, or indirect reference to a specific verse |
|
| 130 |
+
|
| 131 |
+
---
|
| 132 |
+
|
| 133 |
+
## Project Structure
|
| 134 |
+
|
| 135 |
+
```
|
| 136 |
+
scripture-detector/
|
| 137 |
+
├── app.py                  # Flask application and API routes
|
| 138 |
+
├── database.py             # SQLite database layer
|
| 139 |
+
├── main.py                 # CLI batch evaluation script
|
| 140 |
+
├── data/
|
| 141 |
+
│   ├── bible.tsv           # Full Bible verse database (35,000+ verses)
|
| 142 |
+
│   └── book_mapping.tsv
|
| 143 |
+
├── templates/
|
| 144 |
+
│   ├── sources.html        # Sources listing page
|
| 145 |
+
│   ├── viewer.html         # Annotated text viewer
|
| 146 |
+
│   ├── dashboard.html      # Global analytics dashboard
|
| 147 |
+
│   ├── settings.html       # API configuration
|
| 148 |
+
│   └── about.html          # About / documentation page
|
| 149 |
+
└── static/
|
| 150 |
+
    ├── style.css           # Yale color palette stylesheet
|
| 151 |
+
    ├── logo.svg            # Application logo
|
| 152 |
+
    └── favicon.svg         # Browser tab icon
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
---
|
| 156 |
+
|
| 157 |
+
## API Providers
|
| 158 |
+
|
| 159 |
+
### Gemini API (Free Tier)
|
| 160 |
+
The simplest option. Get a free key at [Google AI Studio](https://aistudio.google.com/apikey) — no billing required. Select **Gemini API** in Settings and paste your key.
|
| 161 |
+
|
| 162 |
+
### Google Vertex AI
|
| 163 |
+
For enterprise use or higher rate limits. Requires a Google Cloud project with Vertex AI enabled. Select **Vertex AI** in Settings and enter your project ID and location.
|
| 164 |
+
|
| 165 |
---
|
| 166 |
+
|
| 167 |
+
## About
|
| 168 |
+
|
| 169 |
+
**Developer:** Dr. William J.B. Mattingly
|
| 170 |
+
**Affiliation:** Yale University, Cultural Heritage Data Scientist
|
| 171 |
+
**Workshop:** [Ruse of Reuse: Detecting Text-similarity with AI in Historical Sources](https://www.oeaw.ac.at/en/imafo/events/event-details/ruse-of-reuse)
|
| 172 |
+
**Workshop Dates:** March 5–6, 2026
|
| 173 |
+
**Workshop Venue:** Austrian Academy of Sciences, PSK Georg-Coch-Platz 2, 1010 Vienna
|
| 174 |
+
**Organisers:** Digital Lab, Institute for Medieval Research, Austrian Academy of Sciences & [SOLEMNE](https://canones.org/), Radboud University
|
| 175 |
+
|
| 176 |
---
|
| 177 |
|
| 178 |
+
## License
|
| 179 |
+
|
| 180 |
+
MIT
|
app.py
CHANGED
|
@@ -2,10 +2,11 @@ import os
|
|
| 2 |
import sys
|
| 3 |
|
| 4 |
# ── parse custom flags BEFORE importing database (which reads env vars) ───────
|
|
|
|
|
|
|
| 5 |
_argv = sys.argv[1:]
|
| 6 |
if "--cache-db" in _argv:
|
| 7 |
os.environ["SCRIPTURE_DETECTOR_CACHE_DB"] = "1"
|
| 8 |
-
# Remove our custom flags so Flask/werkzeug doesn't choke on them
|
| 9 |
sys.argv = [sys.argv[0]] + [a for a in _argv if a != "--cache-db"]
|
| 10 |
|
| 11 |
import csv
|
|
@@ -18,6 +19,7 @@ from datetime import date
|
|
| 18 |
from pathlib import Path
|
| 19 |
|
| 20 |
from flask import Flask, render_template, jsonify, request, redirect, url_for, Response, session
|
|
|
|
| 21 |
from google import genai
|
| 22 |
|
| 23 |
import database # imported as module so we can write to database.session_local
|
|
@@ -34,11 +36,30 @@ from tei import source_to_tei, tei_to_source_data
|
|
| 34 |
|
| 35 |
app = Flask(__name__)
|
| 36 |
|
| 37 |
-
#
|
| 38 |
-
#
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
app.secret_key = os.environ.get("SD_SECRET_KEY") or os.urandom(32)
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
_CACHE_MODE = bool(os.environ.get("SCRIPTURE_DETECTOR_CACHE_DB"))
|
| 43 |
|
| 44 |
|
|
@@ -49,7 +70,7 @@ def _bind_session_db():
|
|
| 49 |
return
|
| 50 |
if "_db_sid" not in session:
|
| 51 |
session["_db_sid"] = str(uuid.uuid4())
|
| 52 |
-
session.permanent = True
|
| 53 |
database.session_local.session_id = session["_db_sid"]
|
| 54 |
|
| 55 |
PROJECT_ROOT = Path(__file__).resolve().parent
|
|
|
|
| 2 |
import sys
|
| 3 |
|
| 4 |
# ── parse custom flags BEFORE importing database (which reads env vars) ───────
|
| 5 |
+
# The --cache-db CLI flag sets the env var so gunicorn deployments can instead
|
| 6 |
+
# set SCRIPTURE_DETECTOR_CACHE_DB=1 in their environment directly.
|
| 7 |
_argv = sys.argv[1:]
|
| 8 |
if "--cache-db" in _argv:
|
| 9 |
os.environ["SCRIPTURE_DETECTOR_CACHE_DB"] = "1"
|
|
|
|
| 10 |
sys.argv = [sys.argv[0]] + [a for a in _argv if a != "--cache-db"]
|
| 11 |
|
| 12 |
import csv
|
|
|
|
| 19 |
from pathlib import Path
|
| 20 |
|
| 21 |
from flask import Flask, render_template, jsonify, request, redirect, url_for, Response, session
|
| 22 |
+
from werkzeug.middleware.proxy_fix import ProxyFix
|
| 23 |
from google import genai
|
| 24 |
|
| 25 |
import database # imported as module so we can write to database.session_local
|
|
|
|
| 36 |
|
| 37 |
app = Flask(__name__)
|
| 38 |
|
| 39 |
+
# Trust the X-Forwarded-* headers from reverse proxies (HuggingFace, nginx…).
|
| 40 |
+
# This lets Flask see the real HTTPS scheme so secure cookies work correctly.
|
| 41 |
+
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
|
| 42 |
+
|
| 43 |
+
# ── Secret key ────────────────────────────────────────────────────────────────
|
| 44 |
+
# Set SD_SECRET_KEY in the environment (HF Spaces → Settings → Secrets) so
|
| 45 |
+
# sessions survive server restarts. A random key is used as a safe fallback
|
| 46 |
+
# (sessions reset whenever the server restarts).
|
| 47 |
app.secret_key = os.environ.get("SD_SECRET_KEY") or os.urandom(32)
|
| 48 |
|
| 49 |
+
# ── Session cookie settings ───────────────────────────────────────────────────
|
| 50 |
+
# HuggingFace Spaces embeds the app inside an <iframe>. Browsers block
|
| 51 |
+
# SameSite=Lax cookies in cross-site iframes, which would create a new session
|
| 52 |
+
# on every request and make the per-user database invisible.
|
| 53 |
+
# SameSite=None + Secure=True is the correct fix for iframe deployments.
|
| 54 |
+
# These cookie settings are applied only when SD_BEHIND_PROXY or
|
| 55 |
+
# SCRIPTURE_DETECTOR_CACHE_DB is set; local HTTP runs without either keep the defaults.
|
| 56 |
+
_BEHIND_PROXY = bool(os.environ.get("SD_BEHIND_PROXY") or
|
| 57 |
+
os.environ.get("SCRIPTURE_DETECTOR_CACHE_DB"))
|
| 58 |
+
|
| 59 |
+
if _BEHIND_PROXY:
|
| 60 |
+
app.config["SESSION_COOKIE_SAMESITE"] = "None"
|
| 61 |
+
app.config["SESSION_COOKIE_SECURE"] = True
|
| 62 |
+
|
| 63 |
_CACHE_MODE = bool(os.environ.get("SCRIPTURE_DETECTOR_CACHE_DB"))
|
| 64 |
|
| 65 |
|
|
|
|
| 70 |
return
|
| 71 |
if "_db_sid" not in session:
|
| 72 |
session["_db_sid"] = str(uuid.uuid4())
|
| 73 |
+
session.permanent = True
|
| 74 |
database.session_local.session_id = session["_db_sid"]
|
| 75 |
|
| 76 |
PROJECT_ROOT = Path(__file__).resolve().parent
|