Gintarė Zokaitytė commited on
Commit
ffe022c
·
0 Parent(s):

Initial dashboard deployment

Browse files
Files changed (8) hide show
  1. .gitignore +32 -0
  2. .streamlit/config.toml +11 -0
  3. DEPLOY.md +188 -0
  4. GITHUB_DEPLOY.md +288 -0
  5. README.md +92 -0
  6. README_GITHUB.md +129 -0
  7. app.py +455 -0
  8. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+
11
+ # Credentials (DO NOT COMMIT)
12
+ .streamlit/secrets.toml
13
+ .env
14
+
15
+ # Data cache (speeds up loading)
16
+ .cache.pkl
17
+ *.pkl
18
+ *.cache
19
+
20
+ # IDE
21
+ .vscode/
22
+ .idea/
23
+ *.swp
24
+ *.swo
25
+ *~
26
+
27
+ # OS
28
+ .DS_Store
29
+ Thumbs.db
30
+
31
+ # Logs
32
+ *.log
.streamlit/config.toml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor = "#d4af37"
3
+ backgroundColor = "#ffffff"
4
+ secondaryBackgroundColor = "#f0f2f6"
5
+ textColor = "#262730"
6
+ font = "sans serif"
7
+
8
+ [server]
9
+ headless = true
10
+ port = 7860
11
+ enableCORS = false
DEPLOY.md ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deployment Guide
2
+
3
+ Two easy options: **HuggingFace Spaces** or **Streamlit Cloud**
4
+
5
+ ---
6
+
7
+ ## Option 1: HuggingFace Spaces (Recommended)
8
+
9
+ ### Step 1: Create Space
10
+
11
+ 1. Go to https://huggingface.co/new-space
12
+ 2. Choose a name (e.g., `annotation-dashboard`)
13
+ 3. Select **Streamlit** as the SDK
14
+ 4. Choose visibility (Public or Private)
15
+ 5. Click **Create Space**
16
+
17
+ ### Step 2: Upload Files
18
+
19
+ Upload these 3 files:
20
+
21
+ - ✅ `app.py`
22
+ - ✅ `requirements.txt`
23
+ - ✅ `.streamlit/config.toml`
24
+
25
+ **How to upload:**
26
+ - Click **Files** tab → **Add file** → Upload each file
27
+ - Or use Git (see below)
28
+
29
+ ### Step 3: Add Secrets
30
+
31
+ 1. Go to **Settings** tab
32
+ 2. Scroll to **Repository secrets**
33
+ 3. Click **New secret**
34
+ 4. Add two secrets:
35
+
36
+ ```
37
+ Name: LABEL_STUDIO_URL
38
+ Value: https://your-labelstudio-instance.com
39
+ ```
40
+
41
+ ```
42
+ Name: LABEL_STUDIO_API_KEY
43
+ Value: your-api-key-here
44
+ ```
45
+
46
+ ### Step 4: Wait for Build
47
+
48
+ - HuggingFace automatically builds your Space
49
+ - Check **Logs** tab if there are issues
50
+ - Dashboard will be live at: `https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME`
51
+
52
+ ### Using Git (Alternative)
53
+
54
+ ```bash
55
+ # Clone your Space
56
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME
57
+ cd SPACE_NAME
58
+
59
+ # Copy files
60
+ cp /path/to/annotation-dashboard/app.py .
61
+ cp /path/to/annotation-dashboard/requirements.txt .
62
+ mkdir -p .streamlit
63
+ cp /path/to/annotation-dashboard/.streamlit/config.toml .streamlit/
64
+
65
+ # Push
66
+ git add .
67
+ git commit -m "Deploy dashboard"
68
+ git push
69
+ ```
70
+
71
+ ---
72
+
73
+ ## Option 2: Streamlit Cloud
74
+
75
+ ### Step 1: Push to GitHub
76
+
77
+ Your dashboard needs to be in a GitHub repository.
78
+
79
+ ```bash
80
+ cd annotation-dashboard
81
+
82
+ # Initialize git if needed
83
+ git init
84
+ git add app.py requirements.txt .streamlit/config.toml .gitignore
85
+ git commit -m "Initial dashboard"
86
+
87
+ # Create repo on GitHub (via web UI), then:
88
+ git remote add origin https://github.com/YOUR_USERNAME/REPO_NAME.git
89
+ git push -u origin main
90
+ ```
91
+
92
+ ### Step 2: Deploy on Streamlit Cloud
93
+
94
+ 1. Go to https://share.streamlit.io/
95
+ 2. Click **New app**
96
+ 3. Connect your GitHub account (if first time)
97
+ 4. Select:
98
+ - **Repository**: Your dashboard repo
99
+ - **Branch**: `main`
100
+ - **Main file path**: `app.py`
101
+ 5. Click **Deploy**
102
+
103
+ ### Step 3: Add Secrets
104
+
105
+ 1. Click **Advanced settings** (before deploying) or **⋮** → **Settings** (after)
106
+ 2. Go to **Secrets** section
107
+ 3. Add in TOML format:
108
+
109
+ ```toml
110
+ LABEL_STUDIO_URL = "https://your-labelstudio-instance.com"
111
+ LABEL_STUDIO_API_KEY = "your-api-key-here"
112
+ ```
113
+
114
+ 4. Click **Save**
115
+
116
+ ### Step 4: Access Dashboard
117
+
118
+ Your app will be at: `https://YOUR_USERNAME-REPO_NAME.streamlit.app`
119
+
120
+ ---
121
+
122
+ ## Comparison
123
+
124
+ | Feature | HuggingFace Spaces | Streamlit Cloud |
125
+ |---------|-------------------|-----------------|
126
+ | **Setup** | Easier (upload files) | Requires GitHub repo |
127
+ | **Free tier** | Generous | Limited hours/month |
128
+ | **Custom domain** | Yes (paid) | Yes (paid) |
129
+ | **Cache persistence** | ❌ No (ephemeral storage) | ❌ No (ephemeral storage) |
130
+ | **Community** | ML/AI focused | Data science focused |
131
+ | **Speed** | Fast | Fast |
132
+
133
+ **Note**: Cache file (`.cache.pkl`) won't persist on either platform. It rebuilds on each cold start (~30s). For persistent cache, you'd need a database or external storage.
134
+
135
+ ---
136
+
137
+ ## Get Your Label Studio API Key
138
+
139
+ 1. Log into Label Studio
140
+ 2. Click your profile (top right)
141
+ 3. **Account & Settings**
142
+ 4. Scroll to **Access Token**
143
+ 5. Copy the token
144
+
145
+ ---
146
+
147
+ ## Troubleshooting
148
+
149
+ ### "Missing credentials" error
150
+
151
+ **Fix**: Check secrets are correctly set
152
+ - HF Spaces: Settings → Repository secrets
153
+ - Streamlit Cloud: App settings → Secrets
154
+
155
+ ### Dashboard loads slowly
156
+
157
+ **Expected**: First load ~30s (fetches all data)
158
+ - Subsequent loads: <2s once warm (the cache auto-refreshes every 5 minutes)
159
+ - Cache doesn't persist on free hosting
160
+
161
+ ### Build fails
162
+
163
+ **Check**:
164
+ 1. All 3 files uploaded (`app.py`, `requirements.txt`, `.streamlit/config.toml`)
165
+ 2. Check build logs for errors
166
+ 3. Verify Python dependencies in `requirements.txt`
167
+
168
+ ### Can't access Label Studio from cloud
169
+
170
+ **Common issue**: Label Studio must be publicly accessible
171
+ - If running locally, cloud can't reach it
172
+ - Use a public URL or cloud-hosted Label Studio instance
173
+
174
+ ---
175
+
176
+ ## Quick Decision Guide
177
+
178
+ **Choose HuggingFace Spaces if:**
179
+ - ✅ You want the easiest setup
180
+ - ✅ You don't have a GitHub repo
181
+ - ✅ You prefer ML-focused platform
182
+
183
+ **Choose Streamlit Cloud if:**
184
+ - ✅ Your code is already on GitHub
185
+ - ✅ You prefer Streamlit's native platform
186
+ - ✅ You want tight GitHub integration
187
+
188
+ Both are excellent choices! 🚀
GITHUB_DEPLOY.md ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deploy from GitHub Organization
2
+
3
+ Step-by-step guide to deploy the dashboard from your GitHub organization.
4
+
5
+ ---
6
+
7
+ ## Step 1: Push to GitHub Organization
8
+
9
+ ### Option A: Create New Repo via GitHub Web
10
+
11
+ 1. Go to your organization on GitHub
12
+ 2. Click **New repository**
13
+ 3. Name it (e.g., `annotation-dashboard`)
14
+ 4. Choose visibility (Public or Private)
15
+ 5. **Don't** initialize with README (we have files already)
16
+ 6. Click **Create repository**
17
+
18
+ Then push your code:
19
+
20
+ ```bash
21
+ cd annotation-dashboard
22
+
23
+ # Initialize git if needed
24
+ git init
25
+
26
+ # Add files
27
+ git add app.py requirements.txt .streamlit/ .gitignore
28
+ git commit -m "Initial dashboard"
29
+
30
+ # Add remote (replace ORG_NAME and REPO_NAME)
31
+ git remote add origin https://github.com/ORG_NAME/REPO_NAME.git
32
+
33
+ # Push
34
+ git branch -M main
35
+ git push -u origin main
36
+ ```
37
+
38
+ ### Option B: Use GitHub CLI (faster)
39
+
40
+ ```bash
41
+ cd annotation-dashboard
42
+
43
+ # Login to GitHub (first time only)
44
+ gh auth login
45
+
46
+ # Create repo in your org and push
47
+ gh repo create ORG_NAME/annotation-dashboard --source=. --public --push
48
+
49
+ # Or private:
50
+ gh repo create ORG_NAME/annotation-dashboard --source=. --private --push
51
+ ```
52
+
53
+ ---
54
+
55
+ ## Step 2: Deploy to HuggingFace Spaces from GitHub
56
+
57
+ ### Link GitHub to HuggingFace
58
+
59
+ 1. Go to https://huggingface.co/new-space
60
+ 2. Choose **Import from GitHub**
61
+ 3. Connect your GitHub account (first time only)
62
+ 4. Select your organization and repository
63
+ 5. Click **Import**
64
+
65
+ ### Add Secrets
66
+
67
+ 1. Once imported, go to **Settings** → **Repository secrets**
68
+ 2. Add:
69
+ - `LABEL_STUDIO_URL`
70
+ - `LABEL_STUDIO_API_KEY`
71
+
72
+ ### Auto-sync
73
+
74
+ Now any push to GitHub automatically updates your HF Space! 🎉
75
+
76
+ ---
77
+
78
+ ## Step 3: Deploy to Streamlit Cloud from GitHub
79
+
80
+ 1. Go to https://share.streamlit.io/
81
+ 2. Click **New app**
82
+ 3. **Connect GitHub** (allow access to organization)
83
+ 4. Select:
84
+ - **Repository**: `ORG_NAME/REPO_NAME`
85
+ - **Branch**: `main`
86
+ - **Main file**: `app.py`
87
+
88
+ 5. **Advanced settings** → **Secrets** → Add:
89
+ ```toml
90
+ LABEL_STUDIO_URL = "https://your-instance.com"
91
+ LABEL_STUDIO_API_KEY = "your-api-key"
92
+ ```
93
+
94
+ 6. Click **Deploy**
95
+
96
+ Your app will be at: `https://ORG_NAME-REPO_NAME.streamlit.app`
97
+
98
+ ---
99
+
100
+ ## Recommended: Add README for GitHub
101
+
102
+ Create a nice README for your GitHub repo:
103
+
104
+ ```bash
105
+ cat > README.md << 'EOF'
106
+ # Annotation Progress Dashboard
107
+
108
+ Live dashboard tracking Lithuanian NER annotation progress.
109
+
110
+ ## 🚀 Live Demo
111
+
112
+ - **HuggingFace**: [link to your space]
113
+ - **Streamlit Cloud**: [link to your app]
114
+
115
+ ## Features
116
+
117
+ - Real-time progress metrics
118
+ - Weekly team statistics
119
+ - Category breakdown
120
+ - Completion projections
121
+ - Fast caching (30s → <2s)
122
+
123
+ ## Local Development
124
+
125
+ \`\`\`bash
126
+ pip install -r requirements.txt
127
+ export LABEL_STUDIO_URL="https://..."
128
+ export LABEL_STUDIO_API_KEY="..."
129
+ streamlit run app.py
130
+ \`\`\`
131
+
132
+ ## Deployment
133
+
134
+ See [DEPLOY.md](DEPLOY.md) for cloud deployment instructions.
135
+
136
+ ## Tech Stack
137
+
138
+ - **Streamlit** - Web framework
139
+ - **Pandas** - Data processing
140
+ - **Plotly** - Interactive charts
141
+ - **Label Studio SDK** - Data source
142
+ EOF
143
+
144
+ git add README.md
145
+ git commit -m "Add README"
146
+ git push
147
+ ```
148
+
149
+ ---
150
+
151
+ ## GitHub Actions (Optional)
152
+
153
+ Auto-deploy on every commit with GitHub Actions:
154
+
155
+ ```bash
156
+ mkdir -p .github/workflows
157
+ cat > .github/workflows/deploy.yml << 'EOF'
158
+ name: Deploy Dashboard
159
+
160
+ on:
161
+ push:
162
+ branches: [main]
163
+
164
+ jobs:
165
+ deploy:
166
+ runs-on: ubuntu-latest
167
+ steps:
168
+ - uses: actions/checkout@v3
169
+ - name: Deploy to HuggingFace
170
+ env:
171
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
172
+ run: |
173
+ pip install huggingface-hub
174
+ huggingface-cli upload YOUR_USERNAME/SPACE_NAME . --repo-type=space
175
+ EOF
176
+ ```
177
+
178
+ Add `HF_TOKEN` secret in GitHub:
179
+ 1. Settings → Secrets and variables → Actions
180
+ 2. New repository secret → `HF_TOKEN`
181
+ 3. Get token from https://huggingface.co/settings/tokens
182
+
183
+ ---
184
+
185
+ ## Team Collaboration
186
+
187
+ ### Add Team Members
188
+
189
+ 1. Go to GitHub repo → **Settings** → **Collaborators**
190
+ 2. Add team members
191
+ 3. They can now push updates
192
+
193
+ ### Protected Branches
194
+
195
+ Require reviews before merging:
196
+
197
+ 1. **Settings** → **Branches**
198
+ 2. **Add rule** for `main`
199
+ 3. Enable:
200
+ - Require pull request reviews
201
+ - Require status checks
202
+
203
+ ---
204
+
205
+ ## Quick Reference
206
+
207
+ ```bash
208
+ # Clone from organization
209
+ git clone https://github.com/ORG_NAME/REPO_NAME.git
210
+
211
+ # Make changes
212
+ git add .
213
+ git commit -m "Update dashboard"
214
+ git push
215
+
216
+ # Both HF Spaces and Streamlit Cloud auto-update!
217
+ ```
218
+
219
+ ---
220
+
221
+ ## Troubleshooting
222
+
223
+ ### Can't push to organization repo
224
+
225
+ **Fix**: Check you have write permissions
226
+ - Ask organization admin to add you
227
+ - Or fork the repo to your personal account
228
+
229
+ ### GitHub Actions failing
230
+
231
+ **Check**:
232
+ 1. `HF_TOKEN` secret is set
233
+ 2. Token has write permissions
234
+ 3. Check Actions logs for details
235
+
236
+ ### Streamlit Cloud can't access private repo
237
+
238
+ **Fix**:
239
+ 1. Make repo public, OR
240
+ 2. Grant Streamlit access in GitHub:
241
+ - Settings → Applications → Streamlit
242
+ - Grant access to organization
243
+
244
+ ---
245
+
246
+ ## Best Practices
247
+
248
+ ✅ **Do**:
249
+ - Use `.gitignore` (already included)
250
+ - Add meaningful commit messages
251
+ - Keep secrets in platform secrets, not code
252
+ - Document changes in commits
253
+
254
+ ❌ **Don't**:
255
+ - Commit `.cache.pkl` (in `.gitignore`)
256
+ - Commit secrets or `.env` files
257
+ - Force push to `main` branch
258
+ - Commit large test data files
259
+
260
+ ---
261
+
262
+ ## Example Workflow
263
+
264
+ ```bash
265
+ # 1. Create feature branch
266
+ git checkout -b feature/add-new-chart
267
+
268
+ # 2. Make changes
269
+ # ... edit app.py ...
270
+
271
+ # 3. Test locally
272
+ streamlit run app.py
273
+
274
+ # 4. Commit and push
275
+ git add app.py
276
+ git commit -m "Add new chart for entity distribution"
277
+ git push origin feature/add-new-chart
278
+
279
+ # 5. Create Pull Request on GitHub
280
+ # 6. Review and merge to main
281
+ # 7. HF Spaces and Streamlit Cloud auto-update! 🎉
282
+ ```
283
+
284
+ ---
285
+
286
+ Need help? Check:
287
+ - [DEPLOY.md](DEPLOY.md) - Cloud deployment details
288
+ - [README.md](README.md) - General dashboard info
README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Annotation Progress Dashboard
2
+
3
+ Live dashboard for tracking Lithuanian NER annotation progress.
4
+
5
+ ## Quick Start
6
+
7
+ ### Local
8
+
9
+ ```bash
10
+ pip install -r requirements.txt
11
+
12
+ # Set credentials
13
+ export LABEL_STUDIO_URL="https://your-instance.com"
14
+ export LABEL_STUDIO_API_KEY="your-key"
15
+
16
+ streamlit run app.py
17
+ ```
18
+
19
+ ### Deploy to HuggingFace Spaces
20
+
21
+ 1. Create new Space at https://huggingface.co/new-space (choose **Streamlit** SDK)
22
+
23
+ 2. Upload files:
24
+ - `app.py`
25
+ - `requirements.txt`
26
+ - `.streamlit/config.toml`
27
+
28
+ 3. Add secrets in Space Settings → Variables and secrets:
29
+ - `LABEL_STUDIO_URL` = `https://your-instance.com`
30
+ - `LABEL_STUDIO_API_KEY` = `your-api-key`
31
+
32
+ 4. Done! Your dashboard will auto-build and deploy.
33
+
34
+ ## Get Your API Key
35
+
36
+ 1. Log into Label Studio
37
+ 2. Profile → Account & Settings → Access Token
38
+ 3. Copy the token
39
+
40
+ ## Features
41
+
42
+ - Real-time progress metrics
43
+ - Weekly team statistics
44
+ - Category breakdown (mokslinis/ziniasklaida)
45
+ - Completion projection based on recent pace
46
+ - Auto-refresh every 5 minutes
47
+ - **Fast loading with smart caching**:
48
+ - Disk cache (`.cache.pkl`) persists between runs
49
+ - Only fetches changed projects
50
+ - Parallel fetching (10 projects at once)
51
+ - First load: ~30s, subsequent: <2s
52
+
53
+ ## Caching Explained
54
+
55
+ **Cache location**: `.cache.pkl` in the same directory as `app.py`
56
+
57
+ **How it works**:
58
+ - First run: Fetches all data from Label Studio (~30 seconds)
59
+ - Saves to disk cache
60
+ - Next runs: Only fetches projects that changed (new tasks added)
61
+ - Shows progress bar when fetching
62
+
63
+ **Clear cache**:
64
+ ```bash
65
+ rm .cache.pkl
66
+ ```
67
+
68
+ Or just wait - cache auto-refreshes every 5 minutes.
69
+
70
+ ## Configuration
71
+
72
+ Edit `app.py` to customize:
73
+
74
+ ```python
75
+ GOAL_WORDS = 2_200_000 # Total goal
76
+ CATEGORY_GOAL = 1_100_000 # Per-category goal
77
+ OUR_TEAM_PROJECT_IDS = {...} # Your team project IDs
78
+ CACHE_FILE = Path(".cache.pkl") # Cache location
79
+ ```
80
+
81
+ ## Troubleshooting
82
+
83
+ **Dashboard loads slowly every time**:
84
+ - Cache file may not be writable
85
+ - Check `.cache.pkl` exists after first load
86
+ - On HF Spaces, cache won't persist (limitation of the platform)
87
+
88
+ **"Missing credentials" error**:
89
+ - Check environment variables are set
90
+ - For HF Spaces: verify secrets in Space settings
91
+
92
+ That's it!
README_GITHUB.md ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Annotation Progress Dashboard
2
+
3
+ Live dashboard for tracking Lithuanian NER annotation project progress.
4
+
5
+ ## 🚀 Quick Deploy
6
+
7
+ ### Easiest: HuggingFace Spaces
8
+
9
+ 1. Go to https://huggingface.co/new-space
10
+ 2. Choose **Streamlit** SDK
11
+ 3. Upload: `app.py`, `requirements.txt`, `.streamlit/config.toml`
12
+ 4. Add secrets: `LABEL_STUDIO_URL` and `LABEL_STUDIO_API_KEY`
13
+ 5. Done! 🎉
14
+
15
+ See [DEPLOY.md](DEPLOY.md) for detailed instructions.
16
+
17
+ ## ✨ Features
18
+
19
+ - **Progress Metrics**: Real-time tracking toward 2.2M word goal
20
+ - **Weekly Stats**: Team member contributions with "before" summary
21
+ - **Category Breakdown**: Split by mokslinis/ziniasklaida + status (Ready/Needs Fixing)
22
+ - **Projections**: Estimated completion date based on recent pace
23
+ - **Fast Loading**: Smart caching (30s first load, <2s after)
24
+
25
+ ## 📊 Screenshots
26
+
27
+ [Add screenshots of your dashboard here]
28
+
29
+ ## 🏃 Local Development
30
+
31
+ ```bash
32
+ # Install dependencies
33
+ pip install -r requirements.txt
34
+
35
+ # Set credentials
36
+ export LABEL_STUDIO_URL="https://your-labelstudio-instance.com"
37
+ export LABEL_STUDIO_API_KEY="your-api-key"
38
+
39
+ # Run dashboard
40
+ streamlit run app.py
41
+ ```
42
+
43
+ Visit http://localhost:8501
44
+
45
+ ## ⚙️ Configuration
46
+
47
+ Edit `app.py` to customize:
48
+
49
+ ```python
50
+ GOAL_WORDS = 2_200_000 # Total word goal
51
+ CATEGORY_GOAL = 1_100_000 # Per-category goal
52
+ OUR_TEAM_PROJECT_IDS = {...} # Your team's project IDs
53
+ TEAM_COLORS = {...} # Chart colors per member
54
+ ```
55
+
56
+ ## 🗂️ Project Structure
57
+
58
+ ```
59
+ annotation-dashboard/
60
+ ├── app.py # Main dashboard (all-in-one)
61
+ ├── requirements.txt # Dependencies
62
+ ├── .streamlit/
63
+ │ └── config.toml # Theme & settings
64
+ ├── .cache.pkl # Auto-generated cache
65
+ ├── .gitignore # Git ignore rules
66
+ ├── DEPLOY.md # Cloud deployment guide
67
+ ├── GITHUB_DEPLOY.md # GitHub organization setup
68
+ └── README.md # This file
69
+ ```
70
+
71
+ ## 📚 Documentation
72
+
73
+ - **[DEPLOY.md](DEPLOY.md)** - Deploy to HuggingFace Spaces or Streamlit Cloud
74
+ - **[GITHUB_DEPLOY.md](GITHUB_DEPLOY.md)** - Setup with GitHub organization
75
+
76
+ ## 🔧 Tech Stack
77
+
78
+ - **Streamlit** - Web framework
79
+ - **Pandas** - Data processing
80
+ - **Plotly** - Interactive charts
81
+ - **Requests** - API client
82
+ - **Label Studio** - Data source
83
+
84
+ ## 🚀 Deployment Options
85
+
86
+ | Platform | Pros | Setup Time |
87
+ |----------|------|------------|
88
+ | **HuggingFace Spaces** | Easy upload, ML-focused | 5 min |
89
+ | **Streamlit Cloud** | GitHub integration | 10 min |
90
+ | **Local** | Full control | 2 min |
91
+
92
+ ## 📈 Performance
93
+
94
+ - **First load**: ~30 seconds (fetches all data)
95
+ - **Cached load**: <2 seconds (smart caching)
96
+ - **Auto-refresh**: Every 5 minutes
97
+ - **Cache location**: `.cache.pkl` (in `.gitignore`)
98
+
99
+ ## 🔐 Security
100
+
101
+ ✅ Secrets stored in platform secrets (not in code)
102
+ ✅ `.env` and secrets files in `.gitignore`
103
+ ✅ Cache file excluded from git
104
+ ✅ No hardcoded credentials
105
+
106
+ ## 🤝 Contributing
107
+
108
+ 1. Clone the repo
109
+ 2. Create a feature branch: `git checkout -b feature/amazing-feature`
110
+ 3. Make changes and test locally
111
+ 4. Commit: `git commit -m 'Add amazing feature'`
112
+ 5. Push: `git push origin feature/amazing-feature`
113
+ 6. Open a Pull Request
114
+
115
+ ## 📝 License
116
+
117
+ [Add your license here]
118
+
119
+ ## 👥 Team
120
+
121
+ [Add team members here]
122
+
123
+ ## 📧 Contact
124
+
125
+ [Add contact info or link to organization]
126
+
127
+ ---
128
+
129
+ **Built with ❤️ for the Lithuanian NER Annotation Project**
app.py ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Annotation Progress Dashboard - Simple & Elegant"""
2
+
3
+ import re
4
+ import os
5
+ import pickle
6
+ from pathlib import Path
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ import streamlit as st
9
+ import pandas as pd
10
+ import plotly.graph_objects as go
11
+ import requests
12
+
13
# =============================================================================
# Configuration
# =============================================================================

# Overall annotation target in words, and the required share per category.
GOAL_WORDS = 2_200_000
CATEGORY_GOAL = 1_100_000

# Label Studio project IDs belonging to our team; all other projects are
# grouped as "Others" in the dashboard.
OUR_TEAM_PROJECT_IDS = {29, 30, 31, 32, 33, 37}
# States counted as fully annotated ("Ready").
ANNOTATED_STATES = ["Acceptable", "No Rating"]
# States counted toward the word goal — also includes "Requires Attention"
# tasks that already carry entity labels (they still need fixing, not redoing).
GOAL_STATES = ["Acceptable", "No Rating", "ReqAttn (entities)"]

# Fixed chart color per anonymized team-member label (see anonymize()).
TEAM_COLORS = {
    "A.K. (22)": "#0066cc",
    "J.Š. (23)": "#00cccc",
    "J.Š. (24)": "#00cc00",
    "G.Z. (25)": "#ff9900",
    "L.M. (26)": "#9933ff",
    "M.M. (27)": "#cc0000",
}

# Cache file location (persists between runs on local disk; ephemeral on
# free cloud hosting — see README).
CACHE_FILE = Path(".cache.pkl")
35
+
36
# =============================================================================
# Setup
# =============================================================================

# Configure the Streamlit page: title, icon, and wide layout.
st.set_page_config(page_title="Annotation Progress", page_icon="📊", layout="wide")
41
+
42
+
43
+ # =============================================================================
44
+ # Data Loading
45
+ # =============================================================================
46
+
47
def fetch_project_data(proj, url, headers):
    """Fetch data from one project (for parallel execution).

    Pages through ``/api/projects/{id}/tasks`` and flattens each task into a
    row dict (project, date, annotation state, word count, category).

    Args:
        proj: Project dict from the Label Studio projects listing; must
            contain ``id``, may contain ``title`` and ``task_number``.
        url: Base Label Studio URL without a trailing slash.
        headers: HTTP headers including the Authorization token.

    Returns:
        Tuple ``(project_id, task_count, rows)`` where ``rows`` is a list of
        per-task dicts ready for DataFrame construction.

    Raises:
        requests.HTTPError: On a non-2xx API response (via raise_for_status).
    """
    pid, name, task_count = proj["id"], proj.get("title", f"Project {proj['id']}"), proj.get("task_number", 0)
    group = "Our Team" if pid in OUR_TEAM_PROJECT_IDS else "Others"

    rows = []
    page = 1
    while True:
        resp = requests.get(
            f"{url}/api/projects/{pid}/tasks",
            headers=headers,
            params={"page": page, "page_size": 100},
            timeout=30
        )
        resp.raise_for_status()
        data = resp.json()
        # The endpoint returns either a bare list or {"tasks": [...], "next": ...}
        # — presumably differs between Label Studio versions; handle both.
        tasks = data if isinstance(data, list) else data.get("tasks", [])

        if not tasks:
            break

        for task in tasks:
            task_data = task.get("data", {})
            # Prefer a precomputed word count; fall back to a whitespace split
            # of the task text (also used when "words" is 0 or missing).
            words = task_data.get("words") or len(task_data.get("text", "").split())
            category = task_data.get("category")

            # Ignore cancelled/skipped annotations.
            annots = [a for a in task.get("annotations", []) if not a.get("was_cancelled")]
            if not annots:
                rows.append({
                    "project_id": pid, "project": name, "project_group": group,
                    "date": None, "state": "Not Annotated",
                    "words": int(words), "category": category
                })
                continue

            # Only the first remaining annotation is considered per task.
            ann = annots[0]
            date = ann.get("created_at", "")[:10] or None  # ISO date prefix, else None

            # Extract the overall "text_rating" choice from the annotation result.
            rating = None
            for item in ann.get("result", []):
                if item.get("type") == "choices" and item.get("from_name") == "text_rating":
                    rating = item.get("value", {}).get("choices", [None])[0]
                    break

            # Whether any entity spans (type "labels") were annotated.
            has_entities = any(i.get("type") == "labels" for i in ann.get("result", []))
            if rating is None:
                state = "No Rating"
            elif rating == "Requires Attention":
                state = f"ReqAttn ({'entities' if has_entities else 'empty'})"
            elif rating == "Unacceptable":
                state = f"Unacceptable ({'entities' if has_entities else 'empty'})"
            else:
                state = "Acceptable"

            rows.append({
                "project_id": pid, "project": name, "project_group": group,
                "date": date, "state": state,
                "words": int(words), "category": category
            })

        # Last-page detection: list API -> short page; dict API -> no "next" link.
        if isinstance(data, list) and len(data) < 100:
            break
        if isinstance(data, dict) and not data.get("next"):
            break
        page += 1

    return pid, task_count, rows
114
+
115
+
116
@st.cache_data(ttl=300)
def load_data():
    """Load annotation data from Label Studio with disk cache.

    Credentials come from Streamlit secrets with an environment-variable
    fallback. Per-project rows are cached in ``CACHE_FILE`` keyed by task
    count, so only projects whose task count changed are re-fetched (in
    parallel, 10 workers). Results are additionally memoized by Streamlit
    for 5 minutes (``ttl=300``).

    Returns:
        pandas.DataFrame with columns: project_id, project, project_group,
        date (datetime64), state, words (int), category, is_annotated,
        is_goal_state. Empty (but correctly-shaped) when no tasks exist.
    """
    try:
        url = st.secrets.get("LABEL_STUDIO_URL", os.getenv("LABEL_STUDIO_URL", "")).rstrip("/")
        key = st.secrets.get("LABEL_STUDIO_API_KEY", os.getenv("LABEL_STUDIO_API_KEY", ""))
    except (KeyError, FileNotFoundError, AttributeError):
        # No secrets.toml available (plain local run) -> environment only.
        url = os.getenv("LABEL_STUDIO_URL", "").rstrip("/")
        key = os.getenv("LABEL_STUDIO_API_KEY", "")

    if not url or not key:
        st.error("Missing credentials. Set LABEL_STUDIO_URL and LABEL_STUDIO_API_KEY.")
        st.stop()

    headers = {"Authorization": f"Token {key}"}

    # Fetch the full project listing.
    resp = requests.get(f"{url}/api/projects", headers=headers, timeout=30)
    resp.raise_for_status()
    projects = resp.json().get("results", [])

    # Load the disk cache (best effort; a corrupt cache is discarded).
    # NOTE: pickle is acceptable here only because the cache file is local
    # and written by this app itself — never load an untrusted .cache.pkl.
    cache = {}
    if CACHE_FILE.exists():
        try:
            with open(CACHE_FILE, "rb") as f:
                cache = pickle.load(f)
        except Exception:
            cache = {}

    # Decide which projects need re-fetching (task count changed or uncached).
    projects_to_fetch = []
    all_rows = []

    for proj in projects:
        pid = proj["id"]
        task_count = proj.get("task_number", 0)
        cache_key = f"project_{pid}"

        # Reuse cached rows when the task count is unchanged.
        if cache_key in cache and cache[cache_key]["task_count"] == task_count:
            all_rows.extend(cache[cache_key]["rows"])
        else:
            projects_to_fetch.append(proj)

    # Fetch stale/new projects in parallel with a visible progress bar.
    if projects_to_fetch:
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(fetch_project_data, proj, url, headers) for proj in projects_to_fetch]

            progress = st.progress(0, text=f"Loading {len(projects_to_fetch)} projects...")
            for i, future in enumerate(futures):
                pid, task_count, rows = future.result()
                all_rows.extend(rows)
                cache[f"project_{pid}"] = {"task_count": task_count, "rows": rows}
                progress.progress((i + 1) / len(futures), text=f"Loaded {i + 1}/{len(futures)} projects")
            progress.empty()

    # Persist the cache (best effort — fails silently on read-only or
    # ephemeral storage, in which case data is simply re-fetched next run).
    try:
        with open(CACHE_FILE, "wb") as f:
            pickle.dump(cache, f)
    except Exception:
        pass

    # Build the DataFrame with an explicit column list: with an empty
    # all_rows, pd.DataFrame([]) would have NO columns and the column
    # accesses below would raise KeyError. Explicit columns keep the
    # empty case well-formed.
    columns = ["project_id", "project", "project_group", "date", "state", "words", "category"]
    df = pd.DataFrame(all_rows, columns=columns)
    df["words"] = df["words"].fillna(0).astype(int)
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df["is_annotated"] = df["state"].isin(ANNOTATED_STATES)
    df["is_goal_state"] = df["state"].isin(GOAL_STATES)

    return df
189
+
190
+
191
+ # =============================================================================
192
+ # Helper Functions
193
+ # =============================================================================
194
+
195
def anonymize(name):
    """Shorten a project title like '26 [Lukas Malakauskas]' to 'L.M. (26)'.

    Titles that do not match the ``NUM [Full Name]`` pattern (or contain a
    single-word name) are returned unchanged; "Others" passes through as-is.
    """
    if name == "Others":
        return "Others"

    m = re.match(r"(\d+)\s+\[(.+?)\]", name)
    if m is None:
        return name

    number, full_name = m.group(1), m.group(2)
    name_parts = full_name.split()
    if len(name_parts) < 2:
        # Single-word names cannot be reduced to two initials; keep original.
        return name

    first_initial = name_parts[0][0]
    last_initial = name_parts[-1][0]
    return f"{first_initial}.{last_initial}. ({number})"
206
+
207
+
208
# =============================================================================
# Main App
# =============================================================================

st.title("📊 Annotation Progress Dashboard")
st.markdown("---")

# Load data (Streamlit-cached for 5 minutes, disk-cached per project).
with st.spinner("Loading..."):
    df = load_data()

# Overview metrics: words in goal states vs. the 2.2M target.
total = df[df["is_goal_state"]]["words"].sum()
remaining = GOAL_WORDS - total
progress = total / GOAL_WORDS * 100

col1, col2 = st.columns(2)
col1.metric("Progress toward 2.2M", f"{total:,}", f"{progress:.1f}%")
col2.metric("Remaining", f"{remaining:,}", f"{100-progress:.1f}%")

st.markdown("---")

# Tabs
tab1, tab2 = st.tabs(["📊 Weekly Stats", "⏱️ Pacing"])

# ============== TAB 1: Weekly Stats ==============
with tab1:
    st.caption("Goal states (Acceptable + No Rating + ReqAttn with entities)")

    # Weeks before this date are collapsed into one "Before" summary row.
    # NOTE(review): presumably the start of detailed weekly reporting — confirm.
    cutoff_date = pd.Timestamp("2025-12-22")

    # Filter data - use GOAL_STATES to match progress metrics
    df_week = df[df["is_goal_state"] & df["date"].notna()].copy()
    # Normalize each date to the Monday of its ISO week.
    df_week["week_start"] = df_week["date"] - pd.to_timedelta(df_week["date"].dt.dayofweek, unit="d")
    # Team projects get an anonymized member label; everything else is "Others".
    df_week["member"] = df_week.apply(
        lambda r: anonymize(r["project"]) if r["project_group"] == "Our Team" else "Others",
        axis=1
    )

    # Weekly pivot (all data): words per member per week.
    weekly_all = df_week.pivot_table(
        index="week_start", columns="member", values="words", aggfunc="sum", fill_value=0
    ).astype(int)

    # Split into before and after cutoff
    weekly_before = weekly_all[weekly_all.index < cutoff_date]
    weekly_after = weekly_all[weekly_all.index >= cutoff_date]

    # Ensure consistent columns: every member appears in both halves
    # (missing members get zero-filled columns).
    all_members = set(weekly_all.columns)
    if "Others" not in all_members:
        all_members.add("Others")

    for member in all_members:
        if member not in weekly_after.columns:
            weekly_after[member] = 0
        if member not in weekly_before.columns:
            weekly_before[member] = 0

    # Sort columns by total contribution (highest first).
    totals = weekly_all.sum().sort_values(ascending=False)
    weekly_after = weekly_after[totals.index]
    weekly_after["Total"] = weekly_after.sum(axis=1)

    # Calculate the "Before" summary row (everything pre-cutoff, collapsed).
    before_totals = weekly_before[totals.index].sum()
    before_totals["Total"] = before_totals.sum()

    # Format weekly data for display as "YYYY-MM-DD - YYYY-MM-DD" week ranges.
    display = weekly_after.reset_index()
    display["Week"] = (
        display["week_start"].dt.strftime("%Y-%m-%d") + " - " +
        (display["week_start"] + pd.Timedelta(days=6)).dt.strftime("%Y-%m-%d")
    )
    display = display.drop("week_start", axis=1)
    display = display[["Week"] + list(totals.index) + ["Total"]]

    # Add "Before" row at the beginning
    before_row = pd.DataFrame([{"Week": f"Before {cutoff_date.strftime('%Y-%m-%d')}", **before_totals}])
    display = pd.concat([before_row, display], ignore_index=True)

    # Add TOTAL row at the end
    all_totals = weekly_all[totals.index].sum()
    all_totals["Total"] = all_totals.sum()
    total_row = pd.DataFrame([{"Week": "TOTAL", **all_totals}])
    display = pd.concat([display, total_row], ignore_index=True)

    # Format numbers with thousands separators (turns cells into strings —
    # display only, no further arithmetic after this point).
    for col in display.columns:
        if col != "Week":
            display[col] = display[col].apply(lambda x: f"{int(x):,}" if pd.notna(x) else "")

    # Style and show
    def style_row(row):
        # Emphasize the TOTAL and "Before" summary rows in the table.
        if row["Week"] == "TOTAL":
            return ["font-weight: bold; background-color: #f0f0f0;"] * len(row)
        elif row["Week"].startswith("Before"):
            return ["font-style: italic; background-color: #f9f9f9;"] * len(row)
        return [""] * len(row)

    styled = display.style.apply(style_row, axis=1).set_properties(subset=["Total"], **{"font-weight": "bold"})
    st.dataframe(styled, hide_index=True, use_container_width=True)
310
+
311
# ============== TAB 2: Pacing ==============
with tab2:
    st.subheader("Category Breakdown")
    st.caption("Requirement: 1.1M words from each category")

    # Split by status: Ready vs Needs Fixing.
    df_ready = df[df["is_annotated"]]  # Acceptable + No Rating
    df_needs_fixing = df[df["state"] == "ReqAttn (entities)"]

    # Calculate word counts by category and status.
    mok_ready = df_ready[df_ready["category"] == "mokslinis"]["words"].sum()
    mok_fixing = df_needs_fixing[df_needs_fixing["category"] == "mokslinis"]["words"].sum()
    mok_total = mok_ready + mok_fixing

    zin_ready = df_ready[df_ready["category"] == "ziniasklaida"]["words"].sum()
    zin_fixing = df_needs_fixing[df_needs_fixing["category"] == "ziniasklaida"]["words"].sum()
    zin_total = zin_ready + zin_fixing

    total_ready = mok_ready + zin_ready
    total_fixing = mok_fixing + zin_fixing
    total_all = total_ready + total_fixing

    cat_df = pd.DataFrame({
        "Category": ["mokslinis", "ziniasklaida", "TOTAL"],
        "Ready": [f"{mok_ready:,}", f"{zin_ready:,}", f"{total_ready:,}"],
        "Needs Fixing": [f"{mok_fixing:,}", f"{zin_fixing:,}", f"{total_fixing:,}"],
        "Total": [f"{mok_total:,}", f"{zin_total:,}", f"{total_all:,}"],
        "Goal": [f"{CATEGORY_GOAL:,}", f"{CATEGORY_GOAL:,}", f"{GOAL_WORDS:,}"],
        "Progress": [
            f"{mok_total/CATEGORY_GOAL*100:.1f}%",
            f"{zin_total/CATEGORY_GOAL*100:.1f}%",
            f"{total_all/GOAL_WORDS*100:.1f}%"
        ]
    })
    st.dataframe(cat_df, hide_index=True, use_container_width=True)

    st.markdown("---")
    st.header("Cumulative Progress & Projection")

    # Cumulative data: goal-state rows with a known date, per member.
    df_cum = df[df["is_goal_state"] & df["date"].notna()].copy()
    df_cum["member"] = df_cum.apply(
        lambda r: anonymize(r["project"]) if r["project_group"] == "Our Team" else "Others",
        axis=1
    )

    daily = df_cum.groupby(["date", "member"])["words"].sum().reset_index()
    pivot = daily.pivot_table(index="date", columns="member", values="words", fill_value=0)
    cumulative = pivot.sort_index().cumsum()
    cumulative["Total"] = cumulative.sum(axis=1)
    cumulative = cumulative[cumulative.index >= pd.Timestamp("2025-12-18")]

    if cumulative.empty:
        # Guard: .index[-1]/.iloc[-1] below would raise IndexError if all
        # rows predate the chart start date.
        st.info("No annotation data on or after 2025-12-18 yet.")
    else:
        # Projection calculation: linear extrapolation from recent pace.
        last_date = cumulative.index[-1]
        current = cumulative["Total"].iloc[-1]

        # Calculate rate from the last 14 days of activity.
        lookback = cumulative[cumulative.index >= last_date - pd.Timedelta(days=14)]
        if len(lookback) >= 2:
            days = (last_date - lookback.index[0]).days or 1
            rate = (current - lookback["Total"].iloc[0]) / days
            if rate > 0:
                days_left = (GOAL_WORDS - current) / rate
                completion = last_date + pd.Timedelta(days=days_left)
                weekly_rate = rate * 7
            else:
                # Zero/negative pace: no meaningful projection. (Previously
                # days_left fell back to 0, which made completion == last_date
                # and drew a bogus "completes today" projection.)
                completion = weekly_rate = None
        else:
            rate = completion = weekly_rate = None

        # Chart
        fig = go.Figure()

        # Goal reference lines.
        fig.add_hline(y=1_100_000, line_dash="dot", line_color="orange",
                      annotation_text="Midpoint: 1.1M", annotation_position="top left")
        fig.add_hline(y=GOAL_WORDS, line_dash="dot", line_color="red",
                      annotation_text="Goal: 2.2M", annotation_position="top left")

        # Per-member traces, largest current total first.
        members = [c for c in cumulative.columns if c not in ["Total", "Others"]]
        members = sorted(members, key=lambda x: cumulative[x].iloc[-1], reverse=True)

        if "Others" in cumulative.columns:
            fig.add_trace(go.Scatter(
                x=cumulative.index, y=cumulative["Others"],
                name=f"Others: {cumulative['Others'].iloc[-1]:,.0f}",
                mode="lines", line=dict(width=2, color="#7f8c8d")
            ))

        for m in members:
            color = TEAM_COLORS.get(m, "#34495e")
            fig.add_trace(go.Scatter(
                x=cumulative.index, y=cumulative[m],
                name=f"{m}: {cumulative[m].iloc[-1]:,.0f}",
                mode="lines", line=dict(width=2, color=color)
            ))

        # Total trace with a soft fill.
        fig.add_trace(go.Scatter(
            x=cumulative.index, y=cumulative["Total"],
            name=f"Total: {cumulative['Total'].iloc[-1]:,.0f}",
            mode="lines", line=dict(width=3, color="#d4af37"),
            fill="tozeroy", fillcolor="rgba(212, 175, 55, 0.1)"
        ))

        # Dotted projection line out to the estimated completion date.
        if completion is not None:
            proj_dates = pd.date_range(last_date, completion, freq="D")
            proj_vals = current + rate * (proj_dates - last_date).days
            fig.add_trace(go.Scatter(
                x=proj_dates, y=proj_vals,
                name=f"Projection ({int(weekly_rate):,}/wk)",
                mode="lines", line=dict(width=3, color="#d4af37", dash="dot")
            ))
            fig.add_trace(go.Scatter(
                x=[completion], y=[GOAL_WORDS],
                mode="markers+text", marker=dict(size=14, color="#d4af37", symbol="diamond"),
                text=[completion.strftime("%b %d")], textposition="top center",
                showlegend=False
            ))
            title = f"Cumulative Progress → Est. {completion.strftime('%B %d, %Y')}"
        else:
            title = "Cumulative Progress"

        fig.update_layout(
            title=title, xaxis_title="Date", yaxis_title="Cumulative Words",
            height=600, hovermode="x unified", template="plotly_white"
        )
        fig.update_yaxes(tickformat=".2s")

        st.plotly_chart(fig, use_container_width=True)

        # Pacing metrics (only meaningful when a projection exists).
        if completion is not None:
            st.markdown("### Pacing Estimates")
            c1, c2, c3 = st.columns(3)
            c1.metric("Per Week Rate", f"{int(weekly_rate):,} words")
            c2.metric("Weeks Remaining", f"{days_left/7:.1f} weeks")
            c3.metric("Est. Completion", completion.strftime("%Y-%m-%d"))
450
# Footer: last-render timestamp plus refresh hints.
st.markdown("---")
refreshed_at = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")
st.caption(
    f"Updated: {refreshed_at} | "
    "Auto-refresh: 5 min | Press 'R' to refresh"
)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit>=1.28.0
2
+ pandas>=2.0.0
3
+ plotly>=5.17.0
4
+ requests>=2.31.0