Upload 3 files
Browse files
README.md
CHANGED
|
@@ -1,12 +1,228 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Remote Audit App - Setup Instructions
|
| 2 |
+
|
| 3 |
+
This Hugging Face Space performs design-based tests of randomization integrity using pre-treatment satellite imagery, implementing the conditional randomization test from the accompanying paper (Jerzak & Daoud, 2025; see Citation below).
|
| 4 |
+
|
| 5 |
+
## Quick Start
|
| 6 |
+
|
| 7 |
+
### Option 1: Use Pre-computed Satellite Data (Recommended for HF)
|
| 8 |
+
|
| 9 |
+
The app expects satellite features (NDVI, EVI, VIIRS) to be pre-computed. To replicate the Begum et al. 2022 audit:
|
| 10 |
+
|
| 11 |
+
1. Download the pre-processed dataset with satellite features
|
| 12 |
+
2. Place `Islam2019_WithGeocodesAndSatData.Rdata` in the app directory
|
| 13 |
+
3. Select "Use Example (Islam 2019)" in the app
|
| 14 |
+
|
| 15 |
+
### Option 2: Upload Your Own CSV
|
| 16 |
+
|
| 17 |
+
Your CSV should include:
|
| 18 |
+
- Treatment assignment column (e.g., `begum_treat` with values 1=control, 2=treatment)
|
| 19 |
+
- Satellite features: `ndvi_median`, `viirs_median` (or similar)
|
| 20 |
+
- Any other columns for reference
|
| 21 |
+
|
| 22 |
+
Example CSV structure:
|
| 23 |
+
```
|
| 24 |
+
id,begum_treat,ndvi_median,viirs_median
|
| 25 |
+
1,1,0.45,2.3
|
| 26 |
+
2,2,0.52,3.1
|
| 27 |
+
...
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
## Setting Up GEE for New Data
|
| 31 |
+
|
| 32 |
+
The app uses **pre-computed** satellite features. To add GEE capabilities for computing features on-the-fly:
|
| 33 |
+
|
| 34 |
+
### Prerequisites
|
| 35 |
+
1. Google Earth Engine account (free): https://earthengine.google.com/signup/
|
| 36 |
+
2. Python environment with `earthengine-api`
|
| 37 |
+
|
| 38 |
+
### Installation Steps
|
| 39 |
+
|
| 40 |
+
```bash
|
| 41 |
+
# Install Earth Engine Python API
|
| 42 |
+
pip install earthengine-api
|
| 43 |
+
|
| 44 |
+
# Authenticate (first time only)
|
| 45 |
+
earthengine authenticate
|
| 46 |
+
|
| 47 |
+
# Then, in your Python script (the next two lines are Python, not shell commands)
|
| 48 |
+
import ee
|
| 49 |
+
ee.Initialize()
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### Computing Satellite Features
|
| 53 |
+
|
| 54 |
+
Use the GEE code from `RemoteAuditOfBrokenRCT.R` to compute features. The Python example below shows this kind of computation:
|
| 55 |
+
|
| 56 |
+
```python
|
| 57 |
+
import ee
|
| 58 |
+
import pandas as pd
|
| 59 |
+
|
| 60 |
+
def compute_satellite_features(lat, lon, start_date, end_date):
|
| 61 |
+
"""
|
| 62 |
+
Compute NDVI, EVI, and VIIRS features for a location
|
| 63 |
+
|
| 64 |
+
Args:
|
| 65 |
+
lat, lon: Coordinates
|
| 66 |
+
start_date, end_date: Date range (YYYY-MM-DD)
|
| 67 |
+
|
| 68 |
+
Returns:
|
| 69 |
+
dict with ndvi_median, evi_median, and viirs_median
|
| 70 |
+
"""
|
| 71 |
+
point = ee.Geometry.Point([lon, lat])
|
| 72 |
+
|
| 73 |
+
# MODIS vegetation indices
|
| 74 |
+
modis = (ee.ImageCollection('MODIS/061/MOD13Q1')
|
| 75 |
+
.filterDate(start_date, end_date)
|
| 76 |
+
.select(['NDVI', 'EVI'])
|
| 77 |
+
.map(lambda img: img.multiply(0.0001)))
|
| 78 |
+
|
| 79 |
+
ndvi_median = modis.select('NDVI').median()
|
| 80 |
+
evi_median = modis.select('EVI').median()
|
| 81 |
+
|
| 82 |
+
# VIIRS nighttime lights
|
| 83 |
+
viirs = (ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG')
|
| 84 |
+
.filterDate(start_date, end_date)
|
| 85 |
+
.select(['avg_rad']))
|
| 86 |
+
|
| 87 |
+
viirs_median = viirs.median()
|
| 88 |
+
|
| 89 |
+
# Sample at location
|
| 90 |
+
sample = (ndvi_median.addBands([evi_median, viirs_median])
|
| 91 |
+
.sample(point, 250)
|
| 92 |
+
.first()
|
| 93 |
+
.getInfo())
|
| 94 |
+
|
| 95 |
+
return {
|
| 96 |
+
'ndvi_median': sample['properties'].get('NDVI'),
|
| 97 |
+
'evi_median': sample['properties'].get('EVI'),
|
| 98 |
+
'viirs_median': sample['properties'].get('avg_rad')
|
| 99 |
+
}
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
### Integration with R (via reticulate)
|
| 103 |
+
|
| 104 |
+
To integrate GEE in your R workflow:
|
| 105 |
+
|
| 106 |
+
```r
|
| 107 |
+
library(reticulate)
|
| 108 |
+
|
| 109 |
+
# Set Python environment
|
| 110 |
+
Sys.setenv(RETICULATE_PYTHON = "/path/to/python")
|
| 111 |
+
|
| 112 |
+
# Import Earth Engine
|
| 113 |
+
ee <- import("ee")
|
| 114 |
+
ee$Initialize()
|
| 115 |
+
|
| 116 |
+
# Call Python function from R
|
| 117 |
+
compute_features <- py_run_string("
|
| 118 |
+
def get_features(lat, lon):
|
| 119 |
+
# Your GEE code here
|
| 120 |
+
return {'ndvi_median': ..., 'viirs_median': ...}
|
| 121 |
+
")
|
| 122 |
+
|
| 123 |
+
features <- compute_features$get_features(lat, lon)
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
## App Configuration on Hugging Face
|
| 127 |
+
|
| 128 |
+
### Required Files
|
| 129 |
+
- `app.R` - Main Shiny application
|
| 130 |
+
- `Dockerfile` - Container configuration
|
| 131 |
+
- `Islam2019_WithGeocodesAndSatData.Rdata` - Example dataset (optional)
|
| 132 |
+
|
| 133 |
+
### Environment Variables
|
| 134 |
+
None required for basic functionality.
|
| 135 |
+
|
| 136 |
+
### Secrets (if adding GEE)
|
| 137 |
+
If you want to enable on-the-fly GEE queries:
|
| 138 |
+
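1. Add `GOOGLE_APPLICATION_CREDENTIALS` secret in HF Space settings (note: `app.R` reads `GEE_EMAIL` and `GEE_KEY` via `Sys.getenv()`, so set those secrets too if you use the app's built-in GEE code)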
|
| 139 |
+
2. Upload service account JSON
|
| 140 |
+
3. Modify app to call GEE API
|
| 141 |
+
|
| 142 |
+
## Usage Guide
|
| 143 |
+
|
| 144 |
+
### Running a Randomization Audit
|
| 145 |
+
|
| 146 |
+
1. **Load Data**: Upload CSV or use example
|
| 147 |
+
2. **Configure Audit**:
|
| 148 |
+
- Audit Type: "Randomization"
|
| 149 |
+
- Treatment Column: Select column (e.g., `begum_treat`)
|
| 150 |
+
- Control Value: 1
|
| 151 |
+
- Treatment Value: 2
|
| 152 |
+
3. **Select Features**: Check `ndvi_median` and `viirs_median`
|
| 153 |
+
4. **Choose Learner**: Logistic (fast) or XGBoost (more flexible)
|
| 154 |
+
5. **Set Parameters**:
|
| 155 |
+
- K-Folds: 5-10 (higher = more robust)
|
| 156 |
+
- Resamples: 1000-2000 (higher = more precise p-value)
|
| 157 |
+
6. **Run Audit**: Click "Run Audit" button
|
| 158 |
+
|
| 159 |
+
### Running a Missingness Audit
|
| 160 |
+
|
| 161 |
+
Same steps but:
|
| 162 |
+
- Audit Type: "Missingness"
|
| 163 |
+
- Select variable to check for missing data patterns
|
| 164 |
+
|
| 165 |
+
### Interpreting Results
|
| 166 |
+
|
| 167 |
+
- **p < 0.05**: Assignment is MORE predictable from satellite features than expected under the stated randomization → potential deviation from the stated randomization procedure
|
| 168 |
+
- **p ≥ 0.05**: No evidence of deviation detected (this does not prove that the randomization was carried out as stated)
|
| 169 |
+
|
| 170 |
+
## Technical Notes
|
| 171 |
+
|
| 172 |
+
### Computation Time
|
| 173 |
+
- Logistic: ~1-3 minutes for 500 units, 1000 resamples
|
| 174 |
+
- XGBoost: ~3-10 minutes (depends on tree settings)
|
| 175 |
+
|
| 176 |
+
### Memory Requirements
|
| 177 |
+
- Small datasets (<1000 units): 2GB RAM sufficient
|
| 178 |
+
- Large datasets (>5000 units): Consider 4GB+ RAM
|
| 179 |
+
|
| 180 |
+
### Handling Missing Satellite Data
|
| 181 |
+
|
| 182 |
+
If your CSV has missing satellite features:
|
| 183 |
+
- App will drop rows with missing values
|
| 184 |
+
- Consider imputation before upload, or
|
| 185 |
+
- Use GEE to compute features for missing locations
|
| 186 |
+
|
| 187 |
+
## Troubleshooting
|
| 188 |
+
|
| 189 |
+
### "Feature not found" error
|
| 190 |
+
- Check that your CSV has columns named exactly: `ndvi_median`, `viirs_median`
|
| 191 |
+
- Column names are case-sensitive
|
| 192 |
+
|
| 193 |
+
### "Too few complete cases" error
|
| 194 |
+
- Ensure at least 10 units have both valid treatment assignment and satellite features
|
| 195 |
+
- Check for NA values in your data
|
| 196 |
+
|
| 197 |
+
### GEE authentication issues
|
| 198 |
+
```bash
|
| 199 |
+
# Re-authenticate
|
| 200 |
+
earthengine authenticate
|
| 201 |
+
|
| 202 |
+
# Check credentials
|
| 203 |
+
python -c "import ee; ee.Initialize(); print('Success!')"
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
### Dockerfile build fails
|
| 207 |
+
```bash
|
| 208 |
+
# Test locally
|
| 209 |
+
docker build -t remote-audit .
|
| 210 |
+
docker run -p 7860:7860 remote-audit
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
## Citation
|
| 214 |
+
|
| 215 |
+
If you use this app, please cite:
|
| 216 |
+
|
| 217 |
+
```
|
| 218 |
+
Jerzak, C. T., & Daoud, A. (2025). Remote Auditing: Design-Based Tests
|
| 219 |
+
of Randomization, Selection, and Missingness with Broadly Accessible
|
| 220 |
+
Satellite Imagery.
|
| 221 |
+
```
|
| 222 |
+
|
| 223 |
+
## Support
|
| 224 |
+
|
| 225 |
+
For issues or questions:
|
| 226 |
+
- Check the paper's technical appendix
|
| 227 |
+
- Review example code in `RemoteAuditOfBrokenRCT.R`
|
| 228 |
+
- Contact: [your contact info]
|
app.R
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
# app.R — Remote Audit: Design-Based Tests of Randomization with Satellite Imagery
|
| 2 |
# ==============================================================================
|
| 3 |
# Performs conditional randomization tests to audit experimental integrity
|
|
@@ -289,6 +290,15 @@ server <- function(input, output, session) {
|
|
| 289 |
selected = grep("lon|long", cols, value = TRUE, ignore.case = TRUE)[1] %||% NULL)
|
| 290 |
})
|
| 291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
output$data_preview <- renderDT({
|
| 293 |
df <- data_loaded()
|
| 294 |
req(df)
|
|
@@ -319,8 +329,6 @@ server <- function(input, output, session) {
|
|
| 319 |
gee_email <- Sys.getenv("GEE_EMAIL", unset = NULL)
|
| 320 |
gee_key <- Sys.getenv("GEE_KEY", unset = NULL)
|
| 321 |
|
| 322 |
-
if (!py_module_available("ee")) py_install("earthengine-api")
|
| 323 |
-
|
| 324 |
py_run_string("
|
| 325 |
import ee, pandas as pd, json
|
| 326 |
|
|
|
|
| 1 |
+
# setwd("~/Dropbox/ImageDeconfoundAid/BrokenExperiment/ShinyApp/"); Sys.setenv(RETICULATE_PYTHON = "/Users/cjerzak/miniconda3/bin/python")
|
| 2 |
# app.R — Remote Audit: Design-Based Tests of Randomization with Satellite Imagery
|
| 3 |
# ==============================================================================
|
| 4 |
# Performs conditional randomization tests to audit experimental integrity
|
|
|
|
| 290 |
selected = grep("lon|long", cols, value = TRUE, ignore.case = TRUE)[1] %||% NULL)
|
| 291 |
})
|
| 292 |
|
| 293 |
+
observeEvent(input$data_source, {
|
| 294 |
+
if (input$data_source == "upload" && is.null(input$file_csv)) {
|
| 295 |
+
updateSelectInput(session, "treat_col", choices = character(0))
|
| 296 |
+
updateSelectInput(session, "missing_col", choices = character(0))
|
| 297 |
+
updateSelectInput(session, "lat_col", choices = character(0))
|
| 298 |
+
updateSelectInput(session, "long_col", choices = character(0))
|
| 299 |
+
}
|
| 300 |
+
})
|
| 301 |
+
|
| 302 |
output$data_preview <- renderDT({
|
| 303 |
df <- data_loaded()
|
| 304 |
req(df)
|
|
|
|
| 329 |
gee_email <- Sys.getenv("GEE_EMAIL", unset = NULL)
|
| 330 |
gee_key <- Sys.getenv("GEE_KEY", unset = NULL)
|
| 331 |
|
|
|
|
|
|
|
| 332 |
py_run_string("
|
| 333 |
import ee, pandas as pd, json
|
| 334 |
|