Spaces:
Sleeping
Sleeping
chore: Add logo, explain data & methodology
Browse files
- README.md +2 -0
- assets/ds-logo-pos.svg +1 -0
- assets/gazet-logo.svg +1 -0
- gazet_demo.py +85 -8
- modal_serve/README.md +2 -44
- modal_serve/serve.py +6 -1
README.md
CHANGED
|
@@ -9,6 +9,8 @@ app_port: 7860
|
|
| 9 |
|
| 10 |
# Gazet
|
| 11 |
|
|
|
|
|
|
|
| 12 |
Lean natural-language geocoder with GIS operations over Overture and Natural Earth parquet datasets.
|
| 13 |
|
| 14 |
Gazet is built to be easily packageable and minimal in setup, trying to push the boundaries on how small we can go in setup for LLM-driven data applications. It is built for working with small language models and parquet files.
|
|
|
|
| 9 |
|
| 10 |
# Gazet
|
| 11 |
|
| 12 |
+
<img src="assets/gazet-logo.svg" alt="Gazet logo" width="64" />
|
| 13 |
+
|
| 14 |
Lean natural-language geocoder with GIS operations over Overture and Natural Earth parquet datasets.
|
| 15 |
|
| 16 |
Gazet is built to be easily packageable and minimal in setup, trying to push the boundaries on how small we can go in setup for LLM-driven data applications. It is built for working with small language models and parquet files.
|
assets/ds-logo-pos.svg
ADDED
|
|
assets/gazet-logo.svg
ADDED
|
|
gazet_demo.py
CHANGED
|
@@ -4,6 +4,7 @@ import json
|
|
| 4 |
import math
|
| 5 |
import os
|
| 6 |
import re
|
|
|
|
| 7 |
|
| 8 |
import pandas as pd
|
| 9 |
import requests
|
|
@@ -182,17 +183,28 @@ def _track(event_name: str, **props):
|
|
| 182 |
)
|
| 183 |
|
| 184 |
EXAMPLES = [
|
| 185 |
-
"
|
| 186 |
"Neighbouring states of Odisha",
|
| 187 |
-
"
|
| 188 |
-
"Coastal districts of
|
| 189 |
-
"1 km buffer along the border of
|
| 190 |
-
"
|
| 191 |
-
"Rivers flowing through
|
| 192 |
-
"Districts along the
|
|
|
|
|
|
|
| 193 |
]
|
| 194 |
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
_inject_plausible()
|
| 197 |
st.markdown("""<style>
|
| 198 |
[data-testid="stBaseButton-tertiary"] {
|
|
@@ -212,6 +224,71 @@ st.caption(
|
|
| 212 |
"/ask plain english to geometry"
|
| 213 |
)
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
backend = "gguf"
|
| 216 |
|
| 217 |
if "run_q" not in st.session_state:
|
|
|
|
| 4 |
import math
|
| 5 |
import os
|
| 6 |
import re
|
| 7 |
+
from pathlib import Path
|
| 8 |
|
| 9 |
import pandas as pd
|
| 10 |
import requests
|
|
|
|
| 183 |
)
|
| 184 |
|
| 185 |
EXAMPLES = [
|
| 186 |
+
"Goa, India",
|
| 187 |
"Neighbouring states of Odisha",
|
| 188 |
+
"Karnataka excluding Bengaluru",
|
| 189 |
+
"Coastal districts of Kerala",
|
| 190 |
+
"1 km buffer along the border of West Bengal and Odisha",
|
| 191 |
+
"Northern half of India",
|
| 192 |
+
"Rivers flowing through Tamil Nadu",
|
| 193 |
+
"Districts along the Cauvery river",
|
| 194 |
+
"Largest district of Bihar",
|
| 195 |
+
"merge Bihar and Jharkhand"
|
| 196 |
]
|
| 197 |
|
| 198 |
+
LOGO_PATH = str(Path(__file__).parent / "assets" / "gazet-logo.svg")
|
| 199 |
+
DEVSEED_LOGO_PATH = str(Path(__file__).parent / "assets" / "ds-logo-pos.svg")
|
| 200 |
+
|
| 201 |
+
st.set_page_config(
|
| 202 |
+
page_title="Gazet",
|
| 203 |
+
page_icon=LOGO_PATH,
|
| 204 |
+
layout="wide",
|
| 205 |
+
initial_sidebar_state="collapsed",
|
| 206 |
+
)
|
| 207 |
+
st.logo(LOGO_PATH, size="large")
|
| 208 |
_inject_plausible()
|
| 209 |
st.markdown("""<style>
|
| 210 |
[data-testid="stBaseButton-tertiary"] {
|
|
|
|
| 224 |
"/ask plain english to geometry"
|
| 225 |
)
|
| 226 |
|
| 227 |
+
with st.sidebar:
|
| 228 |
+
st.header("Learn how this was built")
|
| 229 |
+
st.markdown(
|
| 230 |
+
"""
|
| 231 |
+
**Gazet** turns plain English questions into geometries on a map.
|
| 232 |
+
|
| 233 |
+
### Behind the scenes
|
| 234 |
+
|
| 235 |
+
This demo is powered by a small language model (SLM) finetuned from
|
| 236 |
+
**Qwen3.5 0.8B**. We picked a small model on purpose: it is fast, cheap to
|
| 237 |
+
run, and easy to host. The trade off is that it works best on the kinds of
|
| 238 |
+
questions it was trained for. Wide open queries may not always work, but
|
| 239 |
+
that is also the point of this work.
|
| 240 |
+
|
| 241 |
+
### Why a small model?
|
| 242 |
+
|
| 243 |
+
Small models are easy to improve. When the model fails on a new kind of
|
| 244 |
+
question, we can add a few examples, finetune again in a short cycle, and
|
| 245 |
+
ship the fix. The dataset for this demo was generated synthetically from
|
| 246 |
+
templates and grew over time as we added new question patterns. You can
|
| 247 |
+
follow the same approach for your own domain.
|
| 248 |
+
|
| 249 |
+
### Data sources
|
| 250 |
+
|
| 251 |
+
The model queries two open geographic datasets:
|
| 252 |
+
|
| 253 |
+
- [Overture Maps – Divisions Area](https://docs.overturemaps.org/schema/reference/divisions/division_area/)
|
| 254 |
+
for administrative boundaries (countries, states, districts, localities).
|
| 255 |
+
- [Natural Earth](https://www.naturalearthdata.com/) for physical
|
| 256 |
+
features such as rivers, lakes, mountain ranges, and coastlines.
|
| 257 |
+
|
| 258 |
+
### Links
|
| 259 |
+
|
| 260 |
+
- Dataset:
|
| 261 |
+
[developmentseed/gazet-dataset](https://huggingface.co/datasets/developmentseed/gazet-dataset)
|
| 262 |
+
- Model:
|
| 263 |
+
[developmentseed/gazet-model](https://huggingface.co/developmentseed/gazet-model)
|
| 264 |
+
- Hosted Space:
|
| 265 |
+
[developmentseed/gazet](https://huggingface.co/spaces/developmentseed/gazet)
|
| 266 |
+
- Source code:
|
| 267 |
+
[developmentseed/gazet](https://github.com/developmentseed/gazet)
|
| 268 |
+
|
| 269 |
+
### Talk to us
|
| 270 |
+
|
| 271 |
+
Interested in small models for your own vertical, or want to try this
|
| 272 |
+
approach on a different domain? Reach out:
|
| 273 |
+
|
| 274 |
+
- Soumya: [soumya@developmentseed.org](mailto:soumya@developmentseed.org)
|
| 275 |
+
- Daniel: [danielwiesmann@developmentseed.org](mailto:danielwiesmann@developmentseed.org)
|
| 276 |
+
|
| 277 |
+
### Tips for asking good questions
|
| 278 |
+
|
| 279 |
+
- Use a place name the model is likely to know (countries, states,
|
| 280 |
+
major districts, well known rivers and lakes).
|
| 281 |
+
- Combine simple operations: union, intersection, difference, buffer,
|
| 282 |
+
half splits, neighbours.
|
| 283 |
+
- If a query fails, try rephrasing it more concretely or narrow down
|
| 284 |
+
the search space, for example: "Coastal districts of Odisha" instead of
|
| 285 |
+
"Areas near the sea".
|
| 286 |
+
"""
|
| 287 |
+
)
|
| 288 |
+
st.divider()
|
| 289 |
+
st.caption("Built by")
|
| 290 |
+
st.image(DEVSEED_LOGO_PATH, width=180)
|
| 291 |
+
|
| 292 |
backend = "gguf"
|
| 293 |
|
| 294 |
if "run_q" not in st.session_state:
|
modal_serve/README.md
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
# Modal Deployment
|
| 2 |
|
| 3 |
Deploys Gazet to Modal as three independently scaled containers:
|
|
@@ -59,54 +61,10 @@ API directly:
|
|
| 59 |
curl "https://<workspace>--gazet-api-fastapi-app.modal.run/search?q=Odisha"
|
| 60 |
```
|
| 61 |
|
| 62 |
-
Expected timings:
|
| 63 |
-
|
| 64 |
-
- First request after idle: ~25-30s (cold start cascade across all three)
|
| 65 |
-
- Warm requests within scaledown windows: ~1-3s
|
| 66 |
-
|
| 67 |
-
## 4. Set the budget cap
|
| 68 |
-
|
| 69 |
-
Modal dashboard -> Settings -> Billing:
|
| 70 |
-
|
| 71 |
-
- Workspace spending limit: **$50/mo**
|
| 72 |
-
- Email alerts at $25 / $40 / $50
|
| 73 |
-
|
| 74 |
-
Modal pauses new container starts when the limit is hit.
|
| 75 |
-
|
| 76 |
## Updating
|
| 77 |
|
| 78 |
-
Code or dependency changes:
|
| 79 |
-
|
| 80 |
```bash
|
| 81 |
modal deploy modal_serve/serve.py
|
| 82 |
```
|
| 83 |
|
| 84 |
Model updates: re-upload to the `gazet` volume; running containers pick up the new file at next cold start.
|
| 85 |
-
|
| 86 |
-
## Architecture notes
|
| 87 |
-
|
| 88 |
-
- **No supervisord.** Each Cls runs one logical service.
|
| 89 |
-
- **`@modal.asgi_app`** serves FastAPI natively, no uvicorn subprocess.
|
| 90 |
-
- **`@modal.web_server`** wraps non-ASGI processes (`llama-server` binary, Streamlit).
|
| 91 |
-
- **Cross-Cls URLs** resolved at runtime via `modal.Cls.from_name(...)`.
|
| 92 |
-
- **`scaledown_window`** tuned per tier: 120s GPU, 300s API, 600s Demo (UI sessions are sticky).
|
| 93 |
-
|
| 94 |
-
## Cost reference
|
| 95 |
-
|
| 96 |
-
| Traffic | Monthly estimate |
|
| 97 |
-
|---|---|
|
| 98 |
-
| 200 queries/day | ~$5-7 |
|
| 99 |
-
| 1000 queries/day | ~$18-25 |
|
| 100 |
-
| Idle | $0 |
|
| 101 |
-
|
| 102 |
-
T4 GPU at ~$0.59/hr is only billed during active inference + brief warmup. CPU containers are negligible.
|
| 103 |
-
|
| 104 |
-
## Troubleshooting
|
| 105 |
-
|
| 106 |
-
**Cold start fails on `LlamaServer`**: check the binary path in `modal_serve/serve.py`. The official image's binary is at `/app/llama-server`; if upstream changes, run `modal shell gazet::LlamaServer` and `which llama-server`.
|
| 107 |
-
|
| 108 |
-
**`Api` cannot reach `LlamaServer`**: confirm `modal.Cls.from_name(...).serve.web_url` returns a non-empty string. The first deploy registers URLs; redeploys keep them stable.
|
| 109 |
-
|
| 110 |
-
**Streamlit websocket errors**: `@modal.web_server` supports websockets natively; if a proxy issue appears, raise `startup_timeout` and check `modal logs gazet`.
|
| 111 |
-
|
| 112 |
-
**Model not found**: the path in `serve.py` is `/models/checkpoints/qwen35-fientune-v3/ckpt-q4_k_m.gguf`. Verify the volume layout matches.
|
|
|
|
| 1 |
+
<img src="../assets/gazet-logo.svg" alt="Gazet logo" width="64" />
|
| 2 |
+
|
| 3 |
# Modal Deployment
|
| 4 |
|
| 5 |
Deploys Gazet to Modal as three independently scaled containers:
|
|
|
|
| 61 |
curl "https://<workspace>--gazet-api-fastapi-app.modal.run/search?q=Odisha"
|
| 62 |
```
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
## Updating
|
| 65 |
|
|
|
|
|
|
|
| 66 |
```bash
|
| 67 |
modal deploy modal_serve/serve.py
|
| 68 |
```
|
| 69 |
|
| 70 |
Model updates: re-upload to the `gazet` volume; running containers pick up the new file at next cold start.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modal_serve/serve.py
CHANGED
|
@@ -48,7 +48,8 @@ llama_image = (
|
|
| 48 |
image=llama_image,
|
| 49 |
gpu="T4",
|
| 50 |
volumes={"/models": gazet_vol},
|
| 51 |
-
scaledown_window=
|
|
|
|
| 52 |
max_containers=2,
|
| 53 |
timeout=600,
|
| 54 |
)
|
|
@@ -82,6 +83,7 @@ api_image = (
|
|
| 82 |
image=api_image,
|
| 83 |
volumes={"/data": data_vol},
|
| 84 |
scaledown_window=300,
|
|
|
|
| 85 |
max_containers=3,
|
| 86 |
timeout=300,
|
| 87 |
)
|
|
@@ -108,12 +110,15 @@ demo_image = (
|
|
| 108 |
.pip_install_from_pyproject("pyproject.toml", optional_dependencies=["demo"])
|
| 109 |
.add_local_python_source("gazet")
|
| 110 |
.add_local_file("gazet_demo.py", "/root/gazet_demo.py")
|
|
|
|
|
|
|
| 111 |
)
|
| 112 |
|
| 113 |
|
| 114 |
@app.cls(
|
| 115 |
image=demo_image,
|
| 116 |
scaledown_window=600,
|
|
|
|
| 117 |
max_containers=3,
|
| 118 |
timeout=600,
|
| 119 |
)
|
|
|
|
| 48 |
image=llama_image,
|
| 49 |
gpu="T4",
|
| 50 |
volumes={"/models": gazet_vol},
|
| 51 |
+
scaledown_window=300,
|
| 52 |
+
min_containers=1,
|
| 53 |
max_containers=2,
|
| 54 |
timeout=600,
|
| 55 |
)
|
|
|
|
| 83 |
image=api_image,
|
| 84 |
volumes={"/data": data_vol},
|
| 85 |
scaledown_window=300,
|
| 86 |
+
min_containers=1,
|
| 87 |
max_containers=3,
|
| 88 |
timeout=300,
|
| 89 |
)
|
|
|
|
| 110 |
.pip_install_from_pyproject("pyproject.toml", optional_dependencies=["demo"])
|
| 111 |
.add_local_python_source("gazet")
|
| 112 |
.add_local_file("gazet_demo.py", "/root/gazet_demo.py")
|
| 113 |
+
.add_local_file("assets/gazet-logo.svg", "/root/assets/gazet-logo.svg")
|
| 114 |
+
.add_local_file("assets/ds-logo-pos.svg", "/root/assets/ds-logo-pos.svg")
|
| 115 |
)
|
| 116 |
|
| 117 |
|
| 118 |
@app.cls(
|
| 119 |
image=demo_image,
|
| 120 |
scaledown_window=600,
|
| 121 |
+
min_containers=1,
|
| 122 |
max_containers=3,
|
| 123 |
timeout=600,
|
| 124 |
)
|