srmsoumya committed on
Commit
e84f5c0
·
1 Parent(s): a63a92a

chore: Add logo, explain data & methodology

Browse files
README.md CHANGED
@@ -9,6 +9,8 @@ app_port: 7860
9
 
10
  # Gazet
11
 
 
 
12
  Lean natural-language geocoder with GIS operations over Overture and Natural Earth parquet datasets.
13
 
14
  Gazet is built to be easily packageable and minimal in setup, pushing the boundaries of how small the setup for LLM-driven data applications can be. It is built for working with small language models and parquet files.
 
9
 
10
  # Gazet
11
 
12
+ <img src="assets/gazet-logo.svg" alt="Gazet logo" width="64" />
13
+
14
  Lean natural-language geocoder with GIS operations over Overture and Natural Earth parquet datasets.
15
 
16
  Gazet is built to be easily packageable and minimal in setup, pushing the boundaries of how small the setup for LLM-driven data applications can be. It is built for working with small language models and parquet files.
assets/ds-logo-pos.svg ADDED
assets/gazet-logo.svg ADDED
gazet_demo.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import math
5
  import os
6
  import re
 
7
 
8
  import pandas as pd
9
  import requests
@@ -182,17 +183,28 @@ def _track(event_name: str, **props):
182
  )
183
 
184
  EXAMPLES = [
185
- "Odisha, India",
186
  "Neighbouring states of Odisha",
187
- "Odisha excluding Cuttack",
188
- "Coastal districts of Odisha",
189
- "1 km buffer along the border of Odisha and West Bengal",
190
- "Western half of Odisha",
191
- "Rivers flowing through Odisha",
192
- "Districts along the Indravati river",
 
 
193
  ]
194
 
195
- st.set_page_config(page_title="Gazet", page_icon="🌍", layout="wide")
 
 
 
 
 
 
 
 
 
196
  _inject_plausible()
197
  st.markdown("""<style>
198
  [data-testid="stBaseButton-tertiary"] {
@@ -212,6 +224,71 @@ st.caption(
212
  "/ask plain english to geometry"
213
  )
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  backend = "gguf"
216
 
217
  if "run_q" not in st.session_state:
 
4
  import math
5
  import os
6
  import re
7
+ from pathlib import Path
8
 
9
  import pandas as pd
10
  import requests
 
183
  )
184
 
185
  EXAMPLES = [
186
+ "Goa, India",
187
  "Neighbouring states of Odisha",
188
+ "Karnataka excluding Bengaluru",
189
+ "Coastal districts of Kerala",
190
+ "1 km buffer along the border of West Bengal and Odisha",
191
+ "Northern half of India",
192
+ "Rivers flowing through Tamil Nadu",
193
+ "Districts along the Cauvery river",
194
+ "Largest district of Bihar",
195
+ "merge Bihar and Jharkhand"
196
  ]
197
 
198
+ LOGO_PATH = str(Path(__file__).parent / "assets" / "gazet-logo.svg")
199
+ DEVSEED_LOGO_PATH = str(Path(__file__).parent / "assets" / "ds-logo-pos.svg")
200
+
201
+ st.set_page_config(
202
+ page_title="Gazet",
203
+ page_icon=LOGO_PATH,
204
+ layout="wide",
205
+ initial_sidebar_state="collapsed",
206
+ )
207
+ st.logo(LOGO_PATH, size="large")
208
  _inject_plausible()
209
  st.markdown("""<style>
210
  [data-testid="stBaseButton-tertiary"] {
 
224
  "/ask plain english to geometry"
225
  )
226
 
227
+ with st.sidebar:
228
+ st.header("Learn how this was built")
229
+ st.markdown(
230
+ """
231
+ **Gazet** turns plain English questions into geometries on a map.
232
+
233
+ ### Behind the scenes
234
+
235
+ This demo is powered by a small language model (SLM) finetuned from
236
+ **Qwen3.5 0.8B**. We picked a small model on purpose: it is fast, cheap to
237
+ run, and easy to host. The trade off is that it works best on the kinds of
238
+ questions it was trained for. Wide open queries may not always work, but
239
+ that is also the point of this work.
240
+
241
+ ### Why a small model?
242
+
243
+ Small models are easy to improve. When the model fails on a new kind of
244
+ question, we can add a few examples, finetune again in a short cycle, and
245
+ ship the fix. The dataset for this demo was generated synthetically from
246
+ templates and grew over time as we added new question patterns. You can
247
+ follow the same approach for your own domain.
248
+
249
+ ### Data sources
250
+
251
+ The model queries two open geographic datasets:
252
+
253
+ - [Overture Maps – Divisions Area](https://docs.overturemaps.org/schema/reference/divisions/division_area/)
254
+ for administrative boundaries (countries, states, districts, localities).
255
+ - [Natural Earth](https://www.naturalearthdata.com/) for physical
256
+ features such as rivers, lakes, mountain ranges, and coastlines.
257
+
258
+ ### Links
259
+
260
+ - Dataset:
261
+ [developmentseed/gazet-dataset](https://huggingface.co/datasets/developmentseed/gazet-dataset)
262
+ - Model:
263
+ [developmentseed/gazet-model](https://huggingface.co/developmentseed/gazet-model)
264
+ - Hosted Space:
265
+ [developmentseed/gazet](https://huggingface.co/spaces/developmentseed/gazet)
266
+ - Source code:
267
+ [developmentseed/gazet](https://github.com/developmentseed/gazet)
268
+
269
+ ### Talk to us
270
+
271
+ Interested in small models for your own vertical, or want to try this
272
+ approach on a different domain? Reach out:
273
+
274
+ - Soumya: [soumya@developmentseed.org](mailto:soumya@developmentseed.org)
275
+ - Daniel: [danielwiesmann@developmentseed.org](mailto:danielwiesmann@developmentseed.org)
276
+
277
+ ### Tips for asking good questions
278
+
279
+ - Use a place name the model is likely to know (countries, states,
280
+ major districts, well known rivers and lakes).
281
+ - Combine simple operations: union, intersection, difference, buffer,
282
+ half splits, neighbours.
283
+ - If a query fails, try rephrasing it more concretely or narrow down
284
+ the search space, for example: "Coastal districts of Odisha" instead of
285
+ "Areas near the sea".
286
+ """
287
+ )
288
+ st.divider()
289
+ st.caption("Built by")
290
+ st.image(DEVSEED_LOGO_PATH, width=180)
291
+
292
  backend = "gguf"
293
 
294
  if "run_q" not in st.session_state:
modal_serve/README.md CHANGED
@@ -1,3 +1,5 @@
 
 
1
  # Modal Deployment
2
 
3
  Deploys Gazet to Modal as three independently scaled containers:
@@ -59,54 +61,10 @@ API directly:
59
  curl "https://<workspace>--gazet-api-fastapi-app.modal.run/search?q=Odisha"
60
  ```
61
 
62
- Expected timings:
63
-
64
- - First request after idle: ~25-30s (cold start cascade across all three)
65
- - Warm requests within scaledown windows: ~1-3s
66
-
67
- ## 4. Set the budget cap
68
-
69
- Modal dashboard -> Settings -> Billing:
70
-
71
- - Workspace spending limit: **$50/mo**
72
- - Email alerts at $25 / $40 / $50
73
-
74
- Modal pauses new container starts when the limit is hit.
75
-
76
  ## Updating
77
 
78
- Code or dependency changes:
79
-
80
  ```bash
81
  modal deploy modal_serve/serve.py
82
  ```
83
 
84
  Model updates: re-upload to the `gazet` volume; running containers pick up the new file at the next cold start.
85
-
86
- ## Architecture notes
87
-
88
- - **No supervisord.** Each Cls runs one logical service.
89
- - **`@modal.asgi_app`** serves FastAPI natively, no uvicorn subprocess.
90
- - **`@modal.web_server`** wraps non-ASGI processes (`llama-server` binary, Streamlit).
91
- - **Cross-Cls URLs** resolved at runtime via `modal.Cls.from_name(...)`.
92
- - **`scaledown_window`** tuned per tier: 120s GPU, 300s API, 600s Demo (UI sessions are sticky).
93
-
94
- ## Cost reference
95
-
96
- | Traffic | Monthly estimate |
97
- |---|---|
98
- | 200 queries/day | ~$5-7 |
99
- | 1000 queries/day | ~$18-25 |
100
- | Idle | $0 |
101
-
102
- T4 GPU at ~$0.59/hr is only billed during active inference + brief warmup. CPU containers are negligible.
103
-
104
- ## Troubleshooting
105
-
106
- **Cold start fails on `LlamaServer`**: check the binary path in `modal_serve/serve.py`. The official image's binary is at `/app/llama-server`; if upstream changes, run `modal shell gazet::LlamaServer` and `which llama-server`.
107
-
108
- **`Api` cannot reach `LlamaServer`**: confirm `modal.Cls.from_name(...).serve.web_url` returns a non-empty string. The first deploy registers URLs; redeploys keep them stable.
109
-
110
- **Streamlit websocket errors**: `@modal.web_server` supports websockets natively; if a proxy issue appears, raise `startup_timeout` and check `modal logs gazet`.
111
-
112
- **Model not found**: the path in `serve.py` is `/models/checkpoints/qwen35-fientune-v3/ckpt-q4_k_m.gguf`. Verify the volume layout matches.
 
1
+ <img src="../assets/gazet-logo.svg" alt="Gazet logo" width="64" />
2
+
3
  # Modal Deployment
4
 
5
  Deploys Gazet to Modal as three independently scaled containers:
 
61
  curl "https://<workspace>--gazet-api-fastapi-app.modal.run/search?q=Odisha"
62
  ```
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  ## Updating
65
 
 
 
66
  ```bash
67
  modal deploy modal_serve/serve.py
68
  ```
69
 
70
  Model updates: re-upload to the `gazet` volume; running containers pick up the new file at the next cold start.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
modal_serve/serve.py CHANGED
@@ -48,7 +48,8 @@ llama_image = (
48
  image=llama_image,
49
  gpu="T4",
50
  volumes={"/models": gazet_vol},
51
- scaledown_window=120,
 
52
  max_containers=2,
53
  timeout=600,
54
  )
@@ -82,6 +83,7 @@ api_image = (
82
  image=api_image,
83
  volumes={"/data": data_vol},
84
  scaledown_window=300,
 
85
  max_containers=3,
86
  timeout=300,
87
  )
@@ -108,12 +110,15 @@ demo_image = (
108
  .pip_install_from_pyproject("pyproject.toml", optional_dependencies=["demo"])
109
  .add_local_python_source("gazet")
110
  .add_local_file("gazet_demo.py", "/root/gazet_demo.py")
 
 
111
  )
112
 
113
 
114
  @app.cls(
115
  image=demo_image,
116
  scaledown_window=600,
 
117
  max_containers=3,
118
  timeout=600,
119
  )
 
48
  image=llama_image,
49
  gpu="T4",
50
  volumes={"/models": gazet_vol},
51
+ scaledown_window=300,
52
+ min_containers=1,
53
  max_containers=2,
54
  timeout=600,
55
  )
 
83
  image=api_image,
84
  volumes={"/data": data_vol},
85
  scaledown_window=300,
86
+ min_containers=1,
87
  max_containers=3,
88
  timeout=300,
89
  )
 
110
  .pip_install_from_pyproject("pyproject.toml", optional_dependencies=["demo"])
111
  .add_local_python_source("gazet")
112
  .add_local_file("gazet_demo.py", "/root/gazet_demo.py")
113
+ .add_local_file("assets/gazet-logo.svg", "/root/assets/gazet-logo.svg")
114
+ .add_local_file("assets/ds-logo-pos.svg", "/root/assets/ds-logo-pos.svg")
115
  )
116
 
117
 
118
  @app.cls(
119
  image=demo_image,
120
  scaledown_window=600,
121
+ min_containers=1,
122
  max_containers=3,
123
  timeout=600,
124
  )