ericakcc commited on
Commit
6e649fa
·
verified ·
1 Parent(s): 1ad3c0e

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,13 +1,57 @@
1
  ---
2
- title: Skyread
3
- emoji: 👀
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 6.17.3
8
- python_version: '3.13'
9
  app_file: app.py
10
  pinned: false
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SkyRead 探空白話判讀器
3
+ emoji: 🌤️
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 6.17.3
 
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # 🌤️ SkyRead 探空白話判讀器
13
+
14
+ > 把艱深的 Skew-T 探空圖,翻成**同行看的指數**與**阿嬤看的帶傘建議**。
15
+
16
+ ![SkyRead Skew-T](docs/screenshot.png)
17
+
18
+ ## Why
19
+
20
+ 每天全球施放上千顆探空氣球,但讀懂一張 Skew-T 需要多年訓練。
21
+ SkyRead 把它變成兩張卡片:給氣象同行的指數摘要,和給長輩的
22
+ 「要不要帶傘、能不能曬棉被」。
23
+
24
+ ## The honest small-model architecture
25
+
26
+ | 層 | 負責 | 由誰做 |
27
+ |----|------|--------|
28
+ | 數值 | CAPE/CIN、LCL/LFC/EL、K、LI、TT、PWAT | **MetPy**(確定性計算,AI 不碰數字) |
29
+ | 同行版卡片 | 指數摘要——專業讀者要的就是精確數字 | 規則式模板(確定性) |
30
+ | 生活版卡片 | 把建議講成阿嬤聽得懂的人話 | **Qwen3-0.6B**(本機推論,只改寫草稿) |
31
+ | 保險 | 模型失敗或輸出不合格時 | 規則式 fallback(同時是 LLM 的草稿) |
32
+
33
+ 小模型算不準 CAPE——所以我們不讓它算。它只做小模型真正擅長、
34
+ 也是唯一需要它的事:把一句數值正確的天氣提醒,改寫成自然的人話。
35
+ 改寫結果還要通過驗證(繁中、禁簡體字、長度、不得回音指令),
36
+ 不合格會自動重試,重試仍不過就用草稿原文。
37
+
38
+ ## Data sources
39
+
40
+ - 🛰️ 即時探空:石垣島 47918 / 香港 45004 等鄰近測站(University of Wyoming
41
+ archive;台灣本島測站未開放於該資料庫,故取距離最近者)
42
+ - 📚 經典個案:MetPy 內建(含 1999-05-04 Oklahoma tornado outbreak)
43
+ - 📄 上傳 CSV:`pressure,temperature,dewpoint,direction,speed`(hPa/°C/deg/kt),
44
+ 範例檔在 `examples/sample_sounding.csv`
45
+
46
+ ## Run locally
47
+
48
+ ```bash
49
+ uv sync
50
+ uv run python app.py # Gradio UI at http://127.0.0.1:7860
51
+ uv run python -m skyread.spike # CLI end-to-end demo
52
+ uv run pytest tests/ -v
53
+ ```
54
+
55
+ ## Built for
56
+
57
+ Hugging Face **Build Small Hackathon 2026** — Backyard AI track.
__pycache__/app.cpython-312.pyc ADDED
Binary file (6.75 kB). View file
 
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SkyRead — Gradio app: sounding -> Skew-T plot + dual-layer interpretation.
2
+
3
+ Run locally:
4
+ uv run python app.py
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import threading
10
+
11
+ import gradio as gr
12
+ from matplotlib.figure import Figure
13
+
14
+ from skyread.indices import compute_indices
15
+ from skyread.interpret import interpret_rule_based
16
+ from skyread.live import STATIONS, latest_sounding
17
+ from skyread.llm import MODEL_ID, interpret_llm, warm_up
18
+ from skyread.plot import make_skewt
19
+ from skyread.sounding import Sounding, load_csv, load_sample
20
+
21
+ # Curated, demo-safe example soundings bundled with MetPy (zero network).
22
+ EXAMPLES: dict[str, str] = {
23
+ "1999-05-04 Oklahoma (強對流 / tornado outbreak)": "may4_sounding.txt",
24
+ "2010-01-20 winter case": "jan20_sounding.txt",
25
+ "2011-11-11 case": "nov11_sounding.txt",
26
+ }
27
+
28
+ SOURCE_LIVE = "🛰️ 即時探空(鄰近測站)"
29
+ SOURCE_EXAMPLE = "📚 經典個案"
30
+ SOURCE_UPLOAD = "📄 上傳 CSV"
31
+
32
+ _MODEL_NAME = MODEL_ID.split("/")[-1]
33
+ _BADGE_LLM = (
34
+ f"🧠 生活版由 **{_MODEL_NAME}**(本機推論)改寫;"
35
+ "同行版與所有數值由 MetPy 確定性計算。"
36
+ )
37
+ _BADGE_RULE = "📐 規則式判讀(fallback);所有數值由 MetPy 確定性計算。"
38
+
39
+
40
def _load_sounding(
    source: str, station_label: str, example_label: str, uploaded: str | None
) -> Sounding:
    """Turn the UI's data-source selection into a parsed sounding.

    Args:
        source: One of the ``SOURCE_*`` radio labels. Any value other than
            the live or upload label is treated as "bundled example".
        station_label: Key into ``STATIONS`` (used for the live source only).
        example_label: Key into ``EXAMPLES`` (used for the example source only).
        uploaded: Path of the uploaded CSV, or ``None``/empty if nothing
            was uploaded (used for the upload source only).

    Returns:
        The sounding loaded from the selected source.

    Raises:
        ValueError: If the upload source is selected but no file was given.
    """
    if source == SOURCE_LIVE:
        station_id = STATIONS[station_label]
        return latest_sounding(station_id)

    if source != SOURCE_UPLOAD:
        # Every remaining source value resolves to a bundled example case.
        sample_file = EXAMPLES[example_label]
        return load_sample(sample_file)

    if uploaded:
        return load_csv(uploaded, name="uploaded")
    raise ValueError("請先上傳 CSV 檔")
51
+
52
+
53
def analyze(
    source: str,
    station_label: str,
    example_label: str,
    uploaded: str | None,
    use_llm: bool,
) -> tuple[Figure | None, str, str, str]:
    """Run the full chain and return ``(figure, pro_md, grandma_md, badge_md)``.

    Args:
        source: Selected data-source label (one of the ``SOURCE_*`` constants).
        station_label: Selected live-station label.
        example_label: Selected bundled-example label.
        uploaded: Path of the uploaded CSV, or ``None``.
        use_llm: When true, the grandma card is rewritten via
            :func:`interpret_llm`; otherwise the rule-based cards are used.

    Returns:
        On success: the Skew-T figure, the pro card, the grandma card and the
        engine badge. On any failure: ``(None, <friendly error message>, "", "")``
        — this handler never raises.
    """
    import logging  # local import keeps this block self-contained

    # The whole chain is guarded: a CSV can parse fine yet still blow up in
    # index computation or plotting (empty profile, increasing pressure, …).
    try:
        snd = _load_sounding(source, station_label, example_label, uploaded)
        indices = compute_indices(snd)
        if use_llm:
            cards, engine = interpret_llm(indices, snd.name)
        else:
            cards, engine = interpret_rule_based(indices, snd.name), "rule-based"
        badge = _BADGE_LLM if engine == "llm" else _BADGE_RULE
        return make_skewt(snd), cards["pro"], cards["grandma"], badge
    except Exception as exc:  # surface as a friendly message, never a crash
        # The user only sees str(exc); keep the full traceback in the server
        # log so failures remain diagnosable.
        logging.getLogger(__name__).exception(
            "SkyRead analysis failed (source=%r)", source
        )
        return None, f"⚠️ 讀取失敗:{exc}(可改選經典個案)", "", ""
74
+
75
+
76
def _analyze_fast(
    source: str, station_label: str, example_label: str, uploaded: str | None
) -> tuple[Figure | None, str, str, str]:
    """Page-load variant of :func:`analyze` that always skips the LLM.

    Takes the same selection arguments as :func:`analyze` (minus ``use_llm``)
    and returns the same 4-tuple, built from the rule-based cards only.
    """
    selection = (source, station_label, example_label, uploaded)
    return analyze(*selection, use_llm=False)
81
+
82
+
83
def build_ui() -> gr.Blocks:
    """Construct the Gradio interface.

    Builds a two-column layout: the input controls (data source, station,
    example, CSV upload, LLM toggle, run button) and the outputs (Skew-T plot
    plus three Markdown areas for the pro card, grandma card and engine badge).

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    # NOTE(review): nesting below was reconstructed from statement order —
    # confirm the column membership of each component against the original.
    with gr.Blocks(title="SkyRead 探空白話判讀器") as demo:
        gr.Markdown(
            "# 🌤️ SkyRead — 探空白話判讀器\n"
            "把艱深的 Skew-T 探空圖,翻成**同行看的指數**與**阿嬤看的帶傘建議**。\n"
            "_數值由 MetPy 精確計算,AI 只負責把數字講成人話。_"
        )
        with gr.Row():
            # Input controls.
            with gr.Column(scale=1):
                source = gr.Radio(
                    choices=[SOURCE_LIVE, SOURCE_EXAMPLE, SOURCE_UPLOAD],
                    value=SOURCE_EXAMPLE,
                    label="資料來源",
                )
                station = gr.Dropdown(
                    choices=list(STATIONS),
                    value=list(STATIONS)[0],
                    label="即時測站(台灣探空未開放於 Wyoming 資料庫,取最近測站)",
                )
                example = gr.Dropdown(
                    choices=list(EXAMPLES), value=list(EXAMPLES)[0], label="範例探空"
                )
                upload = gr.File(
                    label="探空 CSV (pressure,temperature,dewpoint,direction,speed)",
                    file_types=[".csv"],
                    type="filepath",
                )
                use_llm = gr.Checkbox(
                    value=True,
                    label=f"🧠 用 {_MODEL_NAME} 潤飾生活版(慢幾秒,但更像人話)",
                )
                btn = gr.Button("判讀 ☁️", variant="primary")
            # Outputs, in the same order as the tuple returned by analyze().
            with gr.Column(scale=1):
                plot = gr.Plot(label="Skew-T / Log-P")
                pro = gr.Markdown()
                grandma = gr.Markdown()
                badge = gr.Markdown()

        # Button click runs the full chain, honouring the LLM checkbox.
        btn.click(
            analyze,
            inputs=[source, station, example, upload, use_llm],
            outputs=[plot, pro, grandma, badge],
        )
        # Initial load uses the LLM-free variant for an instant first paint.
        demo.load(
            _analyze_fast,
            inputs=[source, station, example, upload],
            outputs=[plot, pro, grandma, badge],
        )
    return demo
133
+
134
+
135
if __name__ == "__main__":
    # Start loading the model in a daemon thread, then build and launch the
    # UI without waiting for the load to finish.
    warm_thread = threading.Thread(target=warm_up, daemon=True)
    warm_thread.start()
    demo = build_ui()
    demo.launch(theme=gr.themes.Soft())
examples/sample_sounding.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ pressure,temperature,dewpoint,direction,speed
2
+ 1000,30.0,24.0,180,10
3
+ 925,24.0,20.0,190,15
4
+ 850,18.0,14.0,200,20
5
+ 700,8.0,2.0,220,25
6
+ 500,-10.0,-20.0,240,35
7
+ 400,-22.0,-35.0,250,45
8
+ 300,-38.0,-55.0,260,55
9
+ 250,-48.0,-65.0,260,60
10
+ 200,-55.0,-70.0,270,65
pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "skyread"
3
+ version = "0.1.0"
4
+ description = "Turn Skew-T soundings into plain-language weather advice — MetPy computes, a 0.6B LLM narrates"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "gradio>=6.17.3",
9
+ "matplotlib>=3.10.9",
10
+ "metpy>=1.7.1",
11
+ "pandas>=3.0.3",
12
+ "siphon>=0.10.0",
13
+ "torch>=2.11.0",
14
+ # >=4.51 for the qwen3 architecture. NOTE: this drops MiniCPM3 support
15
+ # (its remote code needs 4.46.x) — see PROGRESS notes, 2026-06-11.
16
+ "transformers>=4.51",
17
+ ]
18
+
19
+ # Linux boxes here run driver CUDA 12.8, but PyPI torch ships cu130-only
20
+ # builds; pull the cu128 wheels instead (macOS keeps PyPI).
21
+ # NOTE: when re-exporting requirements.txt for the HF Space, keep torch
22
+ # from PyPI (strip the download.pytorch.org lines).
23
+ [tool.uv.sources]
24
+ torch = [{ index = "pytorch-cu128", marker = "sys_platform == 'linux'" }]
25
+
26
+ [[tool.uv.index]]
27
+ name = "pytorch-cu128"
28
+ url = "https://download.pytorch.org/whl/cu128"
29
+ explicit = true
30
+
31
+ [dependency-groups]
32
+ dev = [
33
+ "pytest>=9.0.3",
34
+ "ruff>=0.15.16",
35
+ ]
requirements.txt ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv export --format requirements-txt --no-hashes --no-dev --no-emit-project --no-sources -o requirements.txt
3
+ annotated-doc==0.0.4
4
+ # via
5
+ # fastapi
6
+ # typer
7
+ annotated-types==0.7.0
8
+ # via pydantic
9
+ anyio==4.13.0
10
+ # via
11
+ # gradio
12
+ # httpx
13
+ # starlette
14
+ audioop-lts==0.2.2 ; python_full_version >= '3.13'
15
+ # via gradio
16
+ beautifulsoup4==4.15.0
17
+ # via siphon
18
+ brotli==1.2.0
19
+ # via gradio
20
+ certifi==2026.5.20
21
+ # via
22
+ # httpcore
23
+ # httpx
24
+ # pyproj
25
+ # requests
26
+ charset-normalizer==3.4.7
27
+ # via requests
28
+ click==8.4.1
29
+ # via
30
+ # huggingface-hub
31
+ # typer
32
+ # uvicorn
33
+ colorama==0.4.6 ; sys_platform == 'win32'
34
+ # via
35
+ # click
36
+ # tqdm
37
+ contourpy==1.3.3
38
+ # via matplotlib
39
+ cuda-bindings==13.3.1 ; sys_platform == 'linux'
40
+ # via torch
41
+ cuda-pathfinder==1.5.5 ; sys_platform == 'linux'
42
+ # via cuda-bindings
43
+ cuda-toolkit==13.0.2 ; sys_platform == 'linux'
44
+ # via torch
45
+ cycler==0.12.1
46
+ # via matplotlib
47
+ fastapi==0.136.3
48
+ # via gradio
49
+ filelock==3.29.1
50
+ # via
51
+ # huggingface-hub
52
+ # torch
53
+ flexcache==0.3
54
+ # via pint
55
+ flexparser==0.4
56
+ # via pint
57
+ fonttools==4.63.0
58
+ # via matplotlib
59
+ fsspec==2026.4.0
60
+ # via
61
+ # gradio-client
62
+ # huggingface-hub
63
+ # torch
64
+ gradio==6.17.3
65
+ # via skyread
66
+ gradio-client==2.5.0
67
+ # via
68
+ # gradio
69
+ # hf-gradio
70
+ groovy==0.1.2
71
+ # via gradio
72
+ h11==0.16.0
73
+ # via
74
+ # httpcore
75
+ # uvicorn
76
+ hf-gradio==0.4.1
77
+ # via gradio
78
+ hf-xet==1.5.1 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
79
+ # via huggingface-hub
80
+ httpcore==1.0.9
81
+ # via httpx
82
+ httpx==0.28.1
83
+ # via
84
+ # gradio
85
+ # gradio-client
86
+ # huggingface-hub
87
+ # safehttpx
88
+ huggingface-hub==1.19.0
89
+ # via
90
+ # gradio
91
+ # gradio-client
92
+ # tokenizers
93
+ # transformers
94
+ idna==3.18
95
+ # via
96
+ # anyio
97
+ # httpx
98
+ # requests
99
+ jinja2==3.1.6
100
+ # via
101
+ # gradio
102
+ # torch
103
+ kiwisolver==1.5.0
104
+ # via matplotlib
105
+ markdown-it-py==4.2.0
106
+ # via rich
107
+ markupsafe==3.0.3
108
+ # via
109
+ # gradio
110
+ # jinja2
111
+ matplotlib==3.10.9
112
+ # via
113
+ # metpy
114
+ # skyread
115
+ mdurl==0.1.2
116
+ # via markdown-it-py
117
+ metpy==1.7.1
118
+ # via skyread
119
+ mpmath==1.3.0
120
+ # via sympy
121
+ networkx==3.6.1
122
+ # via torch
123
+ numpy==2.4.6
124
+ # via
125
+ # contourpy
126
+ # gradio
127
+ # matplotlib
128
+ # metpy
129
+ # pandas
130
+ # scipy
131
+ # siphon
132
+ # transformers
133
+ # xarray
134
+ nvidia-cublas==13.1.1.3 ; sys_platform == 'linux'
135
+ # via
136
+ # nvidia-cudnn-cu13
137
+ # nvidia-cusolver
138
+ # torch
139
+ nvidia-cuda-cupti==13.0.85 ; sys_platform == 'linux'
140
+ # via cuda-toolkit
141
+ nvidia-cuda-nvrtc==13.0.88 ; sys_platform == 'linux'
142
+ # via
143
+ # cuda-toolkit
144
+ # nvidia-cublas
145
+ nvidia-cuda-runtime==13.0.96 ; sys_platform == 'linux'
146
+ # via cuda-toolkit
147
+ nvidia-cudnn-cu13==9.20.0.48 ; sys_platform == 'linux'
148
+ # via torch
149
+ nvidia-cufft==12.0.0.61 ; sys_platform == 'linux'
150
+ # via cuda-toolkit
151
+ nvidia-cufile==1.15.1.6 ; sys_platform == 'linux'
152
+ # via cuda-toolkit
153
+ nvidia-curand==10.4.0.35 ; sys_platform == 'linux'
154
+ # via cuda-toolkit
155
+ nvidia-cusolver==12.0.4.66 ; sys_platform == 'linux'
156
+ # via cuda-toolkit
157
+ nvidia-cusparse==12.6.3.3 ; sys_platform == 'linux'
158
+ # via
159
+ # cuda-toolkit
160
+ # nvidia-cusolver
161
+ nvidia-cusparselt-cu13==0.8.1 ; sys_platform == 'linux'
162
+ # via torch
163
+ nvidia-nccl-cu13==2.29.7 ; sys_platform == 'linux'
164
+ # via torch
165
+ nvidia-nvjitlink==13.0.88 ; sys_platform == 'linux'
166
+ # via
167
+ # cuda-toolkit
168
+ # nvidia-cufft
169
+ # nvidia-cusolver
170
+ # nvidia-cusparse
171
+ nvidia-nvshmem-cu13==3.4.5 ; sys_platform == 'linux'
172
+ # via torch
173
+ nvidia-nvtx==13.0.85 ; sys_platform == 'linux'
174
+ # via cuda-toolkit
175
+ orjson==3.11.9
176
+ # via gradio
177
+ packaging==26.2
178
+ # via
179
+ # gradio
180
+ # gradio-client
181
+ # huggingface-hub
182
+ # matplotlib
183
+ # pooch
184
+ # transformers
185
+ # xarray
186
+ pandas==3.0.3
187
+ # via
188
+ # gradio
189
+ # metpy
190
+ # siphon
191
+ # skyread
192
+ # xarray
193
+ pillow==12.2.0
194
+ # via
195
+ # gradio
196
+ # matplotlib
197
+ pint==0.25.3
198
+ # via metpy
199
+ platformdirs==4.10.0
200
+ # via
201
+ # pint
202
+ # pooch
203
+ pooch==1.9.0
204
+ # via metpy
205
+ protobuf==7.35.0
206
+ # via siphon
207
+ pydantic==2.13.4
208
+ # via
209
+ # fastapi
210
+ # gradio
211
+ pydantic-core==2.46.4
212
+ # via pydantic
213
+ pydub==0.25.1
214
+ # via gradio
215
+ pygments==2.20.0
216
+ # via rich
217
+ pyparsing==3.3.2
218
+ # via matplotlib
219
+ pyproj==3.7.2
220
+ # via metpy
221
+ python-dateutil==2.9.0.post0
222
+ # via
223
+ # matplotlib
224
+ # pandas
225
+ python-multipart==0.0.32
226
+ # via gradio
227
+ pytz==2026.2
228
+ # via gradio
229
+ pyyaml==6.0.3
230
+ # via
231
+ # gradio
232
+ # huggingface-hub
233
+ # transformers
234
+ regex==2026.5.9
235
+ # via transformers
236
+ requests==2.34.2
237
+ # via
238
+ # pooch
239
+ # siphon
240
+ rich==15.0.0
241
+ # via typer
242
+ safehttpx==0.1.7
243
+ # via gradio
244
+ safetensors==0.8.0
245
+ # via transformers
246
+ scipy==1.17.1
247
+ # via metpy
248
+ semantic-version==2.10.0
249
+ # via gradio
250
+ setuptools==81.0.0
251
+ # via torch
252
+ shellingham==1.5.4
253
+ # via typer
254
+ siphon==0.10.0
255
+ # via skyread
256
+ six==1.17.0
257
+ # via python-dateutil
258
+ soupsieve==2.8.4
259
+ # via beautifulsoup4
260
+ starlette==1.2.1
261
+ # via
262
+ # fastapi
263
+ # gradio
264
+ sympy==1.14.0
265
+ # via torch
266
+ tokenizers==0.22.2
267
+ # via transformers
268
+ tomlkit==0.14.0
269
+ # via gradio
270
+ torch==2.12.0
271
+ # via skyread
272
+ tqdm==4.68.2
273
+ # via
274
+ # huggingface-hub
275
+ # transformers
276
+ traitlets==5.15.1
277
+ # via metpy
278
+ transformers==5.11.0
279
+ # via skyread
280
+ triton==3.7.0 ; sys_platform == 'linux'
281
+ # via torch
282
+ typer==0.25.1
283
+ # via
284
+ # gradio
285
+ # hf-gradio
286
+ # huggingface-hub
287
+ # transformers
288
+ typing-extensions==4.15.0
289
+ # via
290
+ # anyio
291
+ # beautifulsoup4
292
+ # fastapi
293
+ # flexcache
294
+ # flexparser
295
+ # gradio
296
+ # gradio-client
297
+ # huggingface-hub
298
+ # pint
299
+ # pydantic
300
+ # pydantic-core
301
+ # starlette
302
+ # torch
303
+ # typing-inspection
304
+ typing-inspection==0.4.2
305
+ # via
306
+ # fastapi
307
+ # pydantic
308
+ tzdata==2026.2 ; sys_platform == 'emscripten' or sys_platform == 'win32'
309
+ # via pandas
310
+ urllib3==2.7.0
311
+ # via requests
312
+ uvicorn==0.49.0
313
+ # via gradio
314
+ xarray==2026.4.0
315
+ # via metpy
skyread/__init__.py ADDED
File without changes
skyread/indices.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deterministic stability-index computation via MetPy.
2
+
3
+ This is the *non-AI* core of SkyRead: every number here is computed exactly by
4
+ MetPy, not estimated by a model. The interpretation layer
5
+ (:mod:`skyread.interpret`, plus the optional LLM rewrite in :mod:`skyread.llm`) only turns these numbers into plain language.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import metpy.calc as mpcalc
11
+
12
+ from skyread.sounding import Sounding
13
+
14
+
15
def compute_indices(snd: Sounding) -> dict[str, float]:
    """Compute the standard convective stability indices for a sounding.

    The parcel is lifted from the first level of the profile
    (``pressure[0]``, ``temperature[0]``, ``dewpoint[0]``).

    Args:
        snd: A parsed sounding providing ``pressure``, ``temperature`` and
            ``dewpoint``.

    Returns:
        Mapping of index name to a unit-stripped float rounded to one
        decimal: ``cape_jkg``, ``cin_jkg``, ``lcl_hpa``, ``lfc_hpa``,
        ``el_hpa``, ``k_index``, ``lifted_index``, ``total_totals``,
        ``pwat_mm``. Values that cannot be computed (e.g. no LFC) are
        ``float('nan')``.
    """
    pressure = snd.pressure
    temperature = snd.temperature
    dewpoint = snd.dewpoint

    def _rounded(quantity, unit: str | None = None) -> float:
        """Return the magnitude rounded to 0.1, converting to ``unit`` first if given."""
        if unit is not None:
            quantity = quantity.to(unit)
        return round(float(quantity.magnitude), 1)

    parcel = mpcalc.parcel_profile(pressure, temperature[0], dewpoint[0]).to("degC")
    cape, cin = mpcalc.surface_based_cape_cin(pressure, temperature, dewpoint)

    # Each of these returns a (pressure, temperature) pair; only the
    # pressure level is reported.
    lcl_p, _lcl_t = mpcalc.lcl(pressure[0], temperature[0], dewpoint[0])
    lfc_p, _lfc_t = mpcalc.lfc(pressure, temperature, dewpoint)
    el_p, _el_t = mpcalc.el(pressure, temperature, dewpoint)

    return {
        "cape_jkg": _rounded(cape, "joule/kilogram"),
        "cin_jkg": _rounded(cin, "joule/kilogram"),
        "lcl_hpa": _rounded(lcl_p, "hPa"),
        "lfc_hpa": _rounded(lfc_p, "hPa"),
        "el_hpa": _rounded(el_p, "hPa"),
        # K, LI and TT are reported in their native degree unit (no conversion).
        "k_index": _rounded(mpcalc.k_index(pressure, temperature, dewpoint)),
        "lifted_index": _rounded(mpcalc.lifted_index(pressure, temperature, parcel)[0]),
        "total_totals": _rounded(
            mpcalc.total_totals_index(pressure, temperature, dewpoint)
        ),
        "pwat_mm": _rounded(mpcalc.precipitable_water(pressure, dewpoint), "mm"),
    }
skyread/interpret.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Turn computed indices into plain-language, dual-layer interpretation.
2
+
3
+ Architecture note — this is where (and the *only* place) AI is load-bearing:
4
+
5
+ * The numbers come from MetPy (exact, deterministic).
6
+ * :func:`build_grandma_prompt` builds the prompt we hand to a small LLM (e.g.
7
+ Qwen3-0.6B) to rewrite only the layperson ("grandma") card in natural language.
8
+ * :func:`interpret_rule_based` is a deterministic stand-in that runs today with
9
+ no model download. It proves the data->language shape end-to-end and doubles
10
+ as a few-shot example / safe fallback for the LLM.
11
+
12
+ Thresholds follow the standard convective-parameter references (K-index, Lifted
13
+ Index, Total Totals, CAPE/CIN).
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import math
19
+
20
# (lower_bound_inclusive, label, pro_phrase, grandma_phrase), strongest first.
# The last band's lower bound is -inf, so every non-NaN CAPE value matches a band.
_CAPE_BANDS = (
    (4000, "extreme", "極端不穩定", "空氣非常不穩定,會有劇烈雷雨"),
    (2500, "strong", "強烈不穩定", "很可能有強雷雨"),
    (1000, "moderate", "中等不穩定", "下午容易有雷陣雨"),
    (1, "marginal", "弱不穩定", "可能有零星短暫雷雨"),
    (-math.inf, "stable", "穩定", "天氣大致穩定,不太會有雷雨"),
)


def _cape_band(cape_jkg: float) -> tuple[str, str, str]:
    """Return ``(label, pro_phrase, grandma_phrase)`` for a CAPE value.

    Args:
        cape_jkg: Surface-based CAPE in J/kg. ``NaN`` is treated as ``0.0``,
            i.e. it falls into the "stable" band.

    Returns:
        The label and the two phrases of the first band in ``_CAPE_BANDS``
        whose inclusive lower bound is not above the value.
    """
    value = 0.0 if math.isnan(cape_jkg) else cape_jkg
    for lower, label, pro, grandma in _CAPE_BANDS:
        if value >= lower:
            return label, pro, grandma
    # Not reachable while the table ends with a -inf band. Should the table
    # ever change, fall back to its last (weakest) band instead of a
    # hand-copied tuple that can drift out of sync with the table wording.
    _, label, pro, grandma = _CAPE_BANDS[-1]
    return label, pro, grandma
37
+
38
+
39
def assess(indices: dict[str, float]) -> dict[str, str]:
    """Derive a qualitative severity assessment from raw indices.

    Args:
        indices: Output of :func:`skyread.indices.compute_indices`; only
            ``cape_jkg`` and ``cin_jkg`` are read.

    Returns:
        Mapping with ``label``, ``pro_phrase`` and ``grandma_phrase`` (from the
        CAPE band) plus ``cap_note``, a sentence describing the convective
        inhibition (CIN) barrier.
    """
    band_label, pro_text, grandma_text = _cape_band(indices["cape_jkg"])
    cin_value = indices["cin_jkg"]

    if band_label == "stable":
        # With no CAPE there is nothing for the cap to hold back, so the CIN
        # value is not discussed at all.
        note = "無明顯對流潛勢"
    else:
        # A missing (NaN) CIN is read the same way as a negligible one.
        weak_cap = math.isnan(cin_value) or cin_value >= -25
        if weak_cap:
            note = "幾乎沒有對流抑制,對流容易啟動"
        elif cin_value < -100:
            note = "對流抑制很強,除非有強迫抬升,否則不易發展"
        else:
            note = "有中等的對流抑制(蓋子),需要日照加熱才會爆發"

    assessment = {"label": band_label}
    assessment["pro_phrase"] = pro_text
    assessment["grandma_phrase"] = grandma_text
    assessment["cap_note"] = note
    return assessment
69
+
70
+
71
def build_grandma_prompt(indices: dict[str, float], name: str) -> str:
    """Build the rewrite prompt handed to a small LLM for the grandma card.

    The rule-based grandma sentence (with its card prefix removed) is embedded
    in the prompt as a draft, and the model is asked only to rephrase it in a
    more colloquial tone, in Traditional Chinese, without adding information.

    Args:
        indices: Output of :func:`skyread.indices.compute_indices`.
        name: Label of the sounding (station / case name).

    Returns:
        A ready-to-send prompt string requesting a single rewritten sentence.
    """
    rule_cards = interpret_rule_based(indices, name)
    draft_sentence = rule_cards["grandma"].removeprefix("【生活版】")
    pieces = [
        "把這句天氣提醒改寫成更口語、更親切的說法",
        "(講給長輩聽,一到兩句):\n",
        f"「{draft_sentence}」\n",
        "保留原本的結論與建議,不要新增資訊。只輸出改寫後的句子。",
        "全程使用臺灣慣用的繁體中文,絕對不可出現任何簡體字。",
    ]
    return "".join(pieces)
94
+
95
+
96
def interpret_rule_based(indices: dict[str, float], name: str) -> dict[str, str]:
    """Produce dual-layer cards deterministically (no model required).

    Args:
        indices: Output of :func:`skyread.indices.compute_indices`. Any value
            may be ``NaN`` when it could not be computed.
        name: Label of the sounding.

    Returns:
        Mapping with ``pro`` (index summary for professionals) and ``grandma``
        (plain-language advice) card text.
    """
    a = assess(indices)

    def _fmt(key: str, unit: str = "") -> str:
        """Format one index as a whole number, or a placeholder when NaN."""
        value = indices[key]
        if math.isnan(value):
            # Applied to every index, not only LFC/EL, so the card never
            # shows a literal "nan".
            return "資料未及"
        text = f"{value:.0f}"
        if text == "-0":
            # Small negatives (e.g. CIN of -0.3) round to "-0"; show "0".
            text = "0"
        return f"{text} {unit}" if unit else text

    pro = (
        f"【同行版 · {name}】"
        f"CAPE {_fmt('cape_jkg', 'J/kg')}、"
        f"CIN {_fmt('cin_jkg', 'J/kg')}、"
        f"LI {_fmt('lifted_index')}、"
        f"K {_fmt('k_index')}、"
        f"TT {_fmt('total_totals')}。"
        f"大氣呈{a['pro_phrase']},{a['cap_note']}。"
        f"LFC≈{_fmt('lfc_hpa', 'hPa')}、EL≈{_fmt('el_hpa', 'hPa')},"
        f"可降水量 {_fmt('pwat_mm', 'mm')}。"
    )
    grandma = f"【生活版】{a['grandma_phrase']}。"
    if a["label"] in ("moderate", "strong", "extreme"):
        grandma += "出門記得帶傘,棉被先別曬,午後盡量避免在空曠處。☔"
    elif a["label"] == "marginal":
        grandma += "出門帶把傘以防萬一,棉被早點曬、早點收。🌦️"
    else:
        grandma += "今天適合外出,棉被可以放心曬。☀️"
    return {"pro": pro, "grandma": grandma}
skyread/live.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch the latest real sounding from the University of Wyoming archive.
2
+
3
+ Network access for sounding data happens only here. Any failure should be caught by the caller
4
+ (the app shows a friendly error and suggests the bundled examples), so a dead upstream never kills a demo.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timedelta, timezone
10
+
11
+ from siphon.simplewebservice.wyoming import WyomingUpperAir
12
+
13
+ from skyread.sounding import Sounding, from_wyoming_dataframe
14
+
15
+ # Taiwan's own stations (Banqiao 46692, Pingtung 46810) are not published to
16
+ # the Wyoming archive, so we offer the nearest reliably-available stations.
17
+ STATIONS: dict[str, str] = {
18
+ "石垣島 47918(離台灣最近,~270km)": "47918",
19
+ "香港京士柏 45004": "45004",
20
+ "奄美名瀨 47909": "47909",
21
+ "日本館野 47646": "47646",
22
+ }
23
+ DEFAULT_STATION = "47918"
24
+
25
+ _COLUMNS = ["pressure", "height", "temperature", "dewpoint", "direction", "speed"]
26
+
27
+
28
def _latest_synoptic(now: datetime) -> datetime:
    """Round ``now`` down to the most recent 00Z/12Z synoptic hour.

    The date and ``tzinfo`` of ``now`` are kept; the hour becomes 12 when
    ``now.hour >= 12`` and 0 otherwise, and all smaller fields are zeroed.
    """
    synoptic_hour = 12 if now.hour >= 12 else 0
    return now.replace(hour=synoptic_hour, minute=0, second=0, microsecond=0)
32
+
33
+
34
def latest_sounding(station: str = DEFAULT_STATION, max_lookback: int = 4) -> Sounding:
    """Fetch the most recent sounding, stepping back 12 h per attempt.

    Starts at the latest 00Z/12Z time (UTC) and, each time the archive reports
    no data for that time, retries with the synoptic time 12 hours earlier.

    Args:
        station: WMO station identifier.
        max_lookback: How many 12-hourly synoptic times to try.

    Returns:
        The parsed :class:`Sounding`, named ``"<station> <time>Z"``.

    Raises:
        RuntimeError: If no sounding exists within the lookback window. The
            last ``ValueError`` from the archive, if any, is attached as the
            cause.
    """
    candidate = _latest_synoptic(datetime.now(timezone.utc))
    last_error: ValueError | None = None
    for _ in range(max_lookback):
        try:
            # The request is made with a naive datetime (tzinfo stripped).
            df = WyomingUpperAir.request_data(candidate.replace(tzinfo=None), station)
        except ValueError as exc:  # Wyoming returns this when the hour has no data yet
            last_error = exc
            candidate -= timedelta(hours=12)
            continue
        name = f"{station} {candidate:%Y-%m-%d %H}Z"
        return from_wyoming_dataframe(df[_COLUMNS], name=name)
    # Chain the last archive error so the reason for the final miss is not lost.
    raise RuntimeError(
        f"No sounding for station {station} in the last {max_lookback * 12} hours"
    ) from last_error
skyread/llm.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Small-LLM rewriting of the grandma card, with deterministic fallback.
2
+
3
+ Model layering (the hackathon's "honest small-model fit" story):
4
+
5
+ * MetPy computes every number exactly (:mod:`skyread.indices`).
6
+ * The pro card is pure numbers, so it stays rule-based — exact by design.
7
+ * A small LLM only *rewrites* the layperson sentence from a factually-correct
8
+ draft, the one place natural language genuinely matters.
9
+ * Any failure (load, generation, malformed output) silently falls back to
10
+ the rule-based cards, so the app never breaks on stage.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import os
17
+ import re
18
+ import threading
19
+ from functools import lru_cache
20
+
21
+ from skyread.interpret import build_grandma_prompt, interpret_rule_based
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # Qwen3-0.6B: validated 100% Traditional-Chinese-clean on the GPU box
26
+ # (MiniCPM3-4B kept slipping into Simplified on unstable-weather wording and
27
+ # needs the old transformers 4.46 pin — see PROGRESS notes, 2026-06-11).
28
+ MODEL_ID = os.environ.get("SKYREAD_MODEL_ID", "Qwen/Qwen3-0.6B")
29
+
30
+ _MAX_REWRITE_CHARS = 180
31
+ _MAX_ATTEMPTS = 3
32
+
33
+ # High-frequency simplified-only characters: one hit means the model slipped
34
+ # out of Traditional Chinese, so the rewrite is rejected. Forms that are also
35
+ # standard in Taiwan (e.g. 后 in 皇后, 台, 干, 呆) are left out of the set only
36
+ # where a false positive is likely; rarer overlaps stay in, because the gate is
37
+ # deliberately biased toward rejecting — the fallback is graceful.
38
+ _SIMPLIFIED_CHARS = frozenset(
39
+ "记伞来这为时说对让们个无气电视见车东转动书长门点云飞应过头实发现别样"
40
+ "认师问题难岁热闹风阴湿预报员变坏轻紧稳鲜盖旷阵处带备凉润闷强从众传写"
41
+ "决刚务医华单压历双叶号听响围国图块坚执扩扫护担拥挂损换据断显晓暂术机"
42
+ "杂权条极标树桥梦检楼归录忆怀态总惊惯愿凭"
43
+ "会还没几开关边儿学间阳雾闪温适当满离远进节随虽谢请"
44
+ )
45
+
46
+
47
def _pick_device() -> str:  # pragma: no cover - hardware dependent
    """Return the preferred torch device name: ``"cuda"``, else ``"mps"``, else ``"cpu"``."""
    import torch

    if torch.cuda.is_available():
        return "cuda"
    return "mps" if torch.backends.mps.is_available() else "cpu"
56
+
57
+
58
+ _LOAD_LOCK = threading.Lock()
59
+
60
+
61
@lru_cache(maxsize=1)
def _load_model_once():  # pragma: no cover - exercised manually / on the Space
    """Load the tokenizer and model for ``MODEL_ID`` (call via :func:`_load_model`).

    The model is moved to the device chosen by :func:`_pick_device` and put in
    eval mode. The result is memoised, so later calls return the same pair.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
    device = _pick_device()
    lm.to(device)
    lm.eval()
    return tok, lm
71
+
72
+
73
def _load_model():  # pragma: no cover - thin thread-safety wrapper
    """Return the cached ``(tokenizer, model)`` pair, loading it at most once.

    The load is serialised with ``_LOAD_LOCK`` because the warm-up thread may
    race the first request: ``lru_cache`` alone does not stop two concurrent
    first callers from both missing the cache and loading the model twice.
    """
    with _LOAD_LOCK:
        loaded = _load_model_once()
    return loaded
81
+
82
+
83
def _generate(prompt: str) -> str:  # pragma: no cover - needs model weights
    """Run one chat-formatted, sampled generation and return only the new text.

    Sampling (rather than greedy decoding) is deliberate: a rejected output
    would otherwise be reproduced exactly, making the retry loop in
    :func:`interpret_llm` pointless.

    Args:
        prompt: The user message to send as a single-turn chat.

    Returns:
        The decoded continuation, without the prompt tokens and without
        special tokens.
    """
    import torch

    tokenizer, model = _load_model()
    messages = [{"role": "user", "content": prompt}]
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        enable_thinking=False,  # Qwen3: skip <think> blocks; no-op elsewhere
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    sampling = {
        "max_new_tokens": 96,
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.9,
    }
    with torch.no_grad():
        output = model.generate(**encoded, **sampling)
    # The output sequence starts with the prompt; keep only what follows it.
    prompt_length = encoded["input_ids"].shape[1]
    continuation = output[0][prompt_length:]
    return tokenizer.decode(continuation, skip_special_tokens=True)
109
+
110
+
111
def _clean_rewrite(text: str) -> str | None:
    """Validate and normalise a model rewrite.

    Surrounding whitespace and quote characters are stripped first. The
    result is rejected when it is empty, longer than ``_MAX_REWRITE_CHARS``,
    echoes instruction wording, contains no CJK character, or contains any
    character from ``_SIMPLIFIED_CHARS``.

    Args:
        text: Raw text produced by the model.

    Returns:
        The cleaned sentence, or ``None`` if it is not usable.
    """
    candidate = text.strip().strip("「」\"' \n")
    if candidate == "" or len(candidate) > _MAX_REWRITE_CHARS:
        return None

    for marker in ("改寫", "原句", "輸出"):
        if marker in candidate:
            return None  # instruction echo, not a rewrite

    has_cjk = re.search(r"[一-鿿]", candidate) is not None
    if not has_cjk:
        return None

    if not _SIMPLIFIED_CHARS.isdisjoint(candidate):
        return None  # slipped into Simplified Chinese
    return candidate
123
+
124
+
125
def interpret_llm(indices: dict[str, float], name: str) -> tuple[dict[str, str], str]:
    """Interpret indices, rewriting the grandma card with a small LLM.

    The rule-based cards are always built first. The model is then asked up to
    ``_MAX_ATTEMPTS`` times for a rewrite of the grandma sentence; the first
    rewrite that passes :func:`_clean_rewrite` replaces the grandma card. If
    every attempt is rejected, or generation raises, the rule-based cards are
    returned unchanged.

    Args:
        indices: Output of :func:`skyread.indices.compute_indices`.
        name: Label of the sounding.

    Returns:
        ``(cards, engine)`` where ``engine`` is ``"llm"`` or ``"rule-based"``.
    """
    cards = interpret_rule_based(indices, name)
    prompt = build_grandma_prompt(indices, name)
    try:
        for attempt_no in range(1, _MAX_ATTEMPTS + 1):
            raw = _generate(prompt)
            rewritten = _clean_rewrite(raw)
            if rewritten is None:
                logger.warning(
                    "LLM rewrite unusable (attempt %d/%d): %r",
                    attempt_no,
                    _MAX_ATTEMPTS,
                    raw[:200],
                )
                continue
            llm_cards = dict(cards)
            llm_cards["grandma"] = "【生活版】" + rewritten
            return llm_cards, "llm"
    except Exception:
        logger.exception("LLM generation failed, falling back")
    return cards, "rule-based"
152
+
153
+
154
def warm_up() -> None:
    """Eagerly load the model; intended for a background thread at app start.

    Never raises: a failed load is logged and the function simply returns.
    """
    try:
        _load_model()
    except Exception:  # pragma: no cover
        logger.exception("Model warm-up failed; rule-based fallback will be used")
    return None
skyread/plot.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Render a Skew-T / Log-P diagram from a sounding.
2
+
3
+ Uses MetPy's :class:`~metpy.plots.SkewT` so the plotted curves come straight
4
+ from the data — no chart-image reading involved. A Matplotlib ``Figure`` is
5
+ returned so Gradio's ``gr.Plot`` can display it directly.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import matplotlib
11
+
12
+ matplotlib.use("Agg") # headless backend for server-side rendering
13
+
14
+ import metpy.calc as mpcalc # noqa: E402
15
+ from matplotlib.figure import Figure # noqa: E402
16
+ from metpy.plots import SkewT # noqa: E402
17
+
18
+ from skyread.sounding import Sounding # noqa: E402
19
+
20
+
21
def make_skewt(snd: Sounding) -> Figure:
    """Build a Skew-T figure (temperature, dewpoint, parcel path, CAPE/CIN).

    The figure is constructed directly (not via pyplot), so it is never
    registered in pyplot's global manager — repeated requests on a long-lived
    server would otherwise leak every figure.

    Args:
        snd: A parsed sounding. Its first level is used as the parcel's
            starting point.

    Returns:
        A Matplotlib figure ready for display or saving.
    """
    fig = Figure(figsize=(7, 8))
    skew = SkewT(fig, rotation=45)

    skew.plot(
        snd.pressure, snd.temperature, "tab:red", linewidth=2, label="Temperature"
    )
    skew.plot(snd.pressure, snd.dewpoint, "tab:green", linewidth=2, label="Dewpoint")
    # Every third level only, to thin out the barb column.
    skew.plot_barbs(snd.pressure[::3], snd.u_wind[::3], snd.v_wind[::3])

    # Parcel lifted from the first level of the profile.
    parcel = mpcalc.parcel_profile(
        snd.pressure, snd.temperature[0], snd.dewpoint[0]
    ).to("degC")
    skew.plot(
        snd.pressure, parcel, "black", linewidth=1.5, linestyle="--", label="Parcel"
    )
    skew.shade_cape(snd.pressure, snd.temperature, parcel)
    # Without the dewpoint MetPy shades every level where the parcel is cooler
    # than the environment, including the region above the equilibrium level,
    # which is not CIN. Passing it lets MetPy limit the shaded layer.
    skew.shade_cin(snd.pressure, snd.temperature, parcel, dewpoint=snd.dewpoint)

    skew.plot_dry_adiabats(alpha=0.3)
    skew.plot_moist_adiabats(alpha=0.3)
    skew.plot_mixing_lines(alpha=0.3)

    skew.ax.set_xlim(-40, 50)
    skew.ax.set_ylim(1050, 100)
    skew.ax.set_xlabel("Temperature (°C)")
    skew.ax.set_ylabel("Pressure (hPa)")
    skew.ax.set_title(f"Skew-T / Log-P — {snd.name}")
    skew.ax.legend(loc="upper right", fontsize=8)
    return fig
skyread/sounding.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load radiosonde sounding data into a units-aware structure.
2
+
3
+ This module is pure I/O + parsing. It returns MetPy/pint quantities ready to be
4
+ fed into :mod:`skyread.indices`. Two sources are supported for the spike:
5
+
6
+ * MetPy's bundled sample soundings (``get_test_data``) — zero network, perfect
7
+ for demos that must never break.
8
+ * University-of-Wyoming-style fixed-width text (also the IGRA2-export shape) —
9
+ the format users upload or fetch online.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+
16
+ import pandas as pd
17
+ from metpy.calc import wind_components
18
+ from metpy.cbook import get_test_data
19
+ from metpy.units import units
20
+ from pint import Quantity
21
+
22
+
23
@dataclass
class Sounding:
    """A parsed atmospheric sounding with units attached.

    The five profile fields are annotated as pint ``Quantity`` objects; the
    dataclass itself performs no validation of units, ordering or length.

    Attributes:
        pressure: Pressure profile (hPa), decreasing upward.
        temperature: Environmental temperature profile (degC).
        dewpoint: Dewpoint temperature profile (degC).
        u_wind: Zonal wind component (knots).
        v_wind: Meridional wind component (knots).
        name: Human-readable label for the sounding.
    """

    pressure: Quantity
    temperature: Quantity
    dewpoint: Quantity
    u_wind: Quantity
    v_wind: Quantity
    name: str
42
+
43
+
44
def from_wyoming_dataframe(df: pd.DataFrame, name: str) -> Sounding:
    """Convert a Wyoming-style dataframe into a :class:`Sounding`.

    Rows in which temperature, dewpoint, direction and speed are *all* missing
    are discarded; rows with only some of them missing are kept unchanged.

    Args:
        df: Must provide the columns ``pressure, temperature, dewpoint,
            direction, speed``. Any other column (for example ``height``) is
            not read.
        name: Label for the sounding.

    Returns:
        A units-aware :class:`Sounding` with wind converted from
        speed/direction to u/v components.
    """
    observed = ("temperature", "dewpoint", "direction", "speed")
    levels = df.dropna(subset=observed, how="all").reset_index(drop=True)

    def column(label, unit):
        """Return one column of ``levels`` as a quantity in ``unit``."""
        return levels[label].to_numpy() * unit

    pressure = column("pressure", units.hPa)
    temperature = column("temperature", units.degC)
    dewpoint = column("dewpoint", units.degC)
    speed = column("speed", units.knots)
    direction = column("direction", units.deg)
    u_wind, v_wind = wind_components(speed, direction)
    return Sounding(
        pressure=pressure,
        temperature=temperature,
        dewpoint=dewpoint,
        u_wind=u_wind,
        v_wind=v_wind,
        name=name,
    )
66
+
67
+
68
def load_csv(path: str, name: str = "uploaded") -> Sounding:
    """Read a user-uploaded CSV file into a :class:`Sounding`.

    The header row is matched case-insensitively after trimming surrounding
    whitespace. Expected columns: ``pressure, temperature, dewpoint,
    direction, speed`` (hPa, degC, degC, degrees, knots). Extra columns are
    dropped before conversion.

    Args:
        path: Path to the CSV file.
        name: Label for the sounding.

    Returns:
        The parsed :class:`Sounding`.

    Raises:
        ValueError: If required columns are missing.
    """
    wanted = ("pressure", "temperature", "dewpoint", "direction", "speed")
    table = pd.read_csv(path)
    table.columns = [label.strip().lower() for label in table.columns]
    absent = sorted(set(wanted).difference(table.columns))
    if absent:
        raise ValueError(f"CSV missing columns: {absent}")
    return from_wyoming_dataframe(table[list(wanted)], name=name)
92
+
93
+
94
def load_sample(name: str = "may4_sounding.txt") -> Sounding:
    """Load one of MetPy's sample soundings via ``get_test_data``.

    Args:
        name: One of ``may4_sounding.txt``, ``jan20_sounding.txt``,
            ``nov11_sounding.txt``.

    Returns:
        The parsed :class:`Sounding`, labelled with ``name`` minus the
        ``_sounding.txt`` part.
    """
    # Position of each wanted field in the fixed-width file -> column name.
    picked = {
        0: "pressure",
        1: "height",
        2: "temperature",
        3: "dewpoint",
        6: "direction",
        7: "speed",
    }
    frame = pd.read_fwf(
        get_test_data(name, as_file_obj=False),
        skiprows=5,
        usecols=list(picked),
        names=list(picked.values()),
    )
    label = name.replace("_sounding.txt", "")
    return from_wyoming_dataframe(frame, name=label)
skyread/spike.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """End-to-end spike: sounding -> MetPy indices -> dual-layer interpretation.
2
+
3
+ Run:
4
+ uv run python -m skyread.spike
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from skyread.indices import compute_indices
10
+ from skyread.interpret import build_grandma_prompt, interpret_rule_based
11
+ from skyread.sounding import load_sample
12
+
13
+
14
def main() -> None:
    """Print the whole SkyRead chain for the bundled ``may4`` sample sounding.

    Three sections are written to stdout: the computed indices, the two
    rule-based cards, and the rewrite prompt built for the small LLM.
    """
    sounding = load_sample("may4_sounding.txt")
    level_count = len(sounding.pressure)
    print(f"# Sounding: {sounding.name} ({level_count} levels)\n")

    computed = compute_indices(sounding)
    print("## Step 1 — MetPy computed indices (deterministic):")
    for key, value in computed.items():
        print(f" {key:>14}: {value}")

    draft = interpret_rule_based(computed, sounding.name)
    print("\n## Step 2 — dual-layer cards (rule-based draft / fallback):")
    for card in ("pro", "grandma"):
        print(" " + draft[card])

    print("\n## Step 3 — the rewrite prompt that goes to the small LLM:")
    prompt = build_grandma_prompt(computed, sounding.name)
    # Indent continuation lines so the prompt lines up under its first line.
    print(" " + prompt.replace("\n", "\n "))


if __name__ == "__main__":
    main()
tests/__init__.py ADDED
File without changes
tests/test_app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the Gradio glue layer (no model download, no network)."""
2
+
3
+ from pathlib import Path
4
+
5
+ import app
6
+
7
+
8
def test_analyze_surfaces_compute_errors_as_message(tmp_path: Path) -> None:
    """A header-only upload must yield a warning message and no figure."""
    # Every required column is present, so loading succeeds; with zero data
    # rows the failure is expected later, in compute_indices.
    header_only = tmp_path / "empty.csv"
    header_only.write_text("pressure,temperature,dewpoint,direction,speed\n")
    outcome = app.analyze(
        app.SOURCE_UPLOAD, "", "", str(header_only), use_llm=False
    )
    fig, pro, _grandma, _badge = outcome
    assert fig is None
    assert pro.startswith("⚠️")
18
+
19
+
20
def test_analyze_example_rule_based_returns_cards() -> None:
    """The first bundled example must produce a figure, both cards and a badge."""
    first_example = next(iter(app.EXAMPLES))
    outcome = app.analyze(
        app.SOURCE_EXAMPLE, "", first_example, None, use_llm=False
    )
    fig, pro, grandma, badge = outcome
    assert fig is not None
    assert pro.startswith("【同行版")
    assert grandma.startswith("【生活版】")
    assert "MetPy" in badge
tests/test_indices.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Baseline tests for deterministic index computation (synthetic profile)."""
2
+
3
+ import numpy as np
4
+ from metpy.units import units
5
+
6
+ from skyread.indices import compute_indices
7
+ from skyread.sounding import Sounding
8
+
9
# Exact set of keys the tests below require compute_indices to return.
EXPECTED_KEYS = {
    "cape_jkg",
    "cin_jkg",
    "lcl_hpa",
    "lfc_hpa",
    "el_hpa",
    "k_index",
    "lifted_index",
    "total_totals",
    "pwat_mm",
}
20
+
21
+
22
def _synthetic_sounding() -> Sounding:
    """Return a hand-made conditionally-unstable profile (9 levels, calm wind)."""
    levels_hpa = [1000.0, 925.0, 850.0, 700.0, 500.0, 400.0, 300.0, 250.0, 200.0]
    temps_c = [30.0, 24.0, 18.0, 8.0, -10.0, -22.0, -38.0, -48.0, -55.0]
    dewpoints_c = [24.0, 20.0, 14.0, 2.0, -20.0, -35.0, -55.0, -65.0, -70.0]
    calm = np.zeros(9) * units.knots
    return Sounding(
        np.array(levels_hpa) * units.hPa,
        np.array(temps_c) * units.degC,
        np.array(dewpoints_c) * units.degC,
        calm,
        calm,
        "synthetic",
    )
38
+
39
+
40
def test_compute_indices_returns_all_expected_keys() -> None:
    """The result must contain exactly the keys in ``EXPECTED_KEYS``."""
    returned_keys = set(compute_indices(_synthetic_sounding()))
    assert returned_keys == EXPECTED_KEYS
42
+
43
+
44
def test_compute_indices_unstable_profile_has_positive_cape() -> None:
    """The synthetic unstable profile must yield CAPE above zero."""
    result = compute_indices(_synthetic_sounding())
    assert result["cape_jkg"] > 0
46
+
47
+
48
def test_compute_indices_values_are_plain_floats() -> None:
    """Every returned value must be a ``float`` instance."""
    values = compute_indices(_synthetic_sounding()).values()
    non_floats = [v for v in values if not isinstance(v, float)]
    assert not non_floats
tests/test_interpret.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Baseline tests for the rule-based interpretation layer."""
2
+
3
+ from skyread.interpret import assess, build_grandma_prompt, interpret_rule_based
4
+
5
+
6
+ def _indices(cape: float, cin: float) -> dict[str, float]:
7
+ return {
8
+ "cape_jkg": cape,
9
+ "cin_jkg": cin,
10
+ "lcl_hpa": 900.0,
11
+ "lfc_hpa": 850.0,
12
+ "el_hpa": 200.0,
13
+ "k_index": 36.0,
14
+ "lifted_index": -6.0,
15
+ "total_totals": 53.0,
16
+ "pwat_mm": 40.0,
17
+ }
18
+
19
+
20
def test_assess_extreme_cape_returns_extreme_label() -> None:
    """CAPE of 4500 with CIN of -10 must be labelled ``extreme``."""
    verdict = assess(_indices(4500.0, -10.0))
    assert verdict["label"] == "extreme"
22
+
23
+
24
def test_assess_zero_cape_returns_stable_label() -> None:
    """Zero CAPE and zero CIN must be labelled ``stable``."""
    verdict = assess(_indices(0.0, 0.0))
    assert verdict["label"] == "stable"
26
+
27
+
28
def test_assess_nan_cape_treated_as_stable() -> None:
    """A NaN CAPE must be labelled ``stable``."""
    nan_cape = float("nan")
    verdict = assess(_indices(nan_cape, 0.0))
    assert verdict["label"] == "stable"
30
+
31
+
32
def test_assess_stable_cap_note_does_not_claim_easy_initiation() -> None:
    """The cap note for a stable profile must not say convection starts easily."""
    # CAPE 0 + CIN 0 means "no convection at all", not "convection starts easily".
    verdict = assess(_indices(0.0, 0.0))
    assert "容易啟動" not in verdict["cap_note"]
36
+
37
+
38
def test_interpret_rule_based_marginal_advises_umbrella_not_carefree_sunning() -> None:
    """A marginal case must mention an umbrella and not carefree quilt sunning."""
    # "可能有雷雨" must not be followed by "棉被可以放心曬".
    grandma_card = interpret_rule_based(_indices(300.0, -200.0), "test")["grandma"]
    assert "傘" in grandma_card
    assert "放心曬" not in grandma_card
43
+
44
+
45
def test_interpret_rule_based_unstable_advises_umbrella() -> None:
    """An unstable case must tell the reader to bring an umbrella."""
    grandma_card = interpret_rule_based(_indices(2000.0, -50.0), "test")["grandma"]
    assert "帶傘" in grandma_card
48
+
49
+
50
def test_interpret_rule_based_stable_allows_sunbathing_quilt() -> None:
    """A stable case must mention sunning in the grandma card."""
    grandma_card = interpret_rule_based(_indices(0.0, 0.0), "test")["grandma"]
    assert "曬" in grandma_card
53
+
54
+
55
def test_build_grandma_prompt_embeds_rule_based_draft() -> None:
    """The prompt must contain the rule-based grandma text without its tag."""
    prompt = build_grandma_prompt(_indices(2000.0, -50.0), "test")
    draft_text = interpret_rule_based(_indices(2000.0, -50.0), "test")["grandma"]
    untagged = draft_text.removeprefix("【生活版】")
    assert untagged in prompt
59
+
60
+
61
def test_build_grandma_prompt_requests_rewrite_only() -> None:
    """The prompt must carry both required instruction phrases."""
    prompt = build_grandma_prompt(_indices(1500.0, -30.0), "test")
    for phrase in ("繁體中文", "只輸出改寫後的句子"):
        assert phrase in prompt
tests/test_live.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for live-sounding time logic (network calls are not tested here)."""
2
+
3
+ from datetime import datetime, timezone
4
+
5
+ from skyread.live import _latest_synoptic
6
+
7
+
8
def test_latest_synoptic_morning_rounds_to_00z() -> None:
    """03:30 UTC maps to 00:00 UTC of the same day."""
    day = (2026, 6, 11)
    found = _latest_synoptic(datetime(*day, 3, 30, tzinfo=timezone.utc))
    assert found == datetime(*day, 0, 0, tzinfo=timezone.utc)
11
+
12
+
13
def test_latest_synoptic_afternoon_rounds_to_12z() -> None:
    """15:00 UTC maps to 12:00 UTC of the same day."""
    day = (2026, 6, 11)
    found = _latest_synoptic(datetime(*day, 15, 0, tzinfo=timezone.utc))
    assert found == datetime(*day, 12, 0, tzinfo=timezone.utc)
16
+
17
+
18
def test_latest_synoptic_exactly_noon_is_12z() -> None:
    """Exactly 12:00 UTC maps to itself."""
    noon = datetime(2026, 6, 11, 12, 0, tzinfo=timezone.utc)
    found = _latest_synoptic(noon)
    assert found == datetime(2026, 6, 11, 12, 0, tzinfo=timezone.utc)
tests/test_llm.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the LLM rewrite layer (no model download needed)."""
2
+
3
+ import pytest
4
+
5
+ from skyread import llm
6
+
7
# Fixed indices dict passed to interpret_llm by the tests below.
_INDICES = {
    "cape_jkg": 1500.0,
    "cin_jkg": -30.0,
    "lcl_hpa": 900.0,
    "lfc_hpa": 850.0,
    "el_hpa": 200.0,
    "k_index": 36.0,
    "lifted_index": -4.0,
    "total_totals": 50.0,
    "pwat_mm": 45.0,
}
18
+
19
+
20
def test_clean_rewrite_accepts_normal_sentence() -> None:
    """A quoted sentence is accepted and returned without its quote marks."""
    cleaned = llm._clean_rewrite("「今天下午會打雷,出門帶把傘卡安心。」")
    assert cleaned == "今天下午會打雷,出門帶把傘卡安心。"
23
+
24
+
25
def test_clean_rewrite_rejects_instruction_echo() -> None:
    """Output that echoes the rewrite instruction is rejected."""
    echoed = "好的,以下是改寫後的句子:"
    assert llm._clean_rewrite(echoed) is None
27
+
28
+
29
def test_clean_rewrite_rejects_empty_and_non_chinese() -> None:
    """Whitespace-only and English-only outputs are both rejected."""
    for unusable in (" ", "Sure! Here is the sentence."):
        assert llm._clean_rewrite(unusable) is None
32
+
33
+
34
def test_clean_rewrite_rejects_overlong_output() -> None:
    """A 300-character output is rejected."""
    overlong = "雨" * 300
    assert llm._clean_rewrite(overlong) is None
36
+
37
+
38
def test_clean_rewrite_rejects_simplified_chinese() -> None:
    """Sentences containing Simplified characters are rejected."""
    # Real failure modes observed from MiniCPM3-4B on the GPU box.
    observed_failures = (
        "下午有機會打雷下雨,记得帶把伞。",
        "今天天氣挺稳当的,不太會打雷下雨。",
        "棉被可以拿出来晒太陽。",
        "可能会有小小滴雨滴哦!",
        "今天温度舒适,适合外出。",
    )
    for sample in observed_failures:
        assert llm._clean_rewrite(sample) is None
45
+
46
+
47
def test_clean_rewrite_accepts_pure_traditional_sentence() -> None:
    """A fully Traditional sentence is returned unchanged."""
    sentence = "下午可能會打雷下雨,記得帶把傘,棉被先別曬喔。"
    cleaned = llm._clean_rewrite(sentence)
    assert cleaned == sentence
50
+
51
+
52
def test_interpret_llm_falls_back_when_generation_fails(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A raising generator must produce the rule-based cards."""

    def exploding_generate(prompt: str) -> str:
        raise RuntimeError("model exploded")

    monkeypatch.setattr(llm, "_generate", exploding_generate)
    outcome = llm.interpret_llm(_INDICES, "test")
    cards, engine = outcome
    assert engine == "rule-based"
    assert cards["pro"].startswith("【同行版")
    assert cards["grandma"].startswith("【生活版】")
63
+
64
+
65
def test_interpret_llm_falls_back_when_output_unusable(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An English-only output must lead to the rule-based engine."""

    def english_only(prompt: str) -> str:
        return "Here you go!"

    monkeypatch.setattr(llm, "_generate", english_only)
    _cards, engine = llm.interpret_llm(_INDICES, "test")
    assert engine == "rule-based"
71
+
72
+
73
def test_interpret_llm_retries_until_a_usable_rewrite(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A rejected first output must be followed by a second, accepted one."""
    scripted = iter(["可能会有雨。", "下午可能下雨,帶把傘較安心。"])

    def next_scripted(prompt: str) -> str:
        return next(scripted)

    monkeypatch.setattr(llm, "_generate", next_scripted)
    cards, engine = llm.interpret_llm(_INDICES, "test")
    assert engine == "llm"
    assert cards["grandma"] == "【生活版】下午可能下雨,帶把傘較安心。"
81
+
82
+
83
def test_interpret_llm_gives_up_after_max_attempts(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Always-rejected output is tried exactly ``_MAX_ATTEMPTS`` times."""
    seen_prompts: list[str] = []

    def always_simplified(prompt: str) -> str:
        seen_prompts.append(prompt)
        return "可能会有雨。"

    monkeypatch.setattr(llm, "_generate", always_simplified)
    _cards, engine = llm.interpret_llm(_INDICES, "test")
    assert engine == "rule-based"
    assert len(seen_prompts) == llm._MAX_ATTEMPTS
96
+
97
+
98
def test_interpret_llm_rewrites_only_grandma_card(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An accepted rewrite replaces the grandma card and leaves the pro card."""

    def good_rewrite(prompt: str) -> str:
        return "下午會打雷,帶傘較妥當。"

    monkeypatch.setattr(llm, "_generate", good_rewrite)
    cards, engine = llm.interpret_llm(_INDICES, "test")
    assert engine == "llm"
    assert cards["grandma"] == "【生活版】下午會打雷,帶傘較妥當。"
    # The pro card is untouched and still carries the rule-based numbers.
    assert cards["pro"].startswith("【同行版")
tests/test_plot.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the Skew-T renderer (headless, no display)."""
2
+
3
+ import numpy as np
4
+ from matplotlib.figure import Figure
5
+ from metpy.units import units
6
+
7
+ from skyread.plot import make_skewt
8
+ from skyread.sounding import Sounding
9
+
10
+
11
def _sounding() -> Sounding:
    """Return a 9-level synthetic sounding with calm wind for plotting tests."""
    levels_hpa = [1000.0, 925.0, 850.0, 700.0, 500.0, 400.0, 300.0, 250.0, 200.0]
    temps_c = [30.0, 24.0, 18.0, 8.0, -10.0, -22.0, -38.0, -48.0, -55.0]
    dewpoints_c = [24.0, 20.0, 14.0, 2.0, -20.0, -35.0, -55.0, -65.0, -70.0]
    calm = np.zeros(9) * units.knots
    return Sounding(
        np.array(levels_hpa) * units.hPa,
        np.array(temps_c) * units.degC,
        np.array(dewpoints_c) * units.degC,
        calm,
        calm,
        "synthetic",
    )
26
+
27
+
28
def test_make_skewt_returns_figure_outside_pyplot_registry() -> None:
    """``make_skewt`` returns a ``Figure`` that pyplot does not track."""
    import matplotlib.pyplot as plt

    registered_before = plt.get_fignums()
    figure = make_skewt(_sounding())
    assert isinstance(figure, Figure)
    # Figures must not accumulate in pyplot's global manager (memory leak on
    # a long-lived Space).
    registered_after = plt.get_fignums()
    assert registered_after == registered_before