anapaulagomes committed on
Commit
7f286eb
·
verified ·
1 Parent(s): 2e2cb5b

Sync from GitHub via hub-sync

Browse files
Files changed (3) hide show
  1. filter_engine.py +71 -13
  2. pyproject.toml +2 -2
  3. uv.lock +0 -0
filter_engine.py CHANGED
@@ -1,6 +1,6 @@
1
  import marimo
2
 
3
- __generated_with = "0.14.16"
4
  app = marimo.App(
5
  width="medium",
6
  app_title="Open Syndrome Definition - Data Browser",
@@ -100,16 +100,64 @@ def _(go, pl):
100
 
101
  @app.cell
102
  def _(mo):
103
- mo.md(r"""# Open Syndrome Definition 👩🏽‍🔬""")
 
 
104
  return
105
 
106
 
107
  @app.cell
108
- def _(mo):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  mo.callout(
110
  mo.md(
111
- "This is a prototype of how to filter your data using definitions from the Open Syndrome Initiative.\n\n"
112
- "We do not store any data."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  ),
114
  kind="neutral",
115
  )
@@ -140,8 +188,7 @@ def _(EXAMPLE_DATASETS, mo):
140
  value=_datasets[0] if _datasets else None,
141
  label="Example dataset",
142
  )
143
- sample_file = mo.ui.file(kind="area", filetypes=[".csv"])
144
-
145
  return data_source, example_picker, sample_file
146
 
147
 
@@ -172,7 +219,7 @@ def _(EXAMPLE_DATASETS, data_source, example_picker, pl, sample_file):
172
 
173
  @app.cell
174
  def _(EXAMPLE_DATASETS, data_source, example_picker):
175
- _default_yaml = """\
176
  profiles:
177
  - name: my_dataset
178
  # value_encodings: # optional — map OSD canonical values to dataset-specific ones
@@ -218,9 +265,14 @@ def _(df_selected, initial_date_column, initial_yaml, mo):
218
 
219
  date_format_input = mo.ui.text(
220
  value="%Y-%m-%d %H:%M:%S",
221
- label="Date format",
222
  )
223
 
 
 
 
 
 
224
  _cols_hint = "`, `".join(df_selected.columns)
225
 
226
  mo.vstack(
@@ -233,7 +285,7 @@ def _(df_selected, initial_date_column, initial_yaml, mo):
233
  f"Your dataset columns: `{_cols_hint}`"
234
  ),
235
  mo.hstack(
236
- [yaml_editor, mo.vstack([date_column_picker, date_format_input])],
237
  widths=[3, 1],
238
  align="start",
239
  ),
@@ -307,7 +359,9 @@ def _(definition_options, mo):
307
 
308
  @app.cell
309
  def _(mo):
310
- mo.md(r"""### Data sample""")
 
 
311
  return
312
 
313
 
@@ -320,13 +374,17 @@ def _(df_selected, mo):
320
 
321
  @app.cell
322
  def _(mo):
323
- mo.md(r"""---""")
 
 
324
  return
325
 
326
 
327
  @app.cell
328
  def _(mo):
329
- mo.md(r"""## Data & Definitions""")
 
 
330
  return
331
 
332
 
 
1
  import marimo
2
 
3
+ __generated_with = "0.21.0"
4
  app = marimo.App(
5
  width="medium",
6
  app_title="Open Syndrome Definition - Data Browser",
 
100
 
101
  @app.cell
102
  def _(mo):
103
+ mo.md(r"""
104
+ # Open Syndrome Definition 👩🏽‍🔬
105
+ """)
106
  return
107
 
108
 
109
  @app.cell
110
+ def _():
111
+ prompt = """
112
+ Role: Act as an expert in healthcare data engineering and the Open Syndrome Definition (OSD) framework.
113
+
114
+ Task: Generate two text files for testing data filtering and syndromic surveillance pipelines.
115
+
116
+ File 1: Synthetic Dataset (CSV Format)
117
+
118
+ Create a synthetic dataset of roughly 20 ambulatory care records.
119
+
120
+ The data must be in [Insert Language, e.g., Brazilian Portuguese, English, German].
121
+
122
+ Include the following columns: recording_ts (timestamp), icd_code (ICD-10 code), sex (encoded as [Insert Encoding, e.g., M/F/D]), age (integer), and chief_complaint (string of the symptoms).
123
+
124
+ Ensure the clinical presentation logically matches the ICD-10 code and age.
125
+
126
+ File 2: OSD Mapping File (YAML Format)
127
+
128
+ Create a YAML configuration file that maps the CSV columns to Open Syndrome Definition concepts.
129
+
130
+ Define a profile named ambulatory_care.
131
+
132
+ Include a value_encodings section that defines the mapping for the sex column.
133
+
134
+ Under columns, map each CSV column to its respective OSD concept (e.g., demographic_criteria, diagnosis), attribute (e.g., age, sex), and dtype (integer, string).
135
+
136
+ Please output the exact CSV and YAML code in clearly separated code blocks so I can copy them directly into my environment.
137
+ """
138
+ return (prompt,)
139
+
140
+
141
+ @app.cell
142
+ def _(mo, prompt):
143
  mo.callout(
144
  mo.md(
145
+ f"""
146
+ This is a prototype for filtering your CSV data using definitions from the [Open Syndrome Initiative](https://opensyndrome.org/).
147
+
148
+ You can either provide a sample of your own data, up to 10 MB, or generate a toy dataset using your preferred GenAI tool.
149
+
150
+ <details>
151
+ <summary>Prompt</summary>
152
+ ```
153
+ {prompt}
154
+ ```
155
+ </details>
156
+
157
+ Next, you will need to create a map of your data and the Open Syndrome Definition concepts you want to filter on. Don't worry! We have an example ready for you.
158
+
159
+ **Please note that we do not store any data**.
160
+ """
161
  ),
162
  kind="neutral",
163
  )
 
188
  value=_datasets[0] if _datasets else None,
189
  label="Example dataset",
190
  )
191
+ sample_file = mo.ui.file(kind="area", filetypes=[".csv"], max_size=10_000_000)
 
192
  return data_source, example_picker, sample_file
193
 
194
 
 
219
 
220
  @app.cell
221
  def _(EXAMPLE_DATASETS, data_source, example_picker):
222
+ _default_yaml = """
223
  profiles:
224
  - name: my_dataset
225
  # value_encodings: # optional — map OSD canonical values to dataset-specific ones
 
265
 
266
  date_format_input = mo.ui.text(
267
  value="%Y-%m-%d %H:%M:%S",
268
+ label="Date format<sup>1</sup>",
269
  )
270
 
271
+ date_block = mo.vstack([
272
+ date_format_input,
273
+ mo.md("[^1]: A Python date format code compatible with your data. See other date formats [here](https://strftime.org/).")
274
+ ])
275
+
276
  _cols_hint = "`, `".join(df_selected.columns)
277
 
278
  mo.vstack(
 
285
  f"Your dataset columns: `{_cols_hint}`"
286
  ),
287
  mo.hstack(
288
+ [yaml_editor, mo.vstack([date_column_picker, date_block])],
289
  widths=[3, 1],
290
  align="start",
291
  ),
 
359
 
360
  @app.cell
361
  def _(mo):
362
+ mo.md(r"""
363
+ ### Data sample
364
+ """)
365
  return
366
 
367
 
 
374
 
375
  @app.cell
376
  def _(mo):
377
+ mo.md(r"""
378
+ ---
379
+ """)
380
  return
381
 
382
 
383
  @app.cell
384
  def _(mo):
385
+ mo.md(r"""
386
+ ## Data & Definitions
387
+ """)
388
  return
389
 
390
 
pyproject.toml CHANGED
@@ -5,8 +5,8 @@ description = "Add your description here"
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
- "marimo>=0.8.0",
9
- "opensyndrome @ git+https://github.com/OpenSyndrome/open-syndrome-python.git@filter-engine",
10
  "plotly>=6.2.0",
11
  "polars>=1.38.1",
12
  ]
 
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
+ "marimo>=0.21.0",
9
+ "opensyndrome @ git+https://github.com/OpenSyndrome/open-syndrome-python.git@main",
10
  "plotly>=6.2.0",
11
  "polars>=1.38.1",
12
  ]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff