Spaces:

ddebree
/

mathvision-jepa-explorer

Running

App Files Community

ddebree committed 29 days ago

Commit

cff9aa4

1 Parent(s): 5f7d974

Make

Browse files

Files changed (1) hide show

src/mathvision_explorer/streamlit_app.py +55 -45

src/mathvision_explorer/streamlit_app.py CHANGED Viewed

@@ -60,54 +60,64 @@ def main(jsonl_path: Path = Path("data/demo/demo.jsonl")) -> None:
             f"[Example: MathLLMs/MathVision]({MATHVISION_DATASET_URL}) | "
             f"[Browse HF datasets]({HF_DATASETS_URL})"
         )
-        uploaded_dataset = st.file_uploader(
-            "Upload dataset",
-            type=["jsonl", "zip"],
             help=(
-                "Use a JSONL file for text-only records, or a ZIP containing one JSONL "
-                "file plus referenced images."
             ),
         )
-        hf_dataset_ref = st.text_input(
-            "HF dataset URL or ID",
-            placeholder="MathLLMs/MathVision",
-            help="Paste a Hugging Face dataset URL or repo id, then load a split below.",
-        )
-        hf_split = st.text_input(
-            "HF split",
-            value="test",
-            help="Dataset split to load, such as test, train, validation, or testmini.",
-        )
-        hf_limit = st.number_input(
-            "HF max records",
-            min_value=1,
-            max_value=500,
-            value=50,
-            step=10,
-            help="Cap rows loaded from Hugging Face so exploration stays responsive.",
-        )
-        if st.button(
-            "Load HF dataset",
-            help="Download the selected split and convert compatible rows into records.",
-        ):
-            try:
-                records = _load_hf_dataset_records(
-                    st,
-                    hf_dataset_ref,
-                    split=hf_split,
-                    limit=int(hf_limit),
-                )
-            except (RuntimeError, ValueError, OSError) as error:
-                st.error(str(error))
-                st.stop()
-                raise RuntimeError("Streamlit stopped after HF dataset load error.") from error
-            st.session_state["hf_dataset_records"] = records
-        elif uploaded_dataset is not None:
-            records = _load_uploaded_records(st, uploaded_dataset)
-            subjects = sorted({record.subject for record in records if record.subject is not None})
-            levels = sorted({record.level for record in records if record.level is not None})
-        elif "hf_dataset_records" in st.session_state:
-            records = st.session_state["hf_dataset_records"]
         subjects = sorted({record.subject for record in records if record.subject is not None})
         levels = sorted({record.level for record in records if record.level is not None})
         summary = summarize_records(records)

             f"[Example: MathLLMs/MathVision]({MATHVISION_DATASET_URL}) | "
             f"[Browse HF datasets]({HF_DATASETS_URL})"
         )
+        dataset_source = st.radio(
+            "Dataset source",
+            ["Demo", "Hugging Face URL", "Upload file"],
+            horizontal=False,
             help=(
+                "Choose whether to use the bundled demo, paste a Hub dataset link, "
+                "or upload files."
             ),
         )
+        if dataset_source == "Hugging Face URL":
+            hf_dataset_ref = st.text_input(
+                "HF dataset URL or ID",
+                value="MathLLMs/MathVision",
+                placeholder="https://huggingface.co/datasets/MathLLMs/MathVision",
+                help="Paste a Hugging Face dataset URL or repo id.",
+            )
+            hf_split = st.text_input(
+                "HF split",
+                value="test",
+                help="Dataset split to load, such as test, train, validation, or testmini.",
+            )
+            hf_limit = st.number_input(
+                "HF max records",
+                min_value=1,
+                max_value=500,
+                value=50,
+                step=10,
+                help="Cap rows loaded from Hugging Face so exploration stays responsive.",
+            )
+            if st.button(
+                "Load HF dataset",
+                help="Download the selected split and convert compatible rows into records.",
+            ):
+                try:
+                    records = _load_hf_dataset_records(
+                        st,
+                        hf_dataset_ref,
+                        split=hf_split,
+                        limit=int(hf_limit),
+                    )
+                except (RuntimeError, ValueError, OSError) as error:
+                    st.error(str(error))
+                    st.stop()
+                    raise RuntimeError("Streamlit stopped after HF dataset load error.") from error
+                st.session_state["hf_dataset_records"] = records
+            elif "hf_dataset_records" in st.session_state:
+                records = st.session_state["hf_dataset_records"]
+        elif dataset_source == "Upload file":
+            uploaded_dataset = st.file_uploader(
+                "Upload dataset",
+                type=["jsonl", "zip"],
+                help=(
+                    "Use a JSONL file for text-only records, or a ZIP containing one JSONL "
+                    "file plus referenced images."
+                ),
+            )
+            if uploaded_dataset is not None:
+                records = _load_uploaded_records(st, uploaded_dataset)
         subjects = sorted({record.subject for record in records if record.subject is not None})
         levels = sorted({record.level for record in records if record.level is not None})
         summary = summarize_records(records)