Spaces:
Running
Running
Update.
Browse files- core/state.py +7 -0
- cypress.config.js +1 -0
- cypress/e2e/createManually.cy.js +1 -1
- cypress/e2e/uploadCsv.cy.js +1 -1
- views/files.py +2 -5
- views/load.py +23 -21
- views/record_sets.py +14 -2
- views/splash.py +17 -4
core/state.py
CHANGED
|
@@ -20,6 +20,7 @@ from core.constants import OAUTH_CLIENT_SECRET
|
|
| 20 |
from core.constants import PAST_PROJECTS_PATH
|
| 21 |
from core.constants import PROJECT_FOLDER_PATTERN
|
| 22 |
from core.constants import REDIRECT_URI
|
|
|
|
| 23 |
import mlcroissant as mlc
|
| 24 |
|
| 25 |
|
|
@@ -257,6 +258,8 @@ class Metadata:
|
|
| 257 |
del self.distribution[key]
|
| 258 |
|
| 259 |
def add_record_set(self, record_set: RecordSet) -> None:
|
|
|
|
|
|
|
| 260 |
self.record_sets.append(record_set)
|
| 261 |
|
| 262 |
def remove_record_set(self, key: int) -> None:
|
|
@@ -323,3 +326,7 @@ class Metadata:
|
|
| 323 |
distribution=distribution,
|
| 324 |
record_sets=record_sets,
|
| 325 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
from core.constants import PAST_PROJECTS_PATH
|
| 21 |
from core.constants import PROJECT_FOLDER_PATTERN
|
| 22 |
from core.constants import REDIRECT_URI
|
| 23 |
+
from core.names import find_unique_name
|
| 24 |
import mlcroissant as mlc
|
| 25 |
|
| 26 |
|
|
|
|
| 258 |
del self.distribution[key]
|
| 259 |
|
| 260 |
def add_record_set(self, record_set: RecordSet) -> None:
|
| 261 |
+
name = find_unique_name(self.names(), record_set.name)
|
| 262 |
+
record_set.name = name
|
| 263 |
self.record_sets.append(record_set)
|
| 264 |
|
| 265 |
def remove_record_set(self, key: int) -> None:
|
|
|
|
| 326 |
distribution=distribution,
|
| 327 |
record_sets=record_sets,
|
| 328 |
)
|
| 329 |
+
|
| 330 |
+
def names(self) -> set[str]:
|
| 331 |
+
nodes = self.distribution + self.record_sets
|
| 332 |
+
return set([node.name for node in nodes])
|
cypress.config.js
CHANGED
|
@@ -3,5 +3,6 @@ const { defineConfig } = require("cypress");
|
|
| 3 |
module.exports = defineConfig({
|
| 4 |
// To access content within Streamlit iframes for custom components:
|
| 5 |
chromeWebSecurity: false,
|
|
|
|
| 6 |
e2e: {},
|
| 7 |
});
|
|
|
|
| 3 |
module.exports = defineConfig({
|
| 4 |
// To access content within Streamlit iframes for custom components:
|
| 5 |
chromeWebSecurity: false,
|
| 6 |
+
defaultCommandTimeout: 10000,
|
| 7 |
e2e: {},
|
| 8 |
});
|
cypress/e2e/createManually.cy.js
CHANGED
|
@@ -8,7 +8,7 @@ describe('Create a resource manually', () => {
|
|
| 8 |
it('should allow adding a FileObject resource', () => {
|
| 9 |
// Streamlit starts on :8501.
|
| 10 |
cy.visit('http://localhost:8501')
|
| 11 |
-
cy.get('button'
|
| 12 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
| 13 |
cy.get('[data-testid="stMarkdownContainer"]')
|
| 14 |
.contains('Metadata')
|
|
|
|
| 8 |
it('should allow adding a FileObject resource', () => {
|
| 9 |
// Streamlit starts on :8501.
|
| 10 |
cy.visit('http://localhost:8501')
|
| 11 |
+
cy.get('button').contains('Create').click()
|
| 12 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
| 13 |
cy.get('[data-testid="stMarkdownContainer"]')
|
| 14 |
.contains('Metadata')
|
cypress/e2e/uploadCsv.cy.js
CHANGED
|
@@ -8,7 +8,7 @@ describe('Editor loads a local CSV as a resource', () => {
|
|
| 8 |
it('should display the form: Overview, Metadata, Resources, & Record Sets', () => {
|
| 9 |
// Streamlit starts on :8501.
|
| 10 |
cy.visit('http://localhost:8501')
|
| 11 |
-
cy.get('button'
|
| 12 |
|
| 13 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
| 14 |
cy.get('[data-testid="stMarkdownContainer"]')
|
|
|
|
| 8 |
it('should display the form: Overview, Metadata, Resources, & Record Sets', () => {
|
| 9 |
// Streamlit starts on :8501.
|
| 10 |
cy.visit('http://localhost:8501')
|
| 11 |
+
cy.get('button').contains('Create').click()
|
| 12 |
|
| 13 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
| 14 |
cy.get('[data-testid="stMarkdownContainer"]')
|
views/files.py
CHANGED
|
@@ -113,11 +113,8 @@ def _render_upload_panel():
|
|
| 113 |
url = st.session_state[_DISTANT_URL_KEY]
|
| 114 |
uploaded_file = st.session_state[_LOCAL_FILE_KEY]
|
| 115 |
file_type = FILE_TYPES[file_type_name]
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
+ st.session_state[Metadata].record_sets
|
| 119 |
-
)
|
| 120 |
-
names = set([node.name for node in nodes])
|
| 121 |
if url:
|
| 122 |
file = file_from_url(file_type, url, names)
|
| 123 |
elif uploaded_file:
|
|
|
|
| 113 |
url = st.session_state[_DISTANT_URL_KEY]
|
| 114 |
uploaded_file = st.session_state[_LOCAL_FILE_KEY]
|
| 115 |
file_type = FILE_TYPES[file_type_name]
|
| 116 |
+
metadata: Metadata = st.session_state[Metadata]
|
| 117 |
+
names = metadata.names()
|
|
|
|
|
|
|
|
|
|
| 118 |
if url:
|
| 119 |
file = file_from_url(file_type, url, names)
|
| 120 |
elif uploaded_file:
|
views/load.py
CHANGED
|
@@ -3,6 +3,7 @@ import os
|
|
| 3 |
from etils import epath
|
| 4 |
import streamlit as st
|
| 5 |
|
|
|
|
| 6 |
from core.past_projects import save_current_project
|
| 7 |
from core.state import CurrentStep
|
| 8 |
from core.state import Metadata
|
|
@@ -10,25 +11,26 @@ import mlcroissant as mlc
|
|
| 10 |
from utils import jump_to
|
| 11 |
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def render_load():
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
newfile_name = (
|
| 19 |
-
epath.Path("~").expanduser()
|
| 20 |
-
/ ".cache"
|
| 21 |
-
/ "croissant"
|
| 22 |
-
/ "loaded_croissant"
|
| 23 |
-
)
|
| 24 |
-
os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
|
| 25 |
-
with open(newfile_name, mode="wb+") as outfile:
|
| 26 |
-
outfile.write(file_cont)
|
| 27 |
-
dataset = mlc.Dataset(newfile_name)
|
| 28 |
-
st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
|
| 29 |
-
jump_to(CurrentStep.editor)
|
| 30 |
-
save_current_project()
|
| 31 |
-
st.rerun()
|
| 32 |
-
except mlc.ValidationError as e:
|
| 33 |
-
st.warning(e)
|
| 34 |
-
st.toast(body="Invalid Croissant File!", icon="🔥")
|
|
|
|
| 3 |
from etils import epath
|
| 4 |
import streamlit as st
|
| 5 |
|
| 6 |
+
from core.constants import EDITOR_CACHE
|
| 7 |
from core.past_projects import save_current_project
|
| 8 |
from core.state import CurrentStep
|
| 9 |
from core.state import Metadata
|
|
|
|
| 11 |
from utils import jump_to
|
| 12 |
|
| 13 |
|
| 14 |
+
def _on_file_upload(key):
|
| 15 |
+
file = st.session_state[key]
|
| 16 |
+
file_cont = file.read()
|
| 17 |
+
# TODO(marcenacp): The Python library should support loading from an open file/dict.
|
| 18 |
+
newfile_name = EDITOR_CACHE / "loaded_croissant"
|
| 19 |
+
os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
|
| 20 |
+
with open(newfile_name, mode="wb+") as outfile:
|
| 21 |
+
outfile.write(file_cont)
|
| 22 |
+
try:
|
| 23 |
+
dataset = mlc.Dataset(newfile_name)
|
| 24 |
+
st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
|
| 25 |
+
jump_to(CurrentStep.editor)
|
| 26 |
+
save_current_project()
|
| 27 |
+
except mlc.ValidationError as e:
|
| 28 |
+
st.warning(e)
|
| 29 |
+
st.toast(body="Invalid Croissant File!", icon="🔥")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
def render_load():
|
| 33 |
+
key = "json-ld-file-upload"
|
| 34 |
+
st.file_uploader(
|
| 35 |
+
"Select a JSON-LD", type="json", key=key, on_change=_on_file_upload, args=(key,)
|
| 36 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
views/record_sets.py
CHANGED
|
@@ -83,6 +83,11 @@ def _find_joins(fields: list[Field]) -> set[Join]:
|
|
| 83 |
return joins
|
| 84 |
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
def _handle_fields_change(record_set_key: int, record_set: RecordSet):
|
| 87 |
data_editor_key = _data_editor_key(record_set_key, record_set)
|
| 88 |
result = st.session_state[data_editor_key]
|
|
@@ -148,7 +153,7 @@ def _render_left_panel():
|
|
| 148 |
record_sets = st.session_state[Metadata].record_sets
|
| 149 |
record_set: RecordSet
|
| 150 |
for record_set_key, record_set in enumerate(record_sets):
|
| 151 |
-
title = f"**{record_set.name}** ({len(record_set.fields)} fields)"
|
| 152 |
prefix = f"record-set-{record_set_key}"
|
| 153 |
with st.expander(title, expanded=False):
|
| 154 |
col1, col2 = st.columns([1, 3])
|
|
@@ -238,7 +243,8 @@ def _render_left_panel():
|
|
| 238 |
)
|
| 239 |
st.data_editor(
|
| 240 |
fields,
|
| 241 |
-
|
|
|
|
| 242 |
num_rows="dynamic",
|
| 243 |
key=data_editor_key,
|
| 244 |
column_config={
|
|
@@ -269,6 +275,12 @@ def _render_left_panel():
|
|
| 269 |
on_click=_handle_on_click_field,
|
| 270 |
args=(record_set_key, record_set),
|
| 271 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
|
| 274 |
def _render_right_panel():
|
|
|
|
| 83 |
return joins
|
| 84 |
|
| 85 |
|
| 86 |
+
def _handle_create_record_set():
|
| 87 |
+
metadata: Metadata = st.session_state[Metadata]
|
| 88 |
+
metadata.add_record_set(RecordSet(name="new-record-set", description=""))
|
| 89 |
+
|
| 90 |
+
|
| 91 |
def _handle_fields_change(record_set_key: int, record_set: RecordSet):
|
| 92 |
data_editor_key = _data_editor_key(record_set_key, record_set)
|
| 93 |
result = st.session_state[data_editor_key]
|
|
|
|
| 153 |
record_sets = st.session_state[Metadata].record_sets
|
| 154 |
record_set: RecordSet
|
| 155 |
for record_set_key, record_set in enumerate(record_sets):
|
| 156 |
+
title = f"**{record_set.name or '-'}** ({len(record_set.fields)} fields)"
|
| 157 |
prefix = f"record-set-{record_set_key}"
|
| 158 |
with st.expander(title, expanded=False):
|
| 159 |
col1, col2 = st.columns([1, 3])
|
|
|
|
| 243 |
)
|
| 244 |
st.data_editor(
|
| 245 |
fields,
|
| 246 |
+
# There is a bug with `st.data_editor` when the df is empty.
|
| 247 |
+
use_container_width=not fields.empty,
|
| 248 |
num_rows="dynamic",
|
| 249 |
key=data_editor_key,
|
| 250 |
column_config={
|
|
|
|
| 275 |
on_click=_handle_on_click_field,
|
| 276 |
args=(record_set_key, record_set),
|
| 277 |
)
|
| 278 |
+
st.button(
|
| 279 |
+
"Create a new RecordSet",
|
| 280 |
+
key=f"create-new-record-set",
|
| 281 |
+
type="primary",
|
| 282 |
+
on_click=_handle_create_record_set,
|
| 283 |
+
)
|
| 284 |
|
| 285 |
|
| 286 |
def _render_right_panel():
|
views/splash.py
CHANGED
|
@@ -40,8 +40,8 @@ def render_splash():
|
|
| 40 |
)
|
| 41 |
with st.expander("**Try out an example!**", expanded=True):
|
| 42 |
|
| 43 |
-
def create_example():
|
| 44 |
-
url = "https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/
|
| 45 |
try:
|
| 46 |
json = requests.get(url).json()
|
| 47 |
metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
|
|
@@ -50,16 +50,29 @@ def render_splash():
|
|
| 50 |
jump_to(CurrentStep.editor)
|
| 51 |
except Exception as exception:
|
| 52 |
logging.error(exception)
|
| 53 |
-
st.
|
| 54 |
"Sorry, it seems that the example is broken... Can you please"
|
| 55 |
" [open an issue on"
|
| 56 |
" GitHub](https://github.com/mlcommons/croissant/issues/new)?"
|
| 57 |
)
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
st.button(
|
| 60 |
-
"
|
| 61 |
on_click=create_example,
|
| 62 |
type="primary",
|
|
|
|
| 63 |
)
|
| 64 |
with col2:
|
| 65 |
with st.expander("**Past projects**", expanded=True):
|
|
|
|
| 40 |
)
|
| 41 |
with st.expander("**Try out an example!**", expanded=True):
|
| 42 |
|
| 43 |
+
def create_example(dataset: str):
|
| 44 |
+
url = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/{dataset.lower()}/metadata.json"
|
| 45 |
try:
|
| 46 |
json = requests.get(url).json()
|
| 47 |
metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
|
|
|
|
| 50 |
jump_to(CurrentStep.editor)
|
| 51 |
except Exception as exception:
|
| 52 |
logging.error(exception)
|
| 53 |
+
st.error(
|
| 54 |
"Sorry, it seems that the example is broken... Can you please"
|
| 55 |
" [open an issue on"
|
| 56 |
" GitHub](https://github.com/mlcommons/croissant/issues/new)?"
|
| 57 |
)
|
| 58 |
|
| 59 |
+
dataset = st.selectbox(
|
| 60 |
+
label="Dataset",
|
| 61 |
+
options=[
|
| 62 |
+
"Titanic",
|
| 63 |
+
"FLORES-200",
|
| 64 |
+
"GPT-3",
|
| 65 |
+
"COCO2014",
|
| 66 |
+
"PASS",
|
| 67 |
+
"MovieLens",
|
| 68 |
+
"Bigcode-The-Stack",
|
| 69 |
+
],
|
| 70 |
+
)
|
| 71 |
st.button(
|
| 72 |
+
f"{dataset} dataset",
|
| 73 |
on_click=create_example,
|
| 74 |
type="primary",
|
| 75 |
+
args=(dataset,),
|
| 76 |
)
|
| 77 |
with col2:
|
| 78 |
with st.expander("**Past projects**", expanded=True):
|