Spaces:
Sleeping
Sleeping
update filtering
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ all_languages = list(tags.keys())
|
|
| 18 |
|
| 19 |
|
| 20 |
|
| 21 |
-
@st.
|
| 22 |
def load_data(language, ext):
|
| 23 |
ds = load_dataset(
|
| 24 |
"loubnabnl/the-stack-inspection-data",
|
|
@@ -41,18 +41,17 @@ st.sidebar.header("Filters")
|
|
| 41 |
not_lexable = st.sidebar.checkbox("Not lexable")
|
| 42 |
min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 1.0)
|
| 43 |
max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 0)
|
| 44 |
-
max_mean_line_length = st.sidebar.slider("Maximum average line length", 0,
|
| 45 |
st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
|
| 46 |
`alphanumeric_fraction` is smaller than the selected value.")
|
| 47 |
|
| 48 |
# load and filter dataset
|
| 49 |
samples = load_data(chosen_language, chosen_ext)
|
| 50 |
|
| 51 |
-
samples = samples.filter(
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
)
|
| 56 |
if not_lexable:
|
| 57 |
samples = samples.filter(lambda x: not x["lexable"])
|
| 58 |
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
|
| 21 |
+
@st.cache_data()
|
| 22 |
def load_data(language, ext):
|
| 23 |
ds = load_dataset(
|
| 24 |
"loubnabnl/the-stack-inspection-data",
|
|
|
|
| 41 |
not_lexable = st.sidebar.checkbox("Not lexable")
|
| 42 |
min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 1.0)
|
| 43 |
max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 0)
|
| 44 |
+
max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 500, 0)
|
| 45 |
st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
|
| 46 |
`alphanumeric_fraction` is smaller than the selected value.")
|
| 47 |
|
| 48 |
# load and filter dataset
|
| 49 |
samples = load_data(chosen_language, chosen_ext)
|
| 50 |
|
| 51 |
+
samples = samples.filter(lambda x: x["alphanum_fraction"] < min_alphanum)
|
| 52 |
+
samples = samples.filter(lambda x: x["max_line_length"] > max_line_length)
|
| 53 |
+
samples = samples.filter(lambda x: x["avg_line_length"] > max_mean_line_length)
|
| 54 |
+
|
|
|
|
| 55 |
if not_lexable:
|
| 56 |
samples = samples.filter(lambda x: not x["lexable"])
|
| 57 |
|