Spaces:
Running
Running
File size: 2,941 Bytes
7924d77 1f9efe8 3bd4bd2 415d76d 52cb009 3bd4bd2 415d76d 3bd4bd2 7924d77 eee62c1 706aa29 1f9efe8 415d76d 52cb009 706aa29 e476ef0 415d76d 706aa29 e476ef0 706aa29 e476ef0 706aa29 52cb009 a386c0e 415d76d a386c0e 415d76d 1f9efe8 52cb009 706aa29 a386c0e 1f9efe8 0452707 706aa29 52cb009 a386c0e 706aa29 415d76d e476ef0 415d76d 1f9efe8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import gradio as gr
from datasets import load_dataset
import nbformat
from nbconvert import HTMLExporter
from traitlets.config import Config
import os
import shutil
import base64
# Configuration for HTMLExporter: register nbconvert's ExtractOutputPreprocessor
# so it runs as part of every export. embed_figures() later reads the exported
# resources['outputs'] mapping (presumably populated by this preprocessor --
# confirm against the nbconvert docs).
config = Config()
config.HTMLExporter.preprocessors = ["nbconvert.preprocessors.ExtractOutputPreprocessor"]
# One module-level exporter, built with the "classic" template and reused by
# parse_notebook() for every request.
html_exporter = HTMLExporter(config=config, template_name="classic")
# Dataset loaded once at import time. The rest of the file indexes it with the
# score chosen in the UI ("error", "0" ... "5"), so those are presumably its
# split names -- verify against the dataset card.
ds = load_dataset("data-agents/kaggle-notebooks-edu-v0")
# Same dataset restricted to rows whose "contains_outputs" field is truthy.
ds_out = ds.filter(lambda x: x["contains_outputs"])
# Scratch directory for the notebook file offered for download; it is deleted
# and recreated by reset_tmp_folder() on every parse.
TMP_DIR = './tmp/'
def reset_tmp_folder():
    """Leave ``TMP_DIR`` as a freshly created, empty directory.

    If something already exists at ``TMP_DIR`` it is removed recursively
    first; the directory is then created again from scratch.
    """
    stale_dir_present = os.path.exists(TMP_DIR)
    if stale_dir_present:
        # Drop the previous request's files before starting over.
        shutil.rmtree(TMP_DIR)
    os.makedirs(TMP_DIR)
# Data-URI media types keyed by the (lower-cased) file extension of an
# extracted output. Anything not listed falls back to PNG, which is what the
# previous implementation assumed for every output.
_FIGURE_MIME_TYPES = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.svg': 'image/svg+xml',
    '.pdf': 'application/pdf',
}


def embed_figures(html_body, resources):
    """Inline extracted notebook outputs into the HTML as base64 data URIs.

    Every occurrence of an output's file name in ``html_body`` is replaced by
    a ``data:<mime>;base64,...`` URI built from that output's bytes, so the
    HTML no longer refers to external files.

    Args:
        html_body: HTML string produced by the exporter.
        resources: Mapping returned alongside the HTML. Its ``'outputs'``
            entry, when present, maps output file names to raw ``bytes``.

    Returns:
        The HTML string with the file-name references replaced. When
        ``resources`` has no dict under ``'outputs'`` the input is returned
        unchanged.
    """
    outputs = resources.get('outputs')
    if not isinstance(outputs, dict):
        # Nothing was extracted (e.g. a notebook without cells), so there is
        # nothing to substitute.
        return html_body

    for key, value in outputs.items():
        # Pick the media type from the file extension instead of labelling
        # JPEG/SVG/PDF payloads as PNG.
        extension = os.path.splitext(key)[1].lower()
        mime_type = _FIGURE_MIME_TYPES.get(extension, 'image/png')
        b64_figure = base64.b64encode(value).decode('ascii')
        html_body = html_body.replace(key, f'data:{mime_type};base64,{b64_figure}')
    return html_body
def update_max_index(score_option, output_option):
    """Recompute the index slider's range for the current selection.

    Args:
        score_option: Key of the dataset split to count (the selected score).
        output_option: ``"All"`` counts rows in ``ds``; any other value counts
            rows in the outputs-only ``ds_out``.

    Returns:
        Two slider updates, in this order: one setting the new maximum, and
        one that also resets the value to 0. Both are returned because the
        event wiring lists the slider twice in its outputs.
    """
    source = ds if output_option == "All" else ds_out
    # Indices are zero-based, so the largest valid one is len - 1 (the old
    # code used len, letting the slider select one past the end). Clamp at 0
    # so an empty split does not produce a negative maximum.
    max_index = max(len(source[score_option]) - 1, 0)
    # Both updates target the Slider, so both are built as Slider updates.
    return gr.Slider(maximum=max_index), gr.Slider(value=0, maximum=max_index)
def parse_notebook(score_options, output_options, index):
    """Render one notebook from the dataset to HTML and save its source.

    Args:
        score_options: Key of the dataset split to read (the selected score).
        output_options: ``"All"`` reads from ``ds``; any other value reads
            from the outputs-only ``ds_out``.
        index: Position of the notebook within the selected split. It is
            converted to ``int`` and clamped into the valid range.

    Returns:
        A tuple ``(notebook_body, out_path)``: the exported HTML with its
        figures inlined, and the path of the notebook source file written
        under ``TMP_DIR``.

    Raises:
        IndexError: If the selected split is empty.
    """
    reset_tmp_folder()

    source = ds if output_options == "All" else ds_out
    split = source[score_options]
    # The original indexed with an undefined name ``i``; use the ``index``
    # argument. Clamp it because the slider's range can briefly be larger
    # than the split that is currently selected.
    position = min(max(int(index), 0), len(split) - 1)
    sample = split[position]

    notebook_string = sample["text"]
    # Keep only the last path component of the id as the file name.
    notebook_id = sample["id"].split("/")[-1]
    out_path = os.path.join(TMP_DIR, notebook_id)

    # Save the notebook string to a file. The encoding is explicit so that
    # non-ASCII notebook content does not depend on the platform default.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(notebook_string)

    notebook_parsed = nbformat.reads(notebook_string, as_version=4)
    (notebook_body, resources) = html_exporter.from_notebook_node(notebook_parsed)
    notebook_body = embed_figures(notebook_body, resources)
    return notebook_body, out_path
# Build the UI: two selectors choose a dataset split, a slider picks a
# notebook inside it, and the rendered HTML plus the source file are shown.
with gr.Blocks() as demo:
    gr.Markdown("# Kaggle Notebooks")

    score_options = gr.Dropdown(
        ["error", "0", "1", "2", "3", "4", "5"],
        value="5",
        label="Notebook score",
        info="Select the assigned notebook score.",
    )
    output_options = gr.Radio(
        ["Outputs only", "All"],
        value="Outputs only",
        label="Output filter",
        info="Many notebooks contain no outputs.",
    )
    # Size the slider for the default selection (score "5", outputs only)
    # instead of a fixed 100, so it cannot start out pointing past the end.
    initial_max_index = max(len(ds_out["5"]) - 1, 0)
    index_slider = gr.Slider(
        minimum=0,
        maximum=initial_max_index,
        step=1,
        value=0,
        label="Index",
    )
    file = gr.File()
    html = gr.HTML("")

    # Changing either selector recomputes the slider range. The slider is
    # listed twice because update_max_index returns two updates for it.
    for selector in (score_options, output_options):
        selector.change(
            fn=update_max_index,
            inputs=[score_options, output_options],
            outputs=[index_slider, index_slider],
        )

    # NOTE(review): the notebook is re-rendered only on slider changes; if a
    # selector changes while the slider is already at 0 the view may stay
    # stale -- confirm against Gradio's change-event semantics.
    viewer_inputs = [score_options, output_options, index_slider]
    viewer_outputs = [html, file]
    index_slider.change(fn=parse_notebook, inputs=viewer_inputs, outputs=viewer_outputs)
    # Render the default selection as soon as the page loads.
    demo.load(fn=parse_notebook, inputs=viewer_inputs, outputs=viewer_outputs)

demo.launch()