lvwerra's picture
lvwerra HF Staff
preprocessing attempt
3bd4bd2
raw
history blame
1.21 kB
import gradio as gr
from datasets import load_dataset
import nbformat
from nbconvert import HTMLExporter
from traitlets.config import Config
# Register the output-extraction preprocessor on the HTML exporter so that
# cell outputs are also made available through the exporter's `resources`.
config = Config()
config.HTMLExporter.preprocessors = [
    "nbconvert.preprocessors.ExtractOutputPreprocessor",
]

# One module-level exporter, rendered with the `classic` template, is reused
# for every notebook that gets converted.
html_exporter = HTMLExporter(
    config=config,
    template_name="classic",
)

# Open the train split in streaming mode and keep a single iterator over it;
# each `next()` on `ds_iter` yields the following record.
ds = load_dataset(
    "data-agents/kaggle-notebooks",
    split="train",
    streaming=True,
)
ds_iter = iter(ds)
def parse_notebook():
    """Render the next notebook from the dataset stream as HTML.

    Pulls one record from the module-level ``ds_iter``, parses its ``"text"``
    field as a version-4 notebook and converts it with ``html_exporter``.

    Returns:
        The HTML body produced by the exporter, or a short HTML notice once
        the stream has no records left.
    """
    # A bare ``next()`` raises StopIteration out of the UI callback when the
    # stream runs dry; use a default and report the condition instead.
    record = next(ds_iter, None)
    if record is None:
        return "<p>No more notebooks to show.</p>"

    notebook_parsed = nbformat.reads(record["text"], as_version=4)
    notebook_body, resources = html_exporter.from_notebook_node(notebook_parsed)
    # Debug aid: show what the output-extraction preprocessor collected.
    print("Resources:", resources["outputs"])
    return notebook_body
# Page layout: a title, a "next" button, and one HTML pane that displays the
# rendered notebook.
with gr.Blocks() as demo:
    gr.Markdown("# Kaggle Notebooks")
    button = gr.Button("Show next!")

    with gr.Row(), gr.Column():
        html = gr.HTML("")

    # The same callback fills the pane on button press and on page load.
    button.click(
        fn=parse_notebook,
        inputs=[],
        outputs=[html],
    )
    demo.load(
        fn=parse_notebook,
        inputs=[],
        outputs=[html],
    )

demo.launch()