Spaces:

lvwerra
/

jupyter-gh-viewer-edu

Running

jupyter-gh-viewer-edu / app.py

lvwerra HF Staff

preprocessing attempt

3bd4bd2 over 1 year ago

1.21 kB

	import gradio as gr
	from datasets import load_dataset
	import nbformat
	from nbconvert import HTMLExporter
	from traitlets.config import Config

	# Exporter configuration: register ExtractOutputPreprocessor on the HTML
	# exporter; the intent is to have cell outputs exported as well.
	# NOTE(review): with outputs extracted, the rendered HTML may reference the
	# extracted files instead of inlining them — confirm images still display.
	config = Config()
	config.HTMLExporter.preprocessors = ["nbconvert.preprocessors.ExtractOutputPreprocessor"]

	# Build a single module-level exporter using the `classic` template; it is
	# reused by parse_notebook() for every conversion.
	html_exporter = HTMLExporter(config=config, template_name="classic")

	# Open the train split in streaming mode and keep one shared iterator over it;
	# parse_notebook() pulls its records from `ds_iter`.
	ds = load_dataset("data-agents/kaggle-notebooks", split="train", streaming=True)
	ds_iter = iter(ds)

	def parse_notebook():
	    """Render the next notebook from the dataset stream as an HTML string.

	    Pulls one record from the module-level ``ds_iter``, parses its ``"text"``
	    field as a version-4 notebook and converts it with ``html_exporter``.

	    Returns:
	        The HTML body produced by the exporter, or a short HTML notice once
	        the stream has no records left.
	    """
	    # A bare next() would let StopIteration escape the UI callback when the
	    # stream runs dry; a sentinel default turns that into a visible message.
	    record = next(ds_iter, None)
	    if record is None:
	        return "<p>No more notebooks to show.</p>"

	    notebook_parsed = nbformat.reads(record["text"], as_version=4)
	    notebook_body, resources = html_exporter.from_notebook_node(notebook_parsed)
	    # Debug aid: log whatever the exporter stored under resources["outputs"].
	    print("Resources:", resources["outputs"])
	    return notebook_body


	# Page layout: a title, a "next" button, and an HTML pane that shows the
	# rendered notebook.
	with gr.Blocks() as demo:
	    gr.Markdown("# Kaggle Notebooks")
	    button = gr.Button("Show next!")

	    # Single column inside a single row holding the notebook HTML.
	    with gr.Row(), gr.Column():
	        html = gr.HTML("")

	    # Both a button press and the initial page load call parse_notebook
	    # (no inputs) and write its return value into the HTML pane.
	    button.click(parse_notebook, [], [html])
	    demo.load(parse_notebook, [], [html])

	demo.launch()