import gradio as gr
from datasets import load_dataset, Dataset

# Dataset served by this app and the split that rows are read from.
DATASET_NAME = "Thang/wikides"
SPLIT = "train"

# Number of rows returned when the caller gives no (or an invalid) limit.
DEFAULT_LIMIT = 50


def _normalize_limit(limit) -> int:
    """Return *limit* as a positive int, or ``DEFAULT_LIMIT`` if it is invalid.

    Integral floats (e.g. ``10.0``) are accepted because a numeric input
    widget may deliver the value as a float; rejecting them would silently
    replace a perfectly valid request with the default.
    """
    if isinstance(limit, float) and limit.is_integer():
        limit = int(limit)
    if not isinstance(limit, int) or limit < 1:
        return DEFAULT_LIMIT
    return limit


def get_data_with_limit(limit: int = 50) -> dict:
    """Fetch the first *limit* rows of the dataset as a column-oriented dict.

    Args:
        limit: Maximum number of rows to return. Values that are not
            positive whole numbers fall back to ``DEFAULT_LIMIT``; values
            larger than the dataset are clamped to the dataset size.

    Returns:
        The result of ``Dataset.to_dict()`` for the selected rows, or a dict
        with ``"error"`` and ``"message"`` keys if loading or slicing fails.
    """
    limit = _normalize_limit(limit)

    # Broad catch is deliberate: this function is the API boundary, and the
    # client should receive a structured error instead of a raw traceback.
    try:
        # The dataset is loaded on each call rather than held in a module
        # global. NOTE: `load_dataset` goes through the local Hugging Face
        # cache, so downloaded files are normally reused by later calls.
        dataset = load_dataset(DATASET_NAME, split=SPLIT)

        # Never ask for more rows than the split actually contains.
        row_count = min(limit, len(dataset))

        return dataset.select(range(row_count)).to_dict()
    except Exception as e:
        return {"error": "Internal Server Error", "message": str(e)}


# Gradio interface exposing the function both as a UI and as the "data" API.
demo = gr.Interface(
    fn=get_data_with_limit,
    inputs=[
        # precision=0 makes the component hand the function an int, so the
        # limit check is not defeated by a float such as 10.0.
        gr.Number(label="limit", value=DEFAULT_LIMIT, minimum=1, precision=0)
    ],
    outputs=[
        gr.JSON(label="Data")
    ],
    title="WikiDES Data API",
    description=(
        f"A simple API to access data from the **{DATASET_NAME}** dataset. "
        "The dataset is loaded on-demand for each request. "
        f"The default limit is {DEFAULT_LIMIT}."
    ),
    examples=[
        [10],
        [25],
        [100],
    ],
    allow_flagging="never",
    api_name="data",
)

# Launch the Gradio app (runs at import time, as in the original script).
demo.launch()