import gradio as gr
from datasets import load_dataset, Dataset

# Dataset identifier and split name passed to `load_dataset` on every request.
DATASET_NAME = "Thang/wikides"
SPLIT = "train"

# Function to fetch data with a limit on demand
def get_data_with_limit(limit: int = 50) -> dict:
    """
    Return the first ``limit`` rows of the configured dataset split.

    Args:
        limit: Maximum number of rows to return. Integral floats such as
            ``10.0`` are accepted and converted to ``int``. Any other
            non-integer value, or a value below 1, falls back to the
            default of 50. Values larger than the dataset are clamped to
            the dataset size.

    Returns:
        The result of ``Dataset.to_dict()`` for the selected rows, or a dict
        with ``"error"`` and ``"message"`` keys if loading or slicing fails.
    """
    default_limit = 50

    # Gradio's Number component passes floats (e.g. 10.0) unless it is
    # configured with precision=0. Accept integral floats so a valid request
    # is not silently replaced by the default.
    if isinstance(limit, float) and limit.is_integer():
        limit = int(limit)

    # Anything that is still not a positive integer falls back to the default.
    if not isinstance(limit, int) or limit < 1:
        limit = default_limit

    try:
        # Load the dataset on demand for each call. Note that `load_dataset`
        # keeps its own on-disk cache by default, so only the first call
        # needs to download the data.
        dataset = load_dataset(DATASET_NAME, split=SPLIT)

        # Never ask for more rows than the split actually contains.
        limit = min(limit, len(dataset))

        # Take the first `limit` rows and convert them to a plain dictionary.
        return dataset.select(range(limit)).to_dict()
    except Exception as e:
        # This is the API boundary: report failures as a structured payload
        # instead of letting the exception propagate to the UI.
        return {"error": "Internal Server Error", "message": str(e)}

# Create the Gradio interface: one numeric "limit" input, one JSON output.
demo = gr.Interface(
    fn=get_data_with_limit,
    inputs=[
        # precision=0 makes Gradio round the value and pass it as an int.
        # Without it the component passes a float (e.g. 10.0), which does not
        # satisfy the handler's `int` annotation.
        gr.Number(label="limit", value=50, minimum=1, precision=0)
    ],
    outputs=[
        gr.JSON(label="Data")
    ],
    title="WikiDES Data API",
    description=f"A simple API to access data from the **{DATASET_NAME}** dataset. The dataset is loaded on-demand for each request. The default limit is 50.",
    # Each example row supplies a value for the single "limit" input.
    examples=[
        [10],
        [25],
        [100]
    ],
    allow_flagging="never",
    api_name="data"
)

# Launch the Gradio app
demo.launch()