Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from huggingface_hub import HfApi, HfFolder | |
| from datasets import load_dataset | |
def fetch_dataset_names(query, limit=None):
    """Return the ids of Hub datasets whose id contains *query*.

    Used to populate the dataset typeahead. Matching is case-insensitive.

    Args:
        query: Substring to look for in dataset ids.
        limit: Optional cap on the number of datasets requested from the
            Hub. ``None`` (the default) requests every match.

    Returns:
        A list of the matching dataset ids.
    """
    api = HfApi()
    # Ask the Hub to filter by name instead of enumerating the metadata of
    # every public dataset on each lookup. An empty query means "no filter".
    candidates = api.list_datasets(search=query or None, limit=limit)
    # Re-apply the substring test locally so only ids that really contain
    # the query are returned, whatever the server-side search matched on.
    needle = query.lower()
    return [d.id for d in candidates if needle in d.id.lower()]
def create_sampled_dataset(dataset_name, num_rows, user_token):
    """Sample rows from a Hub dataset and publish them as a new dataset.

    Loads *dataset_name*, draws ``num_rows`` random rows from its ``train``
    split and pushes them to a dataset repository under the account that
    owns *user_token*.

    Args:
        dataset_name: Hub id of the dataset to sample from.
        num_rows: Number of rows to draw; must be between 1 and the number
            of rows in the ``train`` split.
        user_token: Hugging Face access token. It is used to read the source
            dataset and to create the new repository, so it needs write
            permission.

    Returns:
        The URL of the newly created dataset repository.

    Raises:
        ValueError: If the dataset has no ``train`` split, or ``num_rows``
            is outside the valid range.
    """
    num_rows = int(num_rows)  # range() below rejects floats

    # NOTE(review): the ``token`` keyword requires a recent ``datasets``
    # release (older ones spelled it ``use_auth_token``) - confirm the
    # version pinned for this app.
    dataset = load_dataset(dataset_name, token=user_token)

    if "train" not in dataset:
        available_splits = ", ".join(dataset.keys()) or "none"
        raise ValueError(
            f"Dataset '{dataset_name}' has no 'train' split "
            f"(available splits: {available_splits})."
        )
    train_split = dataset["train"]

    total_rows = len(train_split)
    if not 1 <= num_rows <= total_rows:
        raise ValueError(
            f"Cannot sample {num_rows} rows: the 'train' split of "
            f"'{dataset_name}' has {total_rows} rows."
        )

    # Shuffle first so the selected rows are a random sample rather than
    # simply the first ``num_rows`` rows.
    sampled_dataset = train_split.shuffle().select(range(num_rows))

    # Publish under the token owner's namespace; the repository name is
    # derived from the source dataset's own name (without its namespace).
    username = HfApi().whoami(token=user_token)["name"]
    base_name = dataset_name.split("/")[-1]
    repo_id = f"{username}/{base_name}-sample-{num_rows}"
    sampled_dataset.push_to_hub(repo_id, token=user_token)

    return f"https://huggingface.co/datasets/{repo_id}"
def main():
    """Render the Streamlit UI and run the dataset sampling workflow.

    Collects an access token, a dataset chosen from a name search, and a row
    count, then calls ``create_sampled_dataset`` when the button is pressed
    and reports the outcome on the page.
    """
    st.title("HuggingFace Dataset Sampler")

    # The token is a credential: mask it on screen, and drop the stray
    # whitespace that tends to come along when it is pasted.
    user_token = st.text_input(
        "Enter your HuggingFace token for authentication", type="password"
    ).strip()

    # Dataset input with typeahead: search by name, then pick from matches.
    dataset_query = st.text_input("Enter Dataset Name")
    selected_dataset = None
    if dataset_query:
        try:
            dataset_names = fetch_dataset_names(dataset_query)
        except Exception as e:
            # UI boundary: show the failure instead of a raw traceback.
            st.error(f"Error: {e}")
        else:
            if dataset_names:
                selected_dataset = st.selectbox(
                    "Select Dataset", options=dataset_names
                )
            else:
                st.warning("No datasets match that name.")

    num_rows = st.number_input(
        "Enter number of rows to sample", min_value=1, step=1
    )

    if not st.button("Create Sampled Dataset"):
        return

    if not (user_token and selected_dataset and num_rows):
        st.error("Please fill in all required fields.")
        return

    try:
        # Create the sampled dataset and get its URL.
        dataset_url = create_sampled_dataset(
            selected_dataset, num_rows, user_token
        )
    except Exception as e:
        st.error(f"Error: {e}")
    else:
        st.success(f"Dataset created successfully! Find it here: {dataset_url}")
# Script entry point: build the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()