Spaces:
Running
Running
| """ | |
| DataView MCP - A comprehensive MCP server for exploring Hugging Face datasets. | |
| This MCP server provides 10 tools for searching, sampling, profiling, and | |
| discovering datasets on the Hugging Face Hub. | |
| Tools: | |
| 1. search_datasets - Find datasets by keyword, task, or domain | |
| 2. search_by_columns - Find datasets with specific column names | |
| 3. get_dataset_info - Get detailed metadata and README | |
| 4. get_schema - Get column names and data types | |
| 5. sample_rows - Get actual data samples | |
| 6. get_statistics - Compute column statistics | |
| 7. profile_quality - Assess data quality issues | |
| 8. find_similar - Find similar datasets | |
| 9. suggest_tasks - Suggest ML tasks for a dataset | |
| 10. compare_datasets - Compare two datasets side-by-side | |
| Usage: | |
| # Run locally | |
| python app.py | |
| # Or with Gradio CLI | |
| gradio app.py | |
| # Connect via MCP | |
| Add to your MCP client config: | |
| { | |
| "mcpServers": { | |
| "dataview": { | |
| "url": "http://localhost:7860/gradio_api/mcp/sse" | |
| } | |
| } | |
| } | |
| """ | |
| import gradio as gr | |
| from typing import Optional, List | |
| # Import all tools | |
| from tools.search import search_datasets, search_by_columns | |
| from tools.metadata import get_dataset_info, get_schema | |
| from tools.sampling import sample_rows | |
| from tools.profiling import get_statistics, profile_quality | |
| from tools.discovery import find_similar, suggest_tasks, compare_datasets | |
| # Create Gradio interfaces for each tool | |
| # Note: Gradio will automatically convert these to MCP tools | |
def create_demo():
    """Build the Gradio Blocks UI that fronts the DataView tool functions.

    The page consists of an intro banner, one tab per tool group (Search,
    Dataset Info, Sample Data, Analysis, Discovery, Compare) and a footer with
    MCP connection instructions. Each button is bound to one of the imported
    tool functions, and the function's return value is rendered into the
    Markdown panel of the same tab.

    ``search_by_columns`` has no tab of its own, so it is registered through
    ``gr.api`` as a UI-less endpoint. Without that registration it is imported
    but never attached to the app, even though the banner lists it among the
    ten available tools.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    with gr.Blocks(
        title="DataView MCP - HuggingFace Dataset Explorer",
        theme=gr.themes.Soft()
    ) as demo:
        gr.Markdown("""
        # DataView MCP
        ## Explore Hugging Face Datasets with AI
        This MCP server provides tools for AI assistants to explore, analyze, and
        understand datasets on the Hugging Face Hub.
        **10 Tools Available:**
        - Search & Discovery: `search_datasets`, `search_by_columns`, `find_similar`
        - Metadata: `get_dataset_info`, `get_schema`
        - Data Access: `sample_rows`
        - Analysis: `get_statistics`, `profile_quality`
        - Intelligence: `suggest_tasks`, `compare_datasets`
        ---
        ### Try the tools below or connect via MCP
        """)
        with gr.Tabs():
            # Search Tab
            with gr.Tab("Search"):
                with gr.Row():
                    with gr.Column():
                        search_query = gr.Textbox(
                            label="Search Query",
                            placeholder="e.g., sentiment analysis, medical imaging"
                        )
                        search_limit = gr.Slider(1, 50, value=10, step=1, label="Max Results")
                        # The leading None entry lets the user leave the task filter unset.
                        search_task = gr.Dropdown(
                            choices=[
                                None, "text-classification", "question-answering",
                                "summarization", "translation", "image-classification",
                                "object-detection", "text-generation"
                            ],
                            label="Filter by Task (optional)"
                        )
                        search_btn = gr.Button("Search Datasets", variant="primary")
                    with gr.Column():
                        search_output = gr.Markdown(label="Results")
                search_btn.click(
                    search_datasets,
                    inputs=[search_query, search_limit, search_task],
                    outputs=search_output
                )
            # Dataset Info Tab
            with gr.Tab("Dataset Info"):
                with gr.Row():
                    with gr.Column():
                        info_dataset_id = gr.Textbox(
                            label="Dataset ID",
                            placeholder="e.g., imdb, squad, huggingface/documentation-images"
                        )
                        info_btn = gr.Button("Get Info", variant="primary")
                        schema_btn = gr.Button("Get Schema")
                    with gr.Column():
                        info_output = gr.Markdown(label="Dataset Info")
                # Both buttons read the same dataset ID and write to the same panel.
                info_btn.click(get_dataset_info, inputs=[info_dataset_id], outputs=info_output)
                schema_btn.click(get_schema, inputs=[info_dataset_id], outputs=info_output)
            # Sample Data Tab
            with gr.Tab("Sample Data"):
                with gr.Row():
                    with gr.Column():
                        sample_dataset_id = gr.Textbox(
                            label="Dataset ID",
                            placeholder="e.g., imdb"
                        )
                        sample_n_rows = gr.Slider(1, 20, value=5, step=1, label="Number of Rows")
                        sample_split = gr.Dropdown(
                            choices=["train", "test", "validation"],
                            value="train",
                            label="Split"
                        )
                        sample_btn = gr.Button("Get Sample", variant="primary")
                    with gr.Column():
                        sample_output = gr.Markdown(label="Sample Data")
                # gr.State(None) pins the tool's third positional argument to None,
                # because the UI has no control for it.
                # NOTE(review): presumably a config/subset name - confirm in tools.sampling.
                sample_btn.click(
                    sample_rows,
                    inputs=[sample_dataset_id, sample_n_rows, gr.State(None), sample_split],
                    outputs=sample_output
                )
            # Analysis Tab
            with gr.Tab("Analysis"):
                with gr.Row():
                    with gr.Column():
                        analysis_dataset_id = gr.Textbox(
                            label="Dataset ID",
                            placeholder="e.g., imdb"
                        )
                        analysis_sample_size = gr.Slider(
                            100, 2000, value=500, step=100,
                            label="Sample Size for Analysis"
                        )
                        stats_btn = gr.Button("Get Statistics", variant="primary")
                        quality_btn = gr.Button("Profile Quality")
                    with gr.Column():
                        analysis_output = gr.Markdown(label="Analysis Results")
                # The second and third positional arguments are fixed to None and
                # "train" for both tools; only the dataset ID and sample size come
                # from the UI.
                # NOTE(review): presumably config and split - confirm in tools.profiling.
                stats_btn.click(
                    get_statistics,
                    inputs=[analysis_dataset_id, gr.State(None), gr.State("train"), analysis_sample_size],
                    outputs=analysis_output
                )
                quality_btn.click(
                    profile_quality,
                    inputs=[analysis_dataset_id, gr.State(None), gr.State("train"), analysis_sample_size],
                    outputs=analysis_output
                )
            # Discovery Tab
            with gr.Tab("Discovery"):
                with gr.Row():
                    with gr.Column():
                        discovery_dataset_id = gr.Textbox(
                            label="Dataset ID",
                            placeholder="e.g., imdb"
                        )
                        discovery_top_k = gr.Slider(1, 10, value=5, step=1, label="Number of Results")
                        similar_btn = gr.Button("Find Similar", variant="primary")
                        suggest_btn = gr.Button("Suggest Tasks")
                    with gr.Column():
                        discovery_output = gr.Markdown(label="Discovery Results")
                similar_btn.click(
                    find_similar,
                    inputs=[discovery_dataset_id, discovery_top_k],
                    outputs=discovery_output
                )
                suggest_btn.click(
                    suggest_tasks,
                    inputs=[discovery_dataset_id],
                    outputs=discovery_output
                )
            # Compare Tab
            with gr.Tab("Compare"):
                with gr.Row():
                    with gr.Column():
                        compare_dataset_a = gr.Textbox(
                            label="Dataset A",
                            placeholder="e.g., imdb"
                        )
                        compare_dataset_b = gr.Textbox(
                            label="Dataset B",
                            placeholder="e.g., rotten_tomatoes"
                        )
                        compare_btn = gr.Button("Compare Datasets", variant="primary")
                    with gr.Column():
                        compare_output = gr.Markdown(label="Comparison Results")
                compare_btn.click(
                    compare_datasets,
                    inputs=[compare_dataset_a, compare_dataset_b],
                    outputs=compare_output
                )
        # search_by_columns is advertised in the banner but has no tab, so no
        # event references it. Register it as an API-only endpoint so that it is
        # part of the app like the other nine tools.
        # NOTE(review): gr.api derives the endpoint schema from the function's
        # type hints - confirm tools.search.search_by_columns is annotated.
        gr.api(search_by_columns, api_name="search_by_columns")
        gr.Markdown("""
        ---
        ### MCP Connection
        To use with Claude or other MCP clients, add this to your config:
        ```json
        {
        "mcpServers": {
        "dataview": {
        "url": "https://YOUR-SPACE.hf.space/gradio_api/mcp/sse"
        }
        }
        }
        ```
        ---
        Built with Gradio MCP
        """)
    return demo
# Build the app at import time so a module-level `demo` always exists.
# NOTE(review): presumably this is what `gradio app.py` (see Usage) picks up - confirm.
demo = create_demo()

# Start the server only when this file is executed directly.
if __name__ == "__main__":
    launch_options = {
        "mcp_server": True,        # serve the MCP endpoint alongside the web UI
        "share": False,            # no public share link
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
    }
    demo.launch(**launch_options)